Jitter: A C++ and Java Comparison

In this article we write two equivalent programs, one in C++ and one in Java, that perform the same mathematical calculations in a loop, and then measure the jitter of each.

As you can see from the results below, the main source of jitter is the OS itself, not the choice between C++ and Java. C++ exhibits jitter comparable to that of Java; in other words, the JVM does not introduce variance on top of the OS jitter. This holds for Java programs that produce zero garbage (no GC jitter) and are properly warmed up (no JIT jitter).

Note: We used the same isolated CPU core for all tests through thread pinning.

Java Version

java version "17.0.1" 2021-10-19 LTS
Java(TM) SE Runtime Environment (build 17.0.1+12-LTS-39)
Java HotSpot(TM) 64-Bit Server VM (build 17.0.1+12-LTS-39, mixed mode, sharing)

Java Jitter

Iterations: 9,000,000
Avg Time: 45.64 nanos
StDev: 41.98 nanos
Min Time: 17 nanos
Max Time: 23531 nanos
75% (6,750,000) = [avg: 38 , stdev: 19.85 , max: 67] - 25% (2,250,000) = [avg: 68 , stdev: 72.02 , min: 67]
90% (8,100,000) = [avg: 43 , stdev: 21.2 , max: 68] - 10% (900,000) = [avg: 69 , stdev: 113.86 , min: 68]
99% (8,910,000) = [avg: 45 , stdev: 21.45, max: 68] - 1% (90,000) = [avg: 79 , stdev: 359.88 , min: 68]
99.9% (8,991,000) = [avg: 45 , stdev: 21.47 , max: 70] - 0.1% (9,000) = [avg: 179 , stdev: 1133.2 , min: 70]
99.99% (8,999,100) = [avg: 45 , stdev: 21.47 , max: 77] - 0.01% (900) = [avg: 1113, stdev: 3445.62 , min: 77]
99.999% (8,999,910) = [avg: 45 , stdev: 21.69 , max: 911] - 0.001% (90) = [avg: 9340, stdev: 6545.94 , min: 911 ]
99.9999% (8,999,991) = [avg: 45 , stdev: 38.11 , max: 15185] - 0.0001% (9) = [avg: 17427, stdev: 2851.13 , min: 15197]
99.99999% (8,999,999) = [avg: 45 , stdev: 41.24 , max: 20875] - 0.00001% (1) = [avg: 23531, stdev: 0.0 , min: 23531]

C++ Jitter (-O3)

Iterations: 9,000,000
Avg Time: 205 nanos
Stdev: 50.38 nanos
Min Time: 203 nanos
Max Time: 23656 nanos
75% (6,750,000) = [avg: 205, stdev: 0.55, max: 206] - 25% (2,250,000) = [avg: 207, stdev: 100.74, min: 206]
90% (8,100,000) = [avg: 205, stdev: 0.56, max: 206] - 10% (900,000) = [avg: 208, stdev: 159.27, min: 206]
99% (8,910,000) = [avg: 205, stdev: 0.65, max: 207] - 1% (90,000) = [avg: 229, stdev: 503.18, min: 207]
99.9% (8,991,000) = [avg: 205, stdev: 0.68, max: 210] - 0.1% (9000) = [avg: 426, stdev: 1577.60, min: 210]
99.99% (8,999,100) = [avg: 205, stdev: 0.69, max: 215] - 0.01% (900) = [avg: 2364, stdev: 4550.97, min: 215]
99.999% (8,999,910) = [avg: 205, stdev: 18.32, max: 13111] - 0.001% (90) = [avg: 14923, stdev: 1900.00, min: 13120]
99.9999% (8,999,991) = [avg: 205, stdev: 46.53, max: 16273] - 0.0001% (9) = [avg: 19409, stdev: 1918.96, min: 17846]
99.99999% (8,999,999) = [avg: 205, stdev: 49.77, max: 21292] - 0.00001% (1) = [avg: 23656, stdev: 0.00, min: 23656]

Java Source Code

package com.coralblocks.coralthreads.sample;

import com.coralblocks.coralbits.bench.Benchmarker;
import com.coralblocks.coralbits.util.SystemUtils;
import com.coralblocks.coralthreads.Affinity;

/**
 * Micro-benchmark that times a fixed arithmetic workload once per iteration and
 * prints the statistics gathered by the {@code Benchmarker}.
 *
 * Command-line arguments: {@code <iterations> <warmup> <load>}. The optional system
 * property {@code procToBind} selects the processor passed to {@code Affinity.set}.
 */
public class TestJitter {

	// To execute: java -server -verbose:gc -cp coralthreads-all.jar -DbenchWorstPercs=true -DbenchTotals=true -DbenchStdev=true -DbenchMorePercs=true -DdetailedBenchmarker=true -DprocToBind=1 -DexcludeNanoTimeCost=true com.coralblocks.coralthreads.sample.TestJitter 10000000 1000000 1000

	public static void main(String[] args) {

		final int totalIterations = Integer.parseInt(args[0]);
		final int warmupIterations = Integer.parseInt(args[1]);
		final int workload = Integer.parseInt(args[2]);

		// -1 is the "not configured" default: no affinity is set in that case.
		final int cpu = SystemUtils.getInt("procToBind", -1);
		if (cpu != -1) {
			Affinity.set(cpu);
		}

		// NOTE(review): presumably the benchmarker ignores the first `warmup` measurements - confirm against Benchmarker.
		final Benchmarker benchmarker = Benchmarker.create(warmupIterations);

		// Accumulated and printed at the end so the computed value is actually used.
		long accumulated = 0;

		int iteration = 0;
		while (iteration < totalIterations) {

			benchmarker.mark();

			accumulated += doSomething(workload, iteration);

			benchmarker.measure();

			iteration++;
		}

		System.out.println("Value computed: " + accumulated);
		benchmarker.printResults();
	}

	/*
	 * The measured workload lives in its own method on purpose: main() runs only once, whereas this
	 * method is invoked on every iteration, which lets the JIT inline/optimize/compile it.
	 *
	 * Adds (even i) or subtracts (odd i) the product (i % 8) * (i % 16), `load` times, and returns the total.
	 */
	private final static long doSomething(int load, int i) {

		long total = 0;

		for (int step = 0; step < load; step++) {
			long product = (i % 8) * (i % 16);
			if (i % 2 != 0) {
				total -= product;
			} else {
				total += product;
			}
		}

		return total;
	}
}

C++ Source Code

#include <iostream>
#include <string>
#include <random>
#include <cmath>
#include <algorithm>
#include <limits>
#include <sys/time.h>
#include <map>
#include <sched.h>
#include <sstream>
#include <iomanip>

using namespace std;

// TO COMPILE: g++ TestJitter.cpp -o TestJitter -std=c++11 -O3
// TO EXECUTE: ./TestJitter 10000000 1000000 1000 1

// Compile-time switches controlling what the report contains.

// Also report the 99.9999% and 99.99999% percentiles.
static constexpr bool MORE_PERCS = true;
// For every percentile, also report the remaining (slowest) samples.
static constexpr bool INCLUDE_WORST_PERCS = true;
// Print the number of samples in each group, in parentheses.
static constexpr bool INCLUDE_TOTALS = true;
// Print the "R:" ratio between the slow-group and fast-group averages.
static constexpr bool INCLUDE_RATIOS = false;
// Print standard deviations.
static constexpr bool INCLUDE_STDEV = true;

// Subtract the measured cost of taking a timestamp from every sample.
static constexpr bool EXCLUDE_NANO_TS_COST = true;

// Reads CLOCK_MONOTONIC into *ts and returns the same instant expressed as a
// single nanosecond count (seconds * 1e9 + nanoseconds).
long get_nano_ts(timespec* ts) {
	clock_gettime(CLOCK_MONOTONIC, ts);
	const auto seconds_as_nanos = ts->tv_sec * 1000000000;
	return seconds_as_nanos + ts->tv_nsec;
}

// Number of back-to-back timestamp reads used to estimate the cost of one read.
static const long NANO_COST_ITERATIONS = 10000000;

// Estimates the cost, in nanoseconds, of one get_nano_ts() call: takes a start
// timestamp, reads the clock NANO_COST_ITERATIONS times in a loop plus once more
// afterwards, and divides the elapsed time by NANO_COST_ITERATIONS (integer division).
static long calc_nano_ts_cost() {

	struct timespec scratch;

	const long begin = get_nano_ts(&scratch);
	long last = begin;

	long remaining = NANO_COST_ITERATIONS;
	while (remaining-- > 0) {
		last = get_nano_ts(&scratch);
	}

	last = get_nano_ts(&scratch);

	const long elapsed = last - begin;
	return elapsed / NANO_COST_ITERATIONS;
}

// Mutable integer box: the mapped value of the latency histogram
// (measured time -> number of samples with that time).
// The counter starts at zero however the object is created, so a
// default-initialized instance (`mi m;`) never holds an indeterminate value.
struct mi {
   long value = 0;
};

// Appends one percentile segment to ss, in the form
//   " | P% (n) = [avg: A, stdev: S, max: M] - Q% (m) = [avg: A, stdev: S, min: M]"
// where the first bracket describes the fastest round(perc * size) samples (the
// "top" group) and the second bracket the remaining, slowest samples (the
// "bottom" group). The optional parts are controlled by INCLUDE_TOTALS,
// INCLUDE_STDEV, INCLUDE_WORST_PERCS and INCLUDE_RATIOS.
//
// Parameters:
//   ss   - stream the text is appended to; its fixed/precision state is modified.
//   size - number of samples the percentile is taken over.
//   perc - fraction of the samples that forms the top group (e.g. 0.99).
//   map  - histogram: measured time -> number of samples with that time.
//
// Nothing is written when the histogram is null, empty or holds no samples.
// The top group size is clamped to [1, total samples in the histogram] so that
// the group is never empty and never larger than the data.
//
// The statistics are computed straight from the histogram buckets, so no
// per-sample scratch buffer is needed and any number of samples is supported.
void add_perc(stringstream& ss, int size, double perc, map<int, mi*>* map) {

	if (map == nullptr || map->empty()) return;

	// Number of samples actually stored in the histogram.
	long total = 0;
	for (auto iter = map->begin(); iter != map->end(); ++iter) {
		total += (iter->second)->value;
	}
	if (total <= 0) return; // nothing to average: avoids dividing by zero below

	// Size of the top group.
	long x = round(perc * size);
	if (x < 1) x = 1;
	if (x > total) x = total;

	int max = -1;        // time of the slowest sample in the top group
	int minBottom = -1;  // time of the fastest sample in the bottom group

	long i = 0;          // samples in the top group
	long iBottom = 0;    // samples in the bottom group

	long sum = 0;
	long sumBottom = 0;

	// First pass: split the buckets (visited in ascending time order) between the
	// two groups. The bucket holding the x-th sample may contribute to both.
	for (auto iter = map->begin(); iter != map->end(); ++iter) {

		const int time = iter->first;
		const long count = (iter->second)->value;
		if (count <= 0) continue;

		const long toTop = std::min(count, x - i); // 0 once the top group is full
		if (toTop > 0) {
			i += toTop;
			sum += toTop * time;
			max = time;
		}

		const long toBottom = count - toTop;
		if (toBottom > 0) {
			if (!INCLUDE_WORST_PERCS) break; // the bottom group is not reported
			if (minBottom == -1) minBottom = time;
			iBottom += toBottom;
			sumBottom += toBottom * time;
		}
	}

	double stdevTop = -1;
	double stdevBottom = -1;

	if (INCLUDE_STDEV) {

		// Exact (non-truncated) means, so both groups are measured the same way.
		const double avgTop = static_cast<double>(sum) / static_cast<double>(i);
		const double avgBottom = iBottom > 0 ? static_cast<double>(sumBottom) / static_cast<double>(iBottom) : 0;

		double devTop = 0;
		double devBottom = 0;
		long assigned = 0; // samples handed to the top group so far

		// Second pass: same split as above, accumulating squared deviations.
		for (auto iter = map->begin(); iter != map->end(); ++iter) {

			const int time = iter->first;
			const long count = (iter->second)->value;
			if (count <= 0) continue;

			const long toTop = std::min(count, x - assigned);
			assigned += toTop;
			devTop += toTop * (avgTop - time) * (avgTop - time);

			const long toBottom = count - toTop;
			if (toBottom > 0) {
				if (!INCLUDE_WORST_PERCS) break;
				devBottom += toBottom * (avgBottom - time) * (avgBottom - time);
			}
		}

		stdevTop = sqrt(devTop / static_cast<double>(i));
		if (iBottom > 0) stdevBottom = sqrt(devBottom / static_cast<double>(iBottom));
	}

	ss << " | " << fixed << setprecision(5) << (perc * 100) << "%";
	if (INCLUDE_TOTALS) ss << " (" << i << ")";
	ss << " = [avg: " << (sum / i); // averages are printed as whole nanoseconds
	if (INCLUDE_STDEV) ss << ", stdev: " << fixed << setprecision(2) << stdevTop;
	ss << ", max: " << max << "]";

	if (INCLUDE_WORST_PERCS) {

		ss << " - " << fixed << setprecision(5) << ((1 - perc) * 100) << "%";
		if (INCLUDE_TOTALS) ss << " (" << iBottom << ")";
		ss << " = [avg: " << (iBottom > 0 ? (sumBottom / iBottom) : -1);

		if (INCLUDE_STDEV) {

			ss << ", stdev: ";

			if (iBottom <= 0) {
				ss << "?";
			} else {
				ss << fixed << setprecision(2) << stdevBottom;
			}
		}

		ss << ", min: " << minBottom << "]";

		if (INCLUDE_RATIOS) {
		    // How much slower the bottom group's printed average is than the top group's, in percent.
		    ss << " R: ";
		    ss << fixed << setprecision(2) << (iBottom > 0 ? (((sumBottom / iBottom) / (double) (sum / i)) - 1) * 100 : -1);
		    ss << "%";
		}
	}
}

int main(int argc, char* argv[]) {
	
	int iterations = stoi(argv[1]);
	int warmup = stoi(argv[2]);
	int load = stoi(argv[3]);
	int proc = stoi(argv[4]);
	
	cpu_set_t my_set;
	CPU_ZERO(&my_set);
	CPU_SET(proc, &my_set);
	sched_setaffinity(0, sizeof(cpu_set_t), &my_set);
	
	long nanoTimeCost = EXCLUDE_NANO_TS_COST ? calc_nano_ts_cost() : 0;
	
	struct timespec ts;
	
	long long x = 0;
	long long totalTime = 0;
	int minTime = numeric_limits<int>::max();
	int maxTime = numeric_limits<int>::min();
	
	map<int, mi*>* results = new map<int, mi*>();
	
	for(int i = 0; i < iterations; i++) {
	
	 	long start = get_nano_ts(&ts);
	
		for(int j = 0; j < load; j++) {
			long p = (i % 8) * (i % 16);
			if (i % 2 == 0) {
				x += p;
			} else {
				x -= p;
			}
			asm(""); // so that the loop is not removed by -O3
		}
		
		long end = get_nano_ts(&ts);

		int res = end - start - nanoTimeCost;
		
		if (res <= 0) res = 1;
		
		if (i >= warmup) {
			totalTime += res;
			minTime = min(minTime, res);
			maxTime = max(maxTime, res);
			
			auto iter = results->find(res);
			
			if (iter != results->end()) {
			
				(iter->second)->value = (iter->second)->value + 1;
				
			} else {
			
				mi* elem = new mi();
				elem->value = 1;
				(*results)[res] = elem;
			}		
		}
	}
	
	int count = iterations - warmup;
	
	double avg = totalTime / count;
	
	cout << "Value computed: " << x << endl;
	cout << "Nano timestamp cost: " << nanoTimeCost << endl;
	
	stringstream ss;
	
	ss << "Iterations: " << count << " | Avg Time: " << avg;


	if (INCLUDE_STDEV) {
	
		long temp = 0;
		long x = 0;
	
		for(auto iter = results->begin(); iter != results->end(); iter++) {
	
			int time = iter->first;
			long count = (iter->second)->value;
			
			for(int a = 0; a < count; a++) {
				temp += (avg - time) * (avg - time);
				x++;
			}
		}
		
		double stdev = sqrt( temp / x );
		
		ss << " | Stdev: " << fixed << setprecision(2) << stdev;
	}
	
	if (count > 0) {
		ss << " | Min Time: " << minTime << " | Max Time: " << maxTime;
	}
	
	add_perc(ss, count, 0.75, results);
	add_perc(ss, count, 0.90, results);
	add_perc(ss, count, 0.99, results);
	add_perc(ss, count, 0.999, results);
	add_perc(ss, count, 0.9999, results);
	add_perc(ss, count, 0.99999, results);
	
	if (MORE_PERCS) {
		add_perc(ss, count, 0.999999, results);
		add_perc(ss, count, 0.9999999, results);
	}

	cout << ss.str() << endl << endl;
		
	delete results;
	
	return 0;
}