Jitter: A C++ and Java Comparison

In this article we write two equivalent programs, one in C++ and one in Java, that perform the same mathematical calculations in a loop, and then measure the jitter of each.

As you can see from the results below, the main source of jitter is the OS itself, not the choice between C++ and Java. C++ exhibits jitter comparable to Java's; in other words, the JVM does not introduce variance on top of the OS jitter. This holds for Java programs that produce zero garbage (no GC jitter) and are properly warmed up (no JIT jitter).

Note: All tests were pinned to the same isolated CPU core (thread pinning).

Java Jitter

Iterations: 9,000,000
Avg Time: 22.54 nanos
StDev: 5.53
Min Time: 20 nanos
Max Time: 3163 nanos
75% (6,750,000) = [avg: 22, stdev: 0.42, max: 23] - 25% (2,250,000) = [avg: 23, stdev: 10.96, min: 23]
90% (8,100,000) = [avg: 22, stdev: 0.39, max: 23] - 10% (900,000) = [avg: 24, stdev: 17.29, min: 23]
99% (8,910,000) = [avg: 22, stdev: 0.44, max: 24] - 1% (90,000) = [avg: 36, stdev: 53.31, min: 24]
99.9% (8,991,000) = [avg: 22, stdev: 1.45, max: 84] - 0.1% (9,000) = [avg: 111, stdev: 143.18, min: 84]
99.99% (8,999,100) = [avg: 22, stdev: 2.61, max: 117] - 0.01% (900) = [avg: 271, stdev: 419.53, min: 117]
99.999% (8,999,910) = [avg: 22, stdev: 2.98, max: 709] - 0.001% (90) = [avg: 1350, stdev: 639.89, min: 711]
99.9999% (8,999,991) = [avg: 22, stdev: 4.93, max: 2273] - 0.0001% (9) = [avg: 2513, stdev: 264.45, min: 2314]
99.99999% (8,999,999) = [avg: 22, stdev: 5.43, max: 2776] - 0.00001% (1) = [avg: 3163, stdev: 0.0, min: 3163]

C++ Jitter (-O0)

Iterations: 9,000,000
Avg Time: 225 nanos
Stdev: 13.12
Min Time: 213 nanos
Max Time: 9763 nanos
75% (6,750,000) = [avg: 223, stdev: 3.21, max: 229] - 25% (2,250,000) = [avg: 231, stdev: 24.72, min: 229]
90% (8,100,000) = [avg: 224, stdev: 3.78, max: 230] - 10% (900,000) = [avg: 233, stdev: 38.98, min: 230]
99% (8,910,000) = [avg: 224, stdev: 4.10, max: 233] - 1% (90,000) = [avg: 254, stdev: 121.30, min: 233]
99.9% (8,991,000) = [avg: 225, stdev: 4.20, max: 239] - 0.1% (9,000) = [avg: 422, stdev: 339.93, min: 239]
99.99% (8,999,100) = [avg: 225, stdev: 5.84, max: 487] - 0.01% (900) = [avg: 1119, stdev: 761.51, min: 487]
99.999% (8,999,910) = [avg: 225, stdev: 10.01, max: 2096] - 0.001% (90) = [avg: 2700, stdev: 1031.98, min: 2103]
99.9999% (8,999,991) = [avg: 225, stdev: 12.12, max: 3911] - 0.0001% (9) = [avg: 4939, stdev: 1740.65, min: 4038]
99.99999% (8,999,999) = [avg: 225, stdev: 12.73, max: 5186] - 0.00001% (1) = [avg: 9763, stdev: 0.00, min: 9763]

C++ Jitter (-O1)

Iterations: 9,000,000
Avg Time: 76 nanos
Stdev: 6.48
Min Time: 73 nanos
Max Time: 4340 nanos
75% (6,750,000) = [avg: 75, stdev: 0.81, max: 77] - 25% (2,250,000) = [avg: 77, stdev: 12.92, min: 77]
90% (8,100,000) = [avg: 76, stdev: 0.91, max: 78] - 10% (900,000) = [avg: 78, stdev: 20.38, min: 78]
99% (8,910,000) = [avg: 76, stdev: 1.04, max: 79] - 1% (90,000) = [avg: 83, stdev: 64.22, min: 79]
99.9% (8,991,000) = [avg: 76, stdev: 1.07, max: 80] - 0.1% (9,000) = [avg: 121, stdev: 198.96, min: 80]
99.99% (8,999,100) = [avg: 76, stdev: 1.13, max: 107] - 0.01% (900) = [avg: 438, stdev: 533.08, min: 107]
99.999% (8,999,910) = [avg: 76, stdev: 3.27, max: 995] - 0.001% (90) = [avg: 1770, stdev: 584.20, min: 995]
99.9999% (8,999,991) = [avg: 76, stdev: 5.81, max: 2255] - 0.0001% (9) = [avg: 2989, stdev: 732.98, min: 2264]
99.99999% (8,999,999) = [avg: 76, stdev: 6.39, max: 3881] - 0.00001% (1) = [avg: 4340, stdev: 0.00, min: 4340]

C++ Jitter (-O2)

Iterations: 9,000,000
Avg Time: 113 nanos
Stdev: 8.12
Min Time: 111 nanos
Max Time: 4256 nanos
75% (6,750,000) = [avg: 112, stdev: 0.69, max: 114] - 25% (2,250,000) = [avg: 114, stdev: 16.18, min: 114]
90% (8,100,000) = [avg: 113, stdev: 0.74, max: 114] - 10% (900,000) = [avg: 115, stdev: 25.54, min: 114]
99% (8,910,000) = [avg: 113, stdev: 0.92, max: 116] - 1% (90,000) = [avg: 121, stdev: 80.47, min: 116]
99.9% (8,991,000) = [avg: 113, stdev: 0.95, max: 117] - 0.1% (9,000) = [avg: 171, stdev: 248.99, min: 117]
99.99% (8,999,100) = [avg: 113, stdev: 0.99, max: 161] - 0.01% (900) = [avg: 619, stdev: 629.51, min: 161]
99.999% (8,999,910) = [avg: 113, stdev: 4.51, max: 1758] - 0.001% (90) = [avg: 2172, stdev: 599.49, min: 1764]
99.9999% (8,999,991) = [avg: 113, stdev: 7.24, max: 2989] - 0.0001% (9) = [avg: 3830, stdev: 289.02, min: 3275]
99.99999% (8,999,999) = [avg: 113, stdev: 8.02, max: 4136] - 0.00001% (1) = [avg: 4256, stdev: 0.00, min: 4256]

Java Source Code

package com.coralblocks.coralthreads.sample;

import com.coralblocks.coralbits.bench.Benchmarker;
import com.coralblocks.coralbits.util.SystemUtils;
import com.coralblocks.coralthreads.Affinity;

/**
 * Jitter benchmark: runs a small arithmetic workload a configurable number of times and
 * brackets every run with Benchmarker.mark() / Benchmarker.measure().
 */
public class TestJitter {

	/**
	 * Expects three integer program arguments:
	 * args[0] = number of runs, args[1] = value passed to Benchmarker.create(), args[2] = inner-loop load.
	 *
	 * The cpu to bind to is looked up through SystemUtils.getInt("procToBind", -1); when the lookup
	 * yields -1 no binding is attempted.
	 */
	public static void main(String[] args) {

		final int totalRuns = Integer.parseInt(args[0]);
		final int warmupRuns = Integer.parseInt(args[1]);
		final int workPerRun = Integer.parseInt(args[2]);
		final int cpu = SystemUtils.getInt("procToBind", -1);

		if (cpu != -1) {
			Affinity.set(cpu);
		}

		final Benchmarker timer = Benchmarker.create(warmupRuns);

		// Accumulated and printed at the end so the computed values are actually consumed.
		long checksum = 0;

		for (int run = 0; run < totalRuns; run++) {

			timer.mark();

			checksum += doSomething(workPerRun, run);

			timer.measure();
		}

		System.out.println("Value computed: " + checksum);
		timer.printResults();
	}

	/*
	 * The measured workload. It lives in its own method on purpose: main() runs only once, whereas
	 * this method is invoked on every run, which gives the JIT a hot method to inline/optimize/compile.
	 *
	 * Adds (i % 8) * (i % 16) to a running total `load` times when i is even, subtracts it when i is odd,
	 * and returns the total.
	 */
	private static final long doSomething(int load, int i) {

		long total = 0;

		for (int step = 0; step < load; step++) {
			long term = (i % 8) * (i % 16);
			if (i % 2 == 0) {
				total += term;
			} else {
				total -= term;
			}
		}

		return total;
	}
}

C++ Source Code

#include <sched.h>
#include <sys/time.h>

#include <algorithm>
#include <cmath>
#include <iomanip>
#include <iostream>
#include <limits>
#include <map>
#include <random>
#include <sstream>
#include <stdexcept>
#include <string>

using namespace std;

// TO COMPILE: g++ TestJitter.cpp -o TestJitter -std=c++11 -O2

// Compile-time switches that shape the report.
static constexpr bool MORE_PERCS = true;          // main() also reports the 99.9999% and 99.99999% percentiles
static constexpr bool INCLUDE_WORST_PERCS = true; // add_perc() also describes the samples above each percentile
static constexpr bool INCLUDE_TOTALS = true;      // add_perc() prints the sample count of each part in parentheses
static constexpr bool INCLUDE_RATIOS = false;     // add_perc() prints " R: " (worst-part avg relative to top-part avg, in %)
static constexpr bool INCLUDE_STDEV = true;       // standard deviations are computed and printed

// When true, main() subtracts the result of calc_nano_ts_cost() from every measured sample.
static constexpr bool EXCLUDE_NANO_TS_COST = true;

// Reads CLOCK_MONOTONIC into *ts and returns that same reading expressed in nanoseconds.
// The caller supplies the timespec so no storage is created per call.
long get_nano_ts(timespec* ts) {
	clock_gettime(CLOCK_MONOTONIC, ts);
	const long whole_seconds_in_nanos = ts->tv_sec * 1000000000;
	return whole_seconds_in_nanos + ts->tv_nsec;
}

// Number of back-to-back timestamp reads used to estimate the cost of a single read.
static const long NANO_COST_ITERATIONS = 10000000;

// Estimates the average cost, in nanoseconds, of one get_nano_ts() call:
// takes a starting timestamp, performs NANO_COST_ITERATIONS reads in a loop plus one
// closing read, and divides the elapsed time by NANO_COST_ITERATIONS (integer division).
static long calc_nano_ts_cost() {

	struct timespec scratch;

	const long begin = get_nano_ts(&scratch);
	long last = begin;

	long remaining = NANO_COST_ITERATIONS;
	while (remaining-- > 0) {
		last = get_nano_ts(&scratch);
	}

	// Closing read; this is the value the elapsed time is computed from.
	last = get_nano_ts(&scratch);

	const long elapsed = last - begin;
	return elapsed / NANO_COST_ITERATIONS;
}

// Counter node stored (by pointer) as the mapped value of the results histogram built in main():
// the map key is a measured time and `value` is how many samples had exactly that time.
struct mi {
   long value; // number of occurrences of the key's time
};

void add_perc(stringstream& ss, int size, double perc, map<int, mi*>* map) {

	if (map->empty()) return;
	
	int max = -1;
	int minBottom = -1;
	
	long x = round(perc * size);
	long i = 0;
	long iBottom = 0;
	
	long sum = 0;
	long sumBottom = 0;
	
	bool trueForTopFalseForBottom = true;
	bool flag = false;
	
	const int arraySize = 1024 * 1024 * 10;
	int* tempData = new int[arraySize];
	double stdevTop = -1;
	
	for(auto iter = map->begin(); iter != map->end(); iter++) {
	
		if (flag) break;
	
		int time = iter->first;
		long count = (iter->second)->value;
		
		for(int a = 0; a < count; a++) {
		
			if (trueForTopFalseForBottom) {
		
				tempData[i] = time;
		
				i++;
				sum += time;
				
				if (i == x) {
					
					max = time;
					
					if (INCLUDE_STDEV) {
    						
						double avg = (double) sum / (double) i;
						double temp = 0;
						
						for(int b = 0; b < i; b++) {
							int t = tempData[b];
							temp += (avg - t) * (avg - t);
						}
						
						stdevTop = sqrt(((double) temp / (double) i));
					}
				
					if (INCLUDE_WORST_PERCS) {
    					trueForTopFalseForBottom = false;	
    				} else {
    					flag = true;
						break;
    				}
				}
				
			} else {
			
				tempData[iBottom] = time;
			
				iBottom++;
				sumBottom += time;
				if (minBottom == -1) {
					minBottom = time;
				}
			}
		}
	}
	
	ss << " | " << fixed << setprecision(5) << (perc * 100) << "%";
	if (INCLUDE_TOTALS) ss << " (" << i << ")";
	ss << " = [avg: " << (sum / i);
	if (INCLUDE_STDEV) ss << ", stdev: " << fixed << setprecision(2) << stdevTop;
	ss << ", max: " << max << "]";
	if (INCLUDE_WORST_PERCS) {
		ss << " - " << fixed << setprecision(5) << ((1 - perc) * 100) << "%";
		if (INCLUDE_TOTALS) ss << " (" << (iBottom > 0 ? iBottom : 0) << ")";
		ss << " = [avg: " << (iBottom > 0 ? (sumBottom / iBottom) : -1);
		
		if (INCLUDE_STDEV) {
		
			ss << ", stdev: ";
		
			if (iBottom <= 0) {
				ss << "?";
			} else {
			
				double avgBottom = (sumBottom / iBottom);
				
				double temp = 0;
				
				for(int b = 0; b < iBottom; b++) {
					long t = tempData[b];
					temp += (avgBottom - t) * (avgBottom - t);
				}
				
				double stdevBottom = sqrt((double) temp / (double) iBottom);
			
				ss << fixed << setprecision(2) << stdevBottom;
			}
		
		}
		
		ss << ", min: " << (minBottom != -1 ? minBottom : -1) << "]";
		if (INCLUDE_RATIOS) {
		    ss << " R: ";
		    ss << fixed << setprecision(2) << (iBottom > 0 ? (((sumBottom / iBottom) / (double) (sum / i)) - 1) * 100 : -1);
		    ss << "%";
		}
	}
	
	delete[] tempData;
}

int main(int argc, char* argv[]) {
	
	int iterations = stoi(argv[1]);
	int warmup = stoi(argv[2]);
	int load = stoi(argv[3]);
	int proc = stoi(argv[4]);
	
	cpu_set_t my_set;
	CPU_ZERO(&my_set);
	CPU_SET(proc, &my_set);
	sched_setaffinity(0, sizeof(cpu_set_t), &my_set);
	
	long nanoTimeCost = EXCLUDE_NANO_TS_COST ? calc_nano_ts_cost() : 0;
	
	struct timespec ts;
	
	long long x = 0;
	long long totalTime = 0;
	int minTime = numeric_limits<int>::max();
	int maxTime = numeric_limits<int>::min();
	
	map<int, mi*>* results = new map<int, mi*>();
	
	for(int i = 0; i < iterations; i++) {
	
	 	long start = get_nano_ts(&ts);
	
		for(int j = 0; j < load; j++) {
			long p = (i % 8) * (i % 16);
			if (i % 2 == 0) {
				x += p;
			} else {
				x -= p;
			}
		}
		
		long end = get_nano_ts(&ts);

		int res = end - start - nanoTimeCost;
		
		if (res <= 0) res = 1;
		
		if (i >= warmup) {
			totalTime += res;
			minTime = min(minTime, res);
			maxTime = max(maxTime, res);
			
			auto iter = results->find(res);
			
			if (iter != results->end()) {
			
				(iter->second)->value = (iter->second)->value + 1;
				
			} else {
			
				mi* elem = new mi();
				elem->value = 1;
				(*results)[res] = elem;
			}
			
		}
	}
	
	int count = iterations - warmup;
	
	double avg = totalTime / count;
	
	cout << "Value computed: " << x << endl;
	cout << "Nano timestamp cost: " << nanoTimeCost << endl;
	
	stringstream ss;
	
	ss << "Iterations: " << count << " | Avg Time: " << avg;


	if (INCLUDE_STDEV) {
	
		long temp = 0;
		long x = 0;
	
		for(auto iter = results->begin(); iter != results->end(); iter++) {
	
			int time = iter->first;
			long count = (iter->second)->value;
			
			for(int a = 0; a < count; a++) {
				temp += (avg - time) * (avg - time);
				x++;
			}
		}
		
		double stdev = sqrt( temp / x );
		
		ss << " | Stdev: " << fixed << setprecision(2) << stdev;
	}
	
	if (count > 0) {
		ss << " | Min Time: " << minTime << " | Max Time: " << maxTime;
	}
	
	add_perc(ss, count, 0.75, results);
	add_perc(ss, count, 0.90, results);
	add_perc(ss, count, 0.99, results);
	add_perc(ss, count, 0.999, results);
	add_perc(ss, count, 0.9999, results);
	add_perc(ss, count, 0.99999, results);
	
	if (MORE_PERCS) {
		add_perc(ss, count, 0.999999, results);
		add_perc(ss, count, 0.9999999, results);
	}

	cout << ss.str() << endl << endl;
		
	delete results;
	
	return 0;
}