Dear DPDK community, We have been using the code below to measure O/S noise affecting our data plane threads, so we can tune the kernel parameters. The measurement method is based on dedicating an lcore to run the experimental_thread() function, which repeatedly reads the TSC and stores the difference between each TSC interval in a histogram.
Someone might find it useful, so I thought I would share. It might also be a neat companion to the l2reflect application, to show how much of the jitter could possibly be attributed to noise from the O/S itself. Output looks like this: experimental cycles_hz=3407991799 cycles=[ 64.. 127] ns=[ 19.. 37] cycles_avg= 80 ns_avg= 24, calls=102974652359, focus_calls=100.00%, cycles=8261507529060, focus_cycles=100.00% cycles=[ 128.. 255] ns=[ 38.. 75] cycles_avg= 243 ns_avg= 71, calls=126867, focus_calls=0.00%, cycles=30767472, focus_cycles=0.00% cycles=[ 256.. 511] ns=[ 75.. 150] cycles_avg= 341 ns_avg= 100, calls=24751, focus_calls=0.00%, cycles=8437950, focus_cycles=0.00% cycles=[ 512.. 1023] ns=[ 150.. 300] cycles_avg= 597 ns_avg= 175, calls=3996, focus_calls=0.00%, cycles=2385340, focus_cycles=0.00% cycles=[ 1024.. 2047] ns=[ 300.. 601] cycles_avg= 1312 ns_avg= 385, calls=190, focus_calls=0.00%, cycles=249302, focus_cycles=0.00% cycles=[ 2048.. 4095] ns=[ 601.. 1202] cycles_avg= 2456 ns_avg= 721, calls=27, focus_calls=0.00%, cycles=66302, focus_cycles=0.00% cycles=[ 4096.. 8191] ns=[ 1202.. 2403] cycles_avg= 7474 ns_avg= 2193, calls=1941, focus_calls=0.00%, cycles=14507354, focus_cycles=0.00% cycles=[ 8192.. 16383] ns=[ 2404.. 4807] cycles_avg= 8592 ns_avg= 2521, calls=490, focus_calls=0.00%, cycles=4209962, focus_cycles=0.00% bool control_running = true; /* Set to false to stop the threads. */ static uint64_t experimental_calls[65] __rte_cache_aligned; static uint64_t experimental_cycles[65] __rte_cache_aligned; void experimental_thread(void); void experimental_thread(void) { uint64_t tsc_now; uint64_t tsc_before; uint64_t diff; int msb_pos; printf("experimental_thread running on lcore %u\n", rte_lcore_id()); /* Warm up the cache to avoid cache misses. */ memset(experimental_calls, 0, sizeof(experimental_calls)); memset(experimental_cycles, 0, sizeof(experimental_cycles)); tsc_before = rte_rdtsc_precise(); while (control_running) { tsc_now = rte_rdtsc_precise(); diff = tsc_now - tsc_before; tsc_before = tsc_now; msb_pos = rte_fls_u64(diff); /* Return the last (most-significant) bit set: 64..1, or 0 if none. */ experimental_calls[msb_pos]++; experimental_cycles[msb_pos] += diff; } } void experimental_dump(FILE *f); void experimental_dump(FILE *f) { unsigned int idx; uint64_t from = 0, to = 0; uint64_t calls = 0; uint64_t cycles = 0; uint64_t all_calls = 0; uint64_t all_cycles = 0; for (idx = 0; idx <= 64; idx++) { all_calls += experimental_calls[idx]; all_cycles += experimental_cycles[idx]; } fprintf(f, "experimental\n"); fprintf(f, " cycles_hz=%"PRIu64"\n", rte_get_tsc_hz()); for (idx = 0; idx <= 64; idx++) { calls = experimental_calls[idx]; cycles = experimental_cycles[idx]; if (calls == 0) continue; if (idx != 0) { from = 1 << (idx - 1); to = from | (from - 1); } fprintf(f, " cycles=[%8"PRIu64"..%8"PRIu64"] ns=[%8.0f..%8.0f] cycles_avg=%8"PRIu64" ns_avg=%8.0f, calls=%"PRIu64", focus_calls=%.02f%%, cycles=%"PRIu64", focus_cycles=%.02f%%\n", from, to, (float)from * (float)1000000000 / (float)rte_get_tsc_hz(), (float)to * (float)1000000000 / (float)rte_get_tsc_hz(), (cycles + calls / 2) / calls, (float)cycles / (float)calls * (float)1000000000 / (float)rte_get_tsc_hz(), calls, (float)calls / (float)all_calls * (float)100, cycles, (float)cycles / (float)all_cycles * (float)100); } Med venlig hilsen / Kind regards, -Morten Brørup