On 2015/1/22 23:21, Bruce Richardson wrote:
> This (size_c) is a run-time constant, not a compile-time constant. To trigger
> the memcpy optimizations inside the compiler, the size value must be constant
> at compile time.
Hi, Bruce You are right. When use compile-time constant memcpy is faster.Thank you for all. Here is my test result: rte_memcpy(constant) size:8 time:876 rte_memcpy(variable) size:8 time:2824 rte_memcpy(constant) size:16 time:868 rte_memcpy(variable) size:16 time:4436 rte_memcpy(constant) size:32 time:856 rte_memcpy(variable) size:32 time:3264 rte_memcpy(constant) size:48 time:872 rte_memcpy(variable) size:48 time:3972 rte_memcpy(constant) size:64 time:856 rte_memcpy(variable) size:64 time:3644 rte_memcpy(constant) size:128 time:868 rte_memcpy(variable) size:128 time:4720 rte_memcpy(constant) size:256 time:868 rte_memcpy(variable) size:256 time:9624 Here is my test program(Who know how to use a loop to test 'constant memcpy'?): #include <stdio.h> #include <rte_memcpy.h> #include <rte_cycles.h> int main(int narg, char** args) { int i,t; char buf[256]; int tests[7] = {8,16,32,48,64,128,256}; char buf8[8],buf16[16],buf32[32],buf48[48],buf64[64],buf128[128],buf256[256]; uint64_t start, end; int times = 9999999; uint64_t result_c[7]; if (narg < 2) { printf("usage:./rte_memcpy_test times\n"); return -1; } times = atoi(args[1]); start = rte_rdtsc(); for(t = 0; t < times; t++) { rte_memcpy(buf8, buf8, sizeof buf8); } end = rte_rdtsc(); result_c[0] = end - start; start = rte_rdtsc(); for(t = 0; t < times; t++) { rte_memcpy(buf16, buf16, sizeof buf16); } end = rte_rdtsc(); result_c[1] = end - start; start = rte_rdtsc(); for(t = 0; t < times; t++) { rte_memcpy(buf32, buf32, sizeof buf32); } end = rte_rdtsc(); result_c[2] = end - start; start = rte_rdtsc(); for(t = 0; t < times; t++) { rte_memcpy(buf48, buf48, sizeof buf48); } end = rte_rdtsc(); result_c[3] = end - start; start = rte_rdtsc(); for(t = 0; t < times; t++) { rte_memcpy(buf64, buf64, sizeof buf64); } end = rte_rdtsc(); result_c[4] = end - start; start = rte_rdtsc(); for(t = 0; t < times; t++) { rte_memcpy(buf128, buf128, sizeof buf128); } end = rte_rdtsc(); result_c[5] = end - start; start = rte_rdtsc(); for(t = 0; 
t < times; t++) { rte_memcpy(buf256, buf256, sizeof buf256); } end = rte_rdtsc(); result_c[6] = end - start; for (i = 0; i < (sizeof tests / sizeof tests[0]); i++) { start = rte_rdtsc(); for(t = 0; t < times; t++) { rte_memcpy(buf, buf, tests[i]); } end = rte_rdtsc(); printf("rte_memcpy(constant) size:%d time:%llu\n", tests[i], result_c[i]); printf("rte_memcpy(variable) size:%d time:%llu\n", tests[i], end - start); } return 0; } -- Regards, Haifeng