Hi, Here is the oprofile results of pgbench. CPU: P4 / Xeon with 2 hyper-threads, speed 2793.55 MHz (estimated) Counted GLOBAL_POWER_EVENTS events with a unit mask of 0x01 (mandatory) count 100000 samples % app name symbol name 134521 6.8312 ipmi_si (no symbols) 94515 4.7996 vmlinux schedule 52609 2.6716 postgres AllocSetAlloc 39659 2.0140 postgres base_yyparse 34605 1.7573 vmlinux mwait_idle 33234 1.6877 vmlinux _spin_lock 31353 1.5922 libc-2.3.4.so memcpy
I think that the performance improves if the call frequency of memcpy is reduced. I measured the place where postgres used memcpy. (Test program is pgbench -t 4000) total-size avg-size caller ------------------------------------------------------------------------ 636185 111968560 176 catcache.c:1129 68236 18436197 270 xlog.c:947 3909 13822874 3536 xlog.c:940 20003 3520528 176 catcache.c:1376 56010 2071477 36 pgstat.c:2288 125524 1902864 15 dynahash.c:948 20001 1760088 88 setrefs.c:205 catcache.c:1129 is memcpy at SearchCatCache, and catcache.c:1376 is memcpy at SearchCatCacheList. memcpy(cur_skey, cache->cc_skey, sizeof(cur_skey)); Attached patch is reduce the memcpy calls from SearchCatCache and SearchCatCacheList. This patch directly uses cache->cc_skey in looking for hash table. Here is an effect of the patch. original: Counted GLOBAL_POWER_EVENTS events samples % app name symbol name 31353 1.5922 libc-2.3.4.so memcpy patched: Counted GLOBAL_POWER_EVENTS events samples % app name symbol name 20629 1.0684 libc-2.3.4.so memcpy --- Atsushi Ogawa
*** ./src/backend/utils/cache/catcache.c.orig 2009-07-06 22:06:52.000000000 +0900 --- ./src/backend/utils/cache/catcache.c 2009-07-06 13:51:48.000000000 +0900 *************** *** 1124,1140 **** /* * initialize the search key information */ ! memcpy(cur_skey, cache->cc_skey, sizeof(cur_skey)); ! cur_skey[0].sk_argument = v1; ! cur_skey[1].sk_argument = v2; ! cur_skey[2].sk_argument = v3; ! cur_skey[3].sk_argument = v4; /* * find the hash bucket in which to look for the tuple */ ! hashValue = CatalogCacheComputeHashValue(cache, cache->cc_nkeys, cur_skey); hashIndex = HASH_INDEX(hashValue, cache->cc_nbuckets); /* --- 1124,1141 ---- /* * initialize the search key information + * use cache->cc_skey directly in looking for hash table */ ! cache->cc_skey[0].sk_argument = v1; ! cache->cc_skey[1].sk_argument = v2; ! cache->cc_skey[2].sk_argument = v3; ! cache->cc_skey[3].sk_argument = v4; /* * find the hash bucket in which to look for the tuple */ ! hashValue = CatalogCacheComputeHashValue(cache, cache->cc_nkeys, ! cache->cc_skey); hashIndex = HASH_INDEX(hashValue, cache->cc_nbuckets); /* *************** *** 1160,1166 **** HeapKeyTest(&ct->tuple, cache->cc_tupdesc, cache->cc_nkeys, ! cur_skey, res); if (!res) continue; --- 1161,1167 ---- HeapKeyTest(&ct->tuple, cache->cc_tupdesc, cache->cc_nkeys, ! cache->cc_skey, res); if (!res) continue; *************** *** 1222,1227 **** --- 1223,1234 ---- */ relation = heap_open(cache->cc_reloid, AccessShareLock); + /* + * We need copy ScanKey data, because systable_beginscan changes + * the ScanKey data. + */ + memcpy(cur_skey, cache->cc_skey, sizeof(cur_skey)); + scandesc = systable_beginscan(relation, cache->cc_indexoid, IndexScanOK(cache, cur_skey), *************** *** 1371,1389 **** /* * initialize the search key information */ ! memcpy(cur_skey, cache->cc_skey, sizeof(cur_skey)); ! cur_skey[0].sk_argument = v1; ! cur_skey[1].sk_argument = v2; ! cur_skey[2].sk_argument = v3; ! cur_skey[3].sk_argument = v4; /* * compute a hash value of the given keys for faster search. We don't * presently divide the CatCList items into buckets, but this still lets * us skip non-matching items quickly most of the time. */ ! lHashValue = CatalogCacheComputeHashValue(cache, nkeys, cur_skey); /* * scan the items until we find a match or exhaust our list --- 1378,1396 ---- /* * initialize the search key information + * use cache->cc_skey directly in looking for hash table */ ! cache->cc_skey[0].sk_argument = v1; ! cache->cc_skey[1].sk_argument = v2; ! cache->cc_skey[2].sk_argument = v3; ! cache->cc_skey[3].sk_argument = v4; /* * compute a hash value of the given keys for faster search. We don't * presently divide the CatCList items into buckets, but this still lets * us skip non-matching items quickly most of the time. */ ! lHashValue = CatalogCacheComputeHashValue(cache, nkeys, cache->cc_skey); /* * scan the items until we find a match or exhaust our list *************** *** 1410,1416 **** HeapKeyTest(&cl->tuple, cache->cc_tupdesc, nkeys, ! cur_skey, res); if (!res) continue; --- 1417,1423 ---- HeapKeyTest(&cl->tuple, cache->cc_tupdesc, nkeys, ! cache->cc_skey, res); if (!res) continue; *************** *** 1460,1465 **** --- 1467,1478 ---- relation = heap_open(cache->cc_reloid, AccessShareLock); + /* + * We need copy ScanKey data, because systable_beginscan changes + * the ScanKey data. + */ + memcpy(cur_skey, cache->cc_skey, sizeof(cur_skey)); + scandesc = systable_beginscan(relation, cache->cc_indexoid, true,
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers