BTW, if we're to start taking joins on TID seriously, we should also
add the missing hash opclass for TID, so that you can do hash joins
when dealing with a lot of rows.

(In principle this also enables things like hash aggregation, though
I'm not very clear on a use-case for grouping by TID.)

                        regards, tom lane

diff --git a/src/backend/utils/adt/tid.c b/src/backend/utils/adt/tid.c
index 41d540b..7b25947 100644
*** a/src/backend/utils/adt/tid.c
--- b/src/backend/utils/adt/tid.c
***************
*** 20,25 ****
--- 20,26 ----
  #include <math.h>
  #include <limits.h>
  
+ #include "access/hash.h"
  #include "access/heapam.h"
  #include "access/sysattr.h"
  #include "catalog/namespace.h"
*************** tidsmaller(PG_FUNCTION_ARGS)
*** 239,244 ****
--- 240,272 ----
  	PG_RETURN_ITEMPOINTER(ItemPointerCompare(arg1, arg2) <= 0 ? arg1 : arg2);
  }
  
+ Datum
+ hashtid(PG_FUNCTION_ARGS)
+ {
+ 	ItemPointer key = PG_GETARG_ITEMPOINTER(0);
+ 
+ 	/*
+ 	 * While you'll probably have a lot of trouble with a compiler that
+ 	 * insists on appending pad space to struct ItemPointerData, we can at
+ 	 * least make this code work, by not using sizeof(ItemPointerData).
+ 	 * Instead rely on knowing the sizes of the component fields.
+ 	 */
+ 	return hash_any((unsigned char *) key,
+ 					sizeof(BlockIdData) + sizeof(OffsetNumber));
+ }
+ 
+ Datum
+ hashtidextended(PG_FUNCTION_ARGS)
+ {
+ 	ItemPointer key = PG_GETARG_ITEMPOINTER(0);
+ 	uint64		seed = PG_GETARG_INT64(1);
+ 
+ 	/* As above */
+ 	return hash_any_extended((unsigned char *) key,
+ 							 sizeof(BlockIdData) + sizeof(OffsetNumber),
+ 							 seed);
+ }
+ 
  
  /*
   *	Functions to get latest tid of a specified tuple.
diff --git a/src/include/catalog/pg_amop.dat b/src/include/catalog/pg_amop.dat
index e689c9b..436f1bd 100644
*** a/src/include/catalog/pg_amop.dat
--- b/src/include/catalog/pg_amop.dat
***************
*** 1013,1018 ****
--- 1013,1022 ----
  { amopfamily => 'hash/cid_ops', amoplefttype => 'cid', amoprighttype => 'cid',
    amopstrategy => '1', amopopr => '=(cid,cid)', amopmethod => 'hash' },
  
+ # tid_ops
+ { amopfamily => 'hash/tid_ops', amoplefttype => 'tid', amoprighttype => 'tid',
+   amopstrategy => '1', amopopr => '=(tid,tid)', amopmethod => 'hash' },
+ 
  # text_pattern_ops
  { amopfamily => 'hash/text_pattern_ops', amoplefttype => 'text',
    amoprighttype => 'text', amopstrategy => '1', amopopr => '=(text,text)',
diff --git a/src/include/catalog/pg_amproc.dat b/src/include/catalog/pg_amproc.dat
index bbcee26..8ddb699 100644
*** a/src/include/catalog/pg_amproc.dat
--- b/src/include/catalog/pg_amproc.dat
***************
*** 340,345 ****
--- 340,349 ----
    amprocrighttype => 'cid', amprocnum => '1', amproc => 'hashint4' },
  { amprocfamily => 'hash/cid_ops', amproclefttype => 'cid',
    amprocrighttype => 'cid', amprocnum => '2', amproc => 'hashint4extended' },
+ { amprocfamily => 'hash/tid_ops', amproclefttype => 'tid',
+   amprocrighttype => 'tid', amprocnum => '1', amproc => 'hashtid' },
+ { amprocfamily => 'hash/tid_ops', amproclefttype => 'tid',
+   amprocrighttype => 'tid', amprocnum => '2', amproc => 'hashtidextended' },
  { amprocfamily => 'hash/text_pattern_ops', amproclefttype => 'text',
    amprocrighttype => 'text', amprocnum => '1', amproc => 'hashtext' },
  { amprocfamily => 'hash/text_pattern_ops', amproclefttype => 'text',
diff --git a/src/include/catalog/pg_opclass.dat b/src/include/catalog/pg_opclass.dat
index 5178d04..c451d36 100644
*** a/src/include/catalog/pg_opclass.dat
--- b/src/include/catalog/pg_opclass.dat
***************
*** 167,172 ****
--- 167,174 ----
    opcintype => 'xid' },
  { opcmethod => 'hash', opcname => 'cid_ops', opcfamily => 'hash/cid_ops',
    opcintype => 'cid' },
+ { opcmethod => 'hash', opcname => 'tid_ops', opcfamily => 'hash/tid_ops',
+   opcintype => 'tid' },
  { opcmethod => 'hash', opcname => 'text_pattern_ops',
    opcfamily => 'hash/text_pattern_ops', opcintype => 'text',
    opcdefault => 'f' },
diff --git a/src/include/catalog/pg_operator.dat b/src/include/catalog/pg_operator.dat
index 2abd531..e8452e1 100644
*** a/src/include/catalog/pg_operator.dat
--- b/src/include/catalog/pg_operator.dat
***************
*** 204,212 ****
    oprrest => 'eqsel', oprjoin => 'eqjoinsel' },
  
  { oid => '387', oid_symbol => 'TIDEqualOperator', descr => 'equal',
!   oprname => '=', oprcanmerge => 't', oprleft => 'tid', oprright => 'tid',
!   oprresult => 'bool', oprcom => '=(tid,tid)', oprnegate => '<>(tid,tid)',
!   oprcode => 'tideq', oprrest => 'eqsel', oprjoin => 'eqjoinsel' },
  { oid => '402', descr => 'not equal',
    oprname => '<>', oprleft => 'tid', oprright => 'tid', oprresult => 'bool',
    oprcom => '<>(tid,tid)', oprnegate => '=(tid,tid)', oprcode => 'tidne',
--- 204,213 ----
    oprrest => 'eqsel', oprjoin => 'eqjoinsel' },
  
  { oid => '387', oid_symbol => 'TIDEqualOperator', descr => 'equal',
!   oprname => '=', oprcanmerge => 't', oprcanhash => 't', oprleft => 'tid',
!   oprright => 'tid', oprresult => 'bool', oprcom => '=(tid,tid)',
!   oprnegate => '<>(tid,tid)', oprcode => 'tideq', oprrest => 'eqsel',
!   oprjoin => 'eqjoinsel' },
  { oid => '402', descr => 'not equal',
    oprname => '<>', oprleft => 'tid', oprright => 'tid', oprresult => 'bool',
    oprcom => '<>(tid,tid)', oprnegate => '=(tid,tid)', oprcode => 'tidne',
diff --git a/src/include/catalog/pg_opfamily.dat b/src/include/catalog/pg_opfamily.dat
index fe8a324..c5ea37b 100644
*** a/src/include/catalog/pg_opfamily.dat
--- b/src/include/catalog/pg_opfamily.dat
***************
*** 112,117 ****
--- 112,119 ----
    opfmethod => 'hash', opfname => 'xid_ops' },
  { oid => '2226',
    opfmethod => 'hash', opfname => 'cid_ops' },
+ { oid => '2227',
+   opfmethod => 'hash', opfname => 'tid_ops' },
  { oid => '2229',
    opfmethod => 'hash', opfname => 'text_pattern_ops' },
  { oid => '2231',
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index acb0154..6e1e1df 100644
*** a/src/include/catalog/pg_proc.dat
--- b/src/include/catalog/pg_proc.dat
***************
*** 2484,2489 ****
--- 2484,2495 ----
  { oid => '2796', descr => 'smaller of two',
    proname => 'tidsmaller', prorettype => 'tid', proargtypes => 'tid tid',
    prosrc => 'tidsmaller' },
+ { oid => '2233', descr => 'hash',
+   proname => 'hashtid', prorettype => 'int4', proargtypes => 'tid',
+   prosrc => 'hashtid' },
+ { oid => '2234', descr => 'hash',
+   proname => 'hashtidextended', prorettype => 'int8', proargtypes => 'tid int8',
+   prosrc => 'hashtidextended' },
  
  { oid => '1296',
    proname => 'timedate_pl', prolang => '14', prorettype => 'timestamp',
diff --git a/src/test/regress/expected/tidscan.out b/src/test/regress/expected/tidscan.out
index 521ed1b..6a8afcb 100644
*** a/src/test/regress/expected/tidscan.out
--- b/src/test/regress/expected/tidscan.out
*************** EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF
*** 176,179 ****
--- 176,223 ----
  UPDATE tidscan SET id = -id WHERE CURRENT OF c RETURNING *;
  ERROR:  cursor "c" is not positioned on a row
  ROLLBACK;
+ -- bulk joins on CTID
+ -- (these plans don't use TID scans, but this still seems like an
+ -- appropriate place for these tests)
+ EXPLAIN (COSTS OFF)
+ SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
+                QUERY PLAN               
+ ----------------------------------------
+  Aggregate
+    ->  Hash Join
+          Hash Cond: (t1.ctid = t2.ctid)
+          ->  Seq Scan on tenk1 t1
+          ->  Hash
+                ->  Seq Scan on tenk1 t2
+ (6 rows)
+ 
+ SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
+  count 
+ -------
+  10000
+ (1 row)
+ 
+ SET enable_hashjoin TO off;
+ EXPLAIN (COSTS OFF)
+ SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
+                QUERY PLAN                
+ -----------------------------------------
+  Aggregate
+    ->  Merge Join
+          Merge Cond: (t1.ctid = t2.ctid)
+          ->  Sort
+                Sort Key: t1.ctid
+                ->  Seq Scan on tenk1 t1
+          ->  Sort
+                Sort Key: t2.ctid
+                ->  Seq Scan on tenk1 t2
+ (9 rows)
+ 
+ SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
+  count 
+ -------
+  10000
+ (1 row)
+ 
+ RESET enable_hashjoin;
  DROP TABLE tidscan;
diff --git a/src/test/regress/sql/tidscan.sql b/src/test/regress/sql/tidscan.sql
index a8472e0..aa5c997 100644
*** a/src/test/regress/sql/tidscan.sql
--- b/src/test/regress/sql/tidscan.sql
*************** EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF
*** 63,66 ****
--- 63,78 ----
  UPDATE tidscan SET id = -id WHERE CURRENT OF c RETURNING *;
  ROLLBACK;
  
+ -- bulk joins on CTID
+ -- (these plans don't use TID scans, but this still seems like an
+ -- appropriate place for these tests)
+ EXPLAIN (COSTS OFF)
+ SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
+ SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
+ SET enable_hashjoin TO off;
+ EXPLAIN (COSTS OFF)
+ SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
+ SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
+ RESET enable_hashjoin;
+ 
  DROP TABLE tidscan;

Reply via email to