From 11cbdf7f78d229336e6a75d908a6625a421565e9 Mon Sep 17 00:00:00 2001
From: Maxime Schoemans <maxime.schoemans@enterprisedb.com>
Date: Thu, 16 Apr 2026 16:28:17 +0200
Subject: [PATCH v8 3/3] Remove duplicate selectivity functions between range
 and multirange

The multirange selectivity code duplicated 10 helper functions from
rangetypes_selfuncs.c.  Both range and multirange types use the same
histogram format (STATISTIC_KIND_BOUNDS_HISTOGRAM) and the same
RangeBound representation, so the two copies were identical and a
single implementation can serve both.

Make the 10 shared functions non-static in rangetypes_selfuncs.c,
export them via a new rangetypes_selfuncs.h header, and remove the
copies from multirangetypes_selfuncs.c.
---
 .../utils/adt/multirangetypes_selfuncs.c      | 772 +-----------------
 src/backend/utils/adt/rangetypes_selfuncs.c   |  46 +-
 src/include/utils/rangetypes_selfuncs.h       |  54 ++
 3 files changed, 67 insertions(+), 805 deletions(-)
 create mode 100644 src/include/utils/rangetypes_selfuncs.h

diff --git a/src/backend/utils/adt/multirangetypes_selfuncs.c b/src/backend/utils/adt/multirangetypes_selfuncs.c
index e3d5c527e03..73e6d490295 100644
--- a/src/backend/utils/adt/multirangetypes_selfuncs.c
+++ b/src/backend/utils/adt/multirangetypes_selfuncs.c
@@ -27,6 +27,7 @@
 #include "utils/lsyscache.h"
 #include "utils/multirangetypes.h"
 #include "utils/rangetypes.h"
+#include "utils/rangetypes_selfuncs.h"
 #include "utils/selfuncs.h"
 #include "utils/typcache.h"
 
@@ -38,37 +39,6 @@ static double calc_hist_selectivity(TypeCacheEntry *typcache,
 									VariableStatData *vardata,
 									const MultirangeType *constval,
 									Oid operator);
-static double calc_hist_selectivity_scalar(TypeCacheEntry *typcache,
-										   const RangeBound *constbound,
-										   const RangeBound *hist,
-										   int hist_nvalues, bool equal);
-static int	rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value,
-						   const RangeBound *hist, int hist_length, bool equal);
-static float8 get_position(TypeCacheEntry *typcache, const RangeBound *value,
-						   const RangeBound *hist1, const RangeBound *hist2);
-static float8 get_len_position(double value, double hist1, double hist2);
-static float8 get_distance(TypeCacheEntry *typcache, const RangeBound *bound1,
-						   const RangeBound *bound2);
-static int	length_hist_bsearch(const Datum *length_hist_values,
-								int length_hist_nvalues, double value,
-								bool equal);
-static double calc_length_hist_frac(const Datum *length_hist_values,
-									int length_hist_nvalues, double length1,
-									double length2, bool equal);
-static double calc_hist_selectivity_contained(TypeCacheEntry *typcache,
-											  const RangeBound *lower,
-											  RangeBound *upper,
-											  const RangeBound *hist_lower,
-											  int hist_nvalues,
-											  const Datum *length_hist_values,
-											  int length_hist_nvalues);
-static double calc_hist_selectivity_contains(TypeCacheEntry *typcache,
-											 const RangeBound *lower,
-											 const RangeBound *upper,
-											 const RangeBound *hist_lower,
-											 int hist_nvalues,
-											 const Datum *length_hist_values,
-											 int length_hist_nvalues);
 
 /*
  * Returns a default selectivity estimate for given operator, when we don't
@@ -698,746 +668,6 @@ calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata,
 	return hist_selec;
 }
 
-
-/*
- * Look up the fraction of values less than (or equal, if 'equal' argument
- * is true) a given const in a histogram of range bounds.
- */
-static double
-calc_hist_selectivity_scalar(TypeCacheEntry *typcache, const RangeBound *constbound,
-							 const RangeBound *hist, int hist_nvalues, bool equal)
-{
-	Selectivity selec;
-	int			index;
-
-	/*
-	 * Find the histogram bin the given constant falls into. Estimate
-	 * selectivity as the number of preceding whole bins.
-	 */
-	index = rbound_bsearch(typcache, constbound, hist, hist_nvalues, equal);
-	selec = (Selectivity) (Max(index, 0)) / (Selectivity) (hist_nvalues - 1);
-
-	/* Adjust using linear interpolation within the bin */
-	if (index >= 0 && index < hist_nvalues - 1)
-		selec += get_position(typcache, constbound, &hist[index],
-							  &hist[index + 1]) / (Selectivity) (hist_nvalues - 1);
-
-	return selec;
-}
-
-/*
- * Binary search on an array of range bounds. Returns greatest index of range
- * bound in array which is less(less or equal) than given range bound. If all
- * range bounds in array are greater or equal(greater) than given range bound,
- * return -1. When "equal" flag is set conditions in brackets are used.
- *
- * This function is used in scalar operator selectivity estimation. Another
- * goal of this function is to find a histogram bin where to stop
- * interpolation of portion of bounds which are less than or equal to given bound.
- */
-static int
-rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist,
-			   int hist_length, bool equal)
-{
-	int			lower = -1,
-				upper = hist_length - 1,
-				cmp,
-				middle;
-
-	while (lower < upper)
-	{
-		middle = (lower + upper + 1) / 2;
-		cmp = range_cmp_bounds(typcache, &hist[middle], value);
-
-		if (cmp < 0 || (equal && cmp == 0))
-			lower = middle;
-		else
-			upper = middle - 1;
-	}
-	return lower;
-}
-
-
-/*
- * Binary search on length histogram. Returns greatest index of range length in
- * histogram which is less than (less than or equal) the given length value. If
- * all lengths in the histogram are greater than (greater than or equal) the
- * given length, returns -1.
- */
-static int
-length_hist_bsearch(const Datum *length_hist_values, int length_hist_nvalues,
-					double value, bool equal)
-{
-	int			lower = -1,
-				upper = length_hist_nvalues - 1,
-				middle;
-
-	while (lower < upper)
-	{
-		double		middleval;
-
-		middle = (lower + upper + 1) / 2;
-
-		middleval = DatumGetFloat8(length_hist_values[middle]);
-		if (middleval < value || (equal && middleval <= value))
-			lower = middle;
-		else
-			upper = middle - 1;
-	}
-	return lower;
-}
-
-/*
- * Get relative position of value in histogram bin in [0,1] range.
- */
-static float8
-get_position(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist1,
-			 const RangeBound *hist2)
-{
-	bool		has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
-	float8		position;
-
-	if (!hist1->infinite && !hist2->infinite)
-	{
-		float8		bin_width;
-
-		/*
-		 * Both bounds are finite. Assuming the subtype's comparison function
-		 * works sanely, the value must be finite, too, because it lies
-		 * somewhere between the bounds.  If it doesn't, arbitrarily return
-		 * 0.5.
-		 */
-		if (value->infinite)
-			return 0.5;
-
-		/* Can't interpolate without subdiff function */
-		if (!has_subdiff)
-			return 0.5;
-
-		/* Calculate relative position using subdiff function. */
-		bin_width = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
-													 typcache->rng_collation,
-													 hist2->val,
-													 hist1->val));
-		if (isnan(bin_width) || bin_width <= 0.0)
-			return 0.5;			/* punt for NaN or zero-width bin */
-
-		position = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
-													typcache->rng_collation,
-													value->val,
-													hist1->val))
-			/ bin_width;
-
-		if (isnan(position))
-			return 0.5;			/* punt for NaN from subdiff, Inf/Inf, etc */
-
-		/* Relative position must be in [0,1] range */
-		position = Max(position, 0.0);
-		position = Min(position, 1.0);
-		return position;
-	}
-	else if (hist1->infinite && !hist2->infinite)
-	{
-		/*
-		 * Lower bin boundary is -infinite, upper is finite. If the value is
-		 * -infinite, return 0.0 to indicate it's equal to the lower bound.
-		 * Otherwise return 1.0 to indicate it's infinitely far from the lower
-		 * bound.
-		 */
-		return ((value->infinite && value->lower) ? 0.0 : 1.0);
-	}
-	else if (!hist1->infinite && hist2->infinite)
-	{
-		/* same as above, but in reverse */
-		return ((value->infinite && !value->lower) ? 1.0 : 0.0);
-	}
-	else
-	{
-		/*
-		 * If both bin boundaries are infinite, they should be equal to each
-		 * other, and the value should also be infinite and equal to both
-		 * bounds. (But don't Assert that, to avoid crashing if a user creates
-		 * a datatype with a broken comparison function).
-		 *
-		 * Assume the value to lie in the middle of the infinite bounds.
-		 */
-		return 0.5;
-	}
-}
-
-
-/*
- * Get relative position of value in a length histogram bin in [0,1] range.
- */
-static double
-get_len_position(double value, double hist1, double hist2)
-{
-	if (!isinf(hist1) && !isinf(hist2))
-	{
-		/*
-		 * Both bounds are finite. The value should be finite too, because it
-		 * lies somewhere between the bounds. If it doesn't, just return
-		 * something.
-		 */
-		if (isinf(value))
-			return 0.5;
-
-		return 1.0 - (hist2 - value) / (hist2 - hist1);
-	}
-	else if (isinf(hist1) && !isinf(hist2))
-	{
-		/*
-		 * Lower bin boundary is -infinite, upper is finite. Return 1.0 to
-		 * indicate the value is infinitely far from the lower bound.
-		 */
-		return 1.0;
-	}
-	else if (isinf(hist1) && isinf(hist2))
-	{
-		/* same as above, but in reverse */
-		return 0.0;
-	}
-	else
-	{
-		/*
-		 * If both bin boundaries are infinite, they should be equal to each
-		 * other, and the value should also be infinite and equal to both
-		 * bounds. (But don't Assert that, to avoid crashing unnecessarily if
-		 * the caller messes up)
-		 *
-		 * Assume the value to lie in the middle of the infinite bounds.
-		 */
-		return 0.5;
-	}
-}
-
-/*
- * Measure distance between two range bounds.
- */
-static float8
-get_distance(TypeCacheEntry *typcache, const RangeBound *bound1, const RangeBound *bound2)
-{
-	bool		has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
-
-	if (!bound1->infinite && !bound2->infinite)
-	{
-		/*
-		 * Neither bound is infinite, use subdiff function or return default
-		 * value of 1.0 if no subdiff is available.
-		 */
-		if (has_subdiff)
-		{
-			float8		res;
-
-			res = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
-												   typcache->rng_collation,
-												   bound2->val,
-												   bound1->val));
-			/* Reject possible NaN result, also negative result */
-			if (isnan(res) || res < 0.0)
-				return 1.0;
-			else
-				return res;
-		}
-		else
-			return 1.0;
-	}
-	else if (bound1->infinite && bound2->infinite)
-	{
-		/* Both bounds are infinite */
-		if (bound1->lower == bound2->lower)
-			return 0.0;
-		else
-			return get_float8_infinity();
-	}
-	else
-	{
-		/* One bound is infinite, the other is not */
-		return get_float8_infinity();
-	}
-}
-
-/*
- * Calculate the average of function P(x), in the interval [length1, length2],
- * where P(x) is the fraction of tuples with length < x (or length <= x if
- * 'equal' is true).
- */
-static double
-calc_length_hist_frac(const Datum *length_hist_values, int length_hist_nvalues,
-					  double length1, double length2, bool equal)
-{
-	double		frac;
-	double		A,
-				B,
-				PA,
-				PB;
-	double		pos;
-	int			i;
-	double		area;
-
-	Assert(length2 >= length1);
-
-	if (length2 < 0.0)
-		return 0.0;				/* shouldn't happen, but doesn't hurt to check */
-
-	/* All lengths in the table are <= infinite. */
-	if (isinf(length2) && equal)
-		return 1.0;
-
-	/*----------
-	 * The average of a function between A and B can be calculated by the
-	 * formula:
-	 *
-	 *			B
-	 *	  1		/
-	 * -------	| P(x)dx
-	 *	B - A	/
-	 *			A
-	 *
-	 * The geometrical interpretation of the integral is the area under the
-	 * graph of P(x). P(x) is defined by the length histogram. We calculate
-	 * the area in a piecewise fashion, iterating through the length histogram
-	 * bins. Each bin is a trapezoid:
-	 *
-	 *		 P(x2)
-	 *		  /|
-	 *		 / |
-	 * P(x1)/  |
-	 *	   |   |
-	 *	   |   |
-	 *	---+---+--
-	 *	   x1  x2
-	 *
-	 * where x1 and x2 are the boundaries of the current histogram, and P(x1)
-	 * and P(x1) are the cumulative fraction of tuples at the boundaries.
-	 *
-	 * The area of each trapezoid is 1/2 * (P(x2) + P(x1)) * (x2 - x1)
-	 *
-	 * The first bin contains the lower bound passed by the caller, so we
-	 * use linear interpolation between the previous and next histogram bin
-	 * boundary to calculate P(x1). Likewise for the last bin: we use linear
-	 * interpolation to calculate P(x2). For the bins in between, x1 and x2
-	 * lie on histogram bin boundaries, so P(x1) and P(x2) are simply:
-	 * P(x1) =	  (bin index) / (number of bins)
-	 * P(x2) = (bin index + 1 / (number of bins)
-	 */
-
-	/* First bin, the one that contains lower bound */
-	i = length_hist_bsearch(length_hist_values, length_hist_nvalues, length1, equal);
-	if (i >= length_hist_nvalues - 1)
-		return 1.0;
-
-	if (i < 0)
-	{
-		i = 0;
-		pos = 0.0;
-	}
-	else
-	{
-		/* interpolate length1's position in the bin */
-		pos = get_len_position(length1,
-							   DatumGetFloat8(length_hist_values[i]),
-							   DatumGetFloat8(length_hist_values[i + 1]));
-	}
-	PB = (((double) i) + pos) / (double) (length_hist_nvalues - 1);
-	B = length1;
-
-	/*
-	 * In the degenerate case that length1 == length2, simply return
-	 * P(length1). This is not merely an optimization: if length1 == length2,
-	 * we'd divide by zero later on.
-	 */
-	if (length2 == length1)
-		return PB;
-
-	/*
-	 * Loop through all the bins, until we hit the last bin, the one that
-	 * contains the upper bound. (if lower and upper bounds are in the same
-	 * bin, this falls out immediately)
-	 */
-	area = 0.0;
-	for (; i < length_hist_nvalues - 1; i++)
-	{
-		double		bin_upper = DatumGetFloat8(length_hist_values[i + 1]);
-
-		/* check if we've reached the last bin */
-		if (!(bin_upper < length2 || (equal && bin_upper <= length2)))
-			break;
-
-		/* the upper bound of previous bin is the lower bound of this bin */
-		A = B;
-		PA = PB;
-
-		B = bin_upper;
-		PB = (double) i / (double) (length_hist_nvalues - 1);
-
-		/*
-		 * Add the area of this trapezoid to the total. The point of the
-		 * if-check is to avoid NaN, in the corner case that PA == PB == 0,
-		 * and B - A == Inf. The area of a zero-height trapezoid (PA == PB ==
-		 * 0) is zero, regardless of the width (B - A).
-		 */
-		if (PA > 0 || PB > 0)
-			area += 0.5 * (PB + PA) * (B - A);
-	}
-
-	/* Last bin */
-	A = B;
-	PA = PB;
-
-	B = length2;				/* last bin ends at the query upper bound */
-	if (i >= length_hist_nvalues - 1)
-		pos = 0.0;
-	else
-	{
-		if (DatumGetFloat8(length_hist_values[i]) == DatumGetFloat8(length_hist_values[i + 1]))
-			pos = 0.0;
-		else
-			pos = get_len_position(length2,
-								   DatumGetFloat8(length_hist_values[i]),
-								   DatumGetFloat8(length_hist_values[i + 1]));
-	}
-	PB = (((double) i) + pos) / (double) (length_hist_nvalues - 1);
-
-	if (PA > 0 || PB > 0)
-		area += 0.5 * (PB + PA) * (B - A);
-
-	/*
-	 * Ok, we have calculated the area, ie. the integral. Divide by width to
-	 * get the requested average.
-	 *
-	 * Avoid NaN arising from infinite / infinite. This happens at least if
-	 * length2 is infinite. It's not clear what the correct value would be in
-	 * that case, so 0.5 seems as good as any value.
-	 */
-	if (isinf(area) && isinf(length2))
-		frac = 0.5;
-	else
-		frac = area / (length2 - length1);
-
-	return frac;
-}
-
-/*
- * Calculate selectivity of "var <@ const" operator, ie. estimate the fraction
- * of multiranges that fall within the constant lower and upper bounds. This uses
- * the histograms of range lower bounds and range lengths, on the assumption
- * that the range lengths are independent of the lower bounds.
- *
- * The caller has already checked that constant lower and upper bounds are
- * finite.
- */
-static double
-calc_hist_selectivity_contained(TypeCacheEntry *typcache,
-								const RangeBound *lower, RangeBound *upper,
-								const RangeBound *hist_lower, int hist_nvalues,
-								const Datum *length_hist_values, int length_hist_nvalues)
-{
-	int			i,
-				upper_index;
-	float8		prev_dist;
-	double		bin_width;
-	double		upper_bin_width;
-	double		sum_frac;
-
-	/*
-	 * Begin by finding the bin containing the upper bound, in the lower bound
-	 * histogram. Any range with a lower bound > constant upper bound can't
-	 * match, ie. there are no matches in bins greater than upper_index.
-	 */
-	upper->inclusive = !upper->inclusive;
-	upper->lower = true;
-	upper_index = rbound_bsearch(typcache, upper, hist_lower, hist_nvalues,
-								 false);
-
-	/*
-	 * If the upper bound value is below the histogram's lower limit, there
-	 * are no matches.
-	 */
-	if (upper_index < 0)
-		return 0.0;
-
-	/*
-	 * If the upper bound value is at or beyond the histogram's upper limit,
-	 * start our loop at the last actual bin, as though the upper bound were
-	 * within that bin; get_position will clamp its result to 1.0 anyway.
-	 * (This corresponds to assuming that the data population above the
-	 * histogram's upper limit is empty, exactly like what we just assumed for
-	 * the lower limit.)
-	 */
-	upper_index = Min(upper_index, hist_nvalues - 2);
-
-	/*
-	 * Calculate upper_bin_width, ie. the fraction of the (upper_index,
-	 * upper_index + 1) bin which is greater than upper bound of query range
-	 * using linear interpolation of subdiff function.
-	 */
-	upper_bin_width = get_position(typcache, upper,
-								   &hist_lower[upper_index],
-								   &hist_lower[upper_index + 1]);
-
-	/*
-	 * In the loop, dist and prev_dist are the distance of the "current" bin's
-	 * lower and upper bounds from the constant upper bound.
-	 *
-	 * bin_width represents the width of the current bin. Normally it is 1.0,
-	 * meaning a full width bin, but can be less in the corner cases: start
-	 * and end of the loop. We start with bin_width = upper_bin_width, because
-	 * we begin at the bin containing the upper bound.
-	 */
-	prev_dist = 0.0;
-	bin_width = upper_bin_width;
-
-	sum_frac = 0.0;
-	for (i = upper_index; i >= 0; i--)
-	{
-		double		dist;
-		double		length_hist_frac;
-		bool		final_bin = false;
-
-		/*
-		 * dist -- distance from upper bound of query range to lower bound of
-		 * the current bin in the lower bound histogram. Or to the lower bound
-		 * of the constant range, if this is the final bin, containing the
-		 * constant lower bound.
-		 */
-		if (range_cmp_bounds(typcache, &hist_lower[i], lower) < 0)
-		{
-			dist = get_distance(typcache, lower, upper);
-
-			/*
-			 * Subtract from bin_width the portion of this bin that we want to
-			 * ignore.
-			 */
-			bin_width -= get_position(typcache, lower, &hist_lower[i],
-									  &hist_lower[i + 1]);
-			if (bin_width < 0.0)
-				bin_width = 0.0;
-			final_bin = true;
-		}
-		else
-			dist = get_distance(typcache, &hist_lower[i], upper);
-
-		/*
-		 * Estimate the fraction of tuples in this bin that are narrow enough
-		 * to not exceed the distance to the upper bound of the query range.
-		 */
-		length_hist_frac = calc_length_hist_frac(length_hist_values,
-												 length_hist_nvalues,
-												 prev_dist, dist, true);
-
-		/*
-		 * Add the fraction of tuples in this bin, with a suitable length, to
-		 * the total.
-		 */
-		sum_frac += length_hist_frac * bin_width / (double) (hist_nvalues - 1);
-
-		if (final_bin)
-			break;
-
-		bin_width = 1.0;
-		prev_dist = dist;
-	}
-
-	return sum_frac;
-}
-
-/*
- * Calculate selectivity of "var @> const" operator, ie. estimate the fraction
- * of multiranges that contain the constant lower and upper bounds. This uses
- * the histograms of range lower bounds and range lengths, on the assumption
- * that the range lengths are independent of the lower bounds.
- */
-static double
-calc_hist_selectivity_contains(TypeCacheEntry *typcache,
-							   const RangeBound *lower, const RangeBound *upper,
-							   const RangeBound *hist_lower, int hist_nvalues,
-							   const Datum *length_hist_values, int length_hist_nvalues)
-{
-	int			i,
-				lower_index;
-	double		bin_width,
-				lower_bin_width;
-	double		sum_frac;
-	float8		prev_dist;
-
-	/* Find the bin containing the lower bound of query range. */
-	lower_index = rbound_bsearch(typcache, lower, hist_lower, hist_nvalues,
-								 true);
-
-	/*
-	 * If the lower bound value is below the histogram's lower limit, there
-	 * are no matches.
-	 */
-	if (lower_index < 0)
-		return 0.0;
-
-	/*
-	 * If the lower bound value is at or beyond the histogram's upper limit,
-	 * start our loop at the last actual bin, as though the upper bound were
-	 * within that bin; get_position will clamp its result to 1.0 anyway.
-	 * (This corresponds to assuming that the data population above the
-	 * histogram's upper limit is empty, exactly like what we just assumed for
-	 * the lower limit.)
-	 */
-	lower_index = Min(lower_index, hist_nvalues - 2);
-
-	/*
-	 * Calculate lower_bin_width, ie. the fraction of the of (lower_index,
-	 * lower_index + 1) bin which is greater than lower bound of query range
-	 * using linear interpolation of subdiff function.
-	 */
-	lower_bin_width = get_position(typcache, lower, &hist_lower[lower_index],
-								   &hist_lower[lower_index + 1]);
-
-	/*
-	 * Loop through all the lower bound bins, smaller than the query lower
-	 * bound. In the loop, dist and prev_dist are the distance of the
-	 * "current" bin's lower and upper bounds from the constant upper bound.
-	 * We begin from query lower bound, and walk backwards, so the first bin's
-	 * upper bound is the query lower bound, and its distance to the query
-	 * upper bound is the length of the query range.
-	 *
-	 * bin_width represents the width of the current bin. Normally it is 1.0,
-	 * meaning a full width bin, except for the first bin, which is only
-	 * counted up to the constant lower bound.
-	 */
-	prev_dist = get_distance(typcache, lower, upper);
-	sum_frac = 0.0;
-	bin_width = lower_bin_width;
-	for (i = lower_index; i >= 0; i--)
-	{
-		float8		dist;
-		double		length_hist_frac;
-
-		/*
-		 * dist -- distance from upper bound of query range to current value
-		 * of lower bound histogram or lower bound of query range (if we've
-		 * reach it).
-		 */
-		dist = get_distance(typcache, &hist_lower[i], upper);
-
-		/*
-		 * Get average fraction of length histogram which covers intervals
-		 * longer than (or equal to) distance to upper bound of query range.
-		 */
-		length_hist_frac =
-			1.0 - calc_length_hist_frac(length_hist_values,
-										length_hist_nvalues,
-										prev_dist, dist, false);
-
-		sum_frac += length_hist_frac * bin_width / (double) (hist_nvalues - 1);
-
-		bin_width = 1.0;
-		prev_dist = dist;
-	}
-
-	return sum_frac;
-}
-
-/*
- * Estimate join selectivity P(X < Y) using rangebound histograms.
- *
- * Based on: Diogo Repas, Zhicheng Luo, Maxime Schoemans, Mahmoud Sakr, 2022
- * "Selectivity Estimation of Inequality Joins In Databases"
- * https://doi.org/10.48550/arXiv.2206.07396
- *
- * hist1 and hist2 are arrays of RangeBound entries from the bounds histograms
- * of two range-typed or multirange-typed attributes X and Y, respectively.
- * Each array has at least 2 entries (one histogram bin).  The entries carry
- * full bound metadata (lower/upper flag, inclusive/exclusive), and all
- * comparisons use range_cmp_bounds() so that bound semantics are preserved.
- *
- * The algorithm models each attribute's distribution as a piecewise function
- * derived from its histogram, then computes:
- *   P(X < Y) = 0.5 * sum( (F_X(prev) + F_X(cur)) * (F_Y(cur) - F_Y(prev)) )
- * by parallel-scanning both histograms.
- *
- * The initial fast-forward loops skip histogram entries that fall entirely
- * before the other histogram's range, so the main loop only processes the
- * overlapping region.  Bounds checks are required because the histograms may
- * be completely disjoint (e.g., all of X is below all of Y).
- */
-static double
-calc_hist_join_selectivity(TypeCacheEntry *typcache,
-						   const RangeBound *hist1, int nhist1,
-						   const RangeBound *hist2, int nhist2)
-{
-	int			i,
-				j;
-	double		selectivity = 0.0;
-	double		prev_sel1 = -1.0;	/* negative sentinel skips first iter */
-	double		prev_sel2 = 0.0;
-
-	Assert(nhist1 > 1);
-	Assert(nhist2 > 1);
-
-	/*
-	 * Fast-forward past hist1 entries that are entirely below hist2[0], and
-	 * vice versa.  Bounds checks prevent out-of-bounds access when the
-	 * histograms are fully disjoint.
-	 */
-	for (i = 0; i < nhist1 &&
-		 range_cmp_bounds(typcache, &hist1[i], &hist2[0]) < 0; i++)
-		;
-	for (j = 0; j < nhist2 &&
-		 range_cmp_bounds(typcache, &hist2[j], &hist1[0]) < 0; j++)
-		;
-
-	/*
-	 * Handle fully-separated histograms.  When all bounds in hist1 are below
-	 * all bounds in hist2, P(X < Y) is ~1.0.  When all of hist2 is below
-	 * hist1, P(X < Y) is ~0.0.  We return immediately rather than falling
-	 * into the overlap walk with invalid indices.
-	 */
-	if (i >= nhist1)
-		return 1.0;
-	if (j >= nhist2)
-		return 0.0;
-
-	/* Walk the overlapping region of both histograms */
-	while (i < nhist1 && j < nhist2)
-	{
-		double		cur_sel1,
-					cur_sel2;
-		RangeBound	cur_sync;
-		int			cmp;
-
-		cmp = range_cmp_bounds(typcache, &hist1[i], &hist2[j]);
-		if (cmp < 0)
-			cur_sync = hist1[i++];
-		else if (cmp > 0)
-			cur_sync = hist2[j++];
-		else
-		{
-			/* Equal bounds: advance both */
-			cur_sync = hist1[i];
-			i++;
-			j++;
-		}
-		cur_sel1 = calc_hist_selectivity_scalar(typcache, &cur_sync,
-												hist1, nhist1, false);
-		cur_sel2 = calc_hist_selectivity_scalar(typcache, &cur_sync,
-												hist2, nhist2, false);
-
-		/* Skip the first iteration (no previous point yet) */
-		if (prev_sel1 >= 0)
-			selectivity += (prev_sel1 + cur_sel1) * (cur_sel2 - prev_sel2);
-
-		prev_sel1 = cur_sel1;
-		prev_sel2 = cur_sel2;
-	}
-
-	/* P(X < Y) = 0.5 * Sum(...) */
-	selectivity /= 2;
-
-	/* Include remainder of hist2 if hist1 was exhausted first */
-	if (j < nhist2)
-		selectivity += 1 - prev_sel2;
-
-	return selectivity;
-}
-
 /*
  * multirangejoinsel -- join selectivity for multirange operators
  *
diff --git a/src/backend/utils/adt/rangetypes_selfuncs.c b/src/backend/utils/adt/rangetypes_selfuncs.c
index cc702f28610..4f4baa7dc1a 100644
--- a/src/backend/utils/adt/rangetypes_selfuncs.c
+++ b/src/backend/utils/adt/rangetypes_selfuncs.c
@@ -26,6 +26,7 @@
 #include "utils/fmgrprotos.h"
 #include "utils/lsyscache.h"
 #include "utils/rangetypes.h"
+#include "utils/rangetypes_selfuncs.h"
 #include "utils/selfuncs.h"
 #include "utils/typcache.h"
 
@@ -35,29 +36,6 @@ static double default_range_selectivity(Oid operator);
 static double calc_hist_selectivity(TypeCacheEntry *typcache,
 									VariableStatData *vardata, const RangeType *constval,
 									Oid operator);
-static double calc_hist_selectivity_scalar(TypeCacheEntry *typcache,
-										   const RangeBound *constbound,
-										   const RangeBound *hist, int hist_nvalues,
-										   bool equal);
-static int	rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value,
-						   const RangeBound *hist, int hist_length, bool equal);
-static float8 get_position(TypeCacheEntry *typcache, const RangeBound *value,
-						   const RangeBound *hist1, const RangeBound *hist2);
-static float8 get_len_position(double value, double hist1, double hist2);
-static float8 get_distance(TypeCacheEntry *typcache, const RangeBound *bound1,
-						   const RangeBound *bound2);
-static int	length_hist_bsearch(const Datum *length_hist_values,
-								int length_hist_nvalues, double value, bool equal);
-static double calc_length_hist_frac(const Datum *length_hist_values,
-									int length_hist_nvalues, double length1, double length2, bool equal);
-static double calc_hist_selectivity_contained(TypeCacheEntry *typcache,
-											  const RangeBound *lower, RangeBound *upper,
-											  const RangeBound *hist_lower, int hist_nvalues,
-											  const Datum *length_hist_values, int length_hist_nvalues);
-static double calc_hist_selectivity_contains(TypeCacheEntry *typcache,
-											 const RangeBound *lower, const RangeBound *upper,
-											 const RangeBound *hist_lower, int hist_nvalues,
-											 const Datum *length_hist_values, int length_hist_nvalues);
 
 /*
  * Returns a default selectivity estimate for given operator, when we don't
@@ -592,7 +570,7 @@ calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata,
  * Look up the fraction of values less than (or equal, if 'equal' argument
  * is true) a given const in a histogram of range bounds.
  */
-static double
+double
 calc_hist_selectivity_scalar(TypeCacheEntry *typcache, const RangeBound *constbound,
 							 const RangeBound *hist, int hist_nvalues, bool equal)
 {
@@ -624,7 +602,7 @@ calc_hist_selectivity_scalar(TypeCacheEntry *typcache, const RangeBound *constbo
  * goal of this function is to find a histogram bin where to stop
  * interpolation of portion of bounds which are less than or equal to given bound.
  */
-static int
+int
 rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist,
 			   int hist_length, bool equal)
 {
@@ -653,7 +631,7 @@ rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value, const RangeBou
  * all lengths in the histogram are greater than (greater than or equal) the
  * given length, returns -1.
  */
-static int
+int
 length_hist_bsearch(const Datum *length_hist_values, int length_hist_nvalues,
 					double value, bool equal)
 {
@@ -679,7 +657,7 @@ length_hist_bsearch(const Datum *length_hist_values, int length_hist_nvalues,
 /*
  * Get relative position of value in histogram bin in [0,1] range.
  */
-static float8
+float8
 get_position(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist1,
 			 const RangeBound *hist2)
 {
@@ -758,7 +736,7 @@ get_position(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound
 /*
  * Get relative position of value in a length histogram bin in [0,1] range.
  */
-static double
+double
 get_len_position(double value, double hist1, double hist2)
 {
 	if (!isinf(hist1) && !isinf(hist2))
@@ -803,7 +781,7 @@ get_len_position(double value, double hist1, double hist2)
 /*
  * Measure distance between two range bounds.
  */
-static float8
+float8
 get_distance(TypeCacheEntry *typcache, const RangeBound *bound1, const RangeBound *bound2)
 {
 	bool		has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
@@ -851,7 +829,7 @@ get_distance(TypeCacheEntry *typcache, const RangeBound *bound1, const RangeBoun
  * where P(x) is the fraction of tuples with length < x (or length <= x if
  * 'equal' is true).
  */
-static double
+double
 calc_length_hist_frac(const Datum *length_hist_values, int length_hist_nvalues,
 					  double length1, double length2, bool equal)
 {
@@ -1014,7 +992,7 @@ calc_length_hist_frac(const Datum *length_hist_values, int length_hist_nvalues,
  * The caller has already checked that constant lower and upper bounds are
  * finite.
  */
-static double
+double
 calc_hist_selectivity_contained(TypeCacheEntry *typcache,
 								const RangeBound *lower, RangeBound *upper,
 								const RangeBound *hist_lower, int hist_nvalues,
@@ -1135,7 +1113,7 @@ calc_hist_selectivity_contained(TypeCacheEntry *typcache,
  * the histograms of range lower bounds and range lengths, on the assumption
  * that the range lengths are independent of the lower bounds.
  */
-static double
+double
 calc_hist_selectivity_contains(TypeCacheEntry *typcache,
 							   const RangeBound *lower, const RangeBound *upper,
 							   const RangeBound *hist_lower, int hist_nvalues,
@@ -1230,7 +1208,7 @@ calc_hist_selectivity_contains(TypeCacheEntry *typcache,
  * https://doi.org/10.48550/arXiv.2206.07396
  *
  * hist1 and hist2 are arrays of RangeBound entries from the bounds histograms
- * of two range-typed attributes X and Y, respectively.  Each array has at
+ * of two range/multirange attributes X and Y, respectively.  Each array has at
  * least 2 entries (one histogram bin).  The entries carry full bound metadata
  * (lower/upper flag, inclusive/exclusive), and all comparisons use
  * range_cmp_bounds() so that bound semantics are preserved.
@@ -1245,7 +1223,7 @@ calc_hist_selectivity_contains(TypeCacheEntry *typcache,
  * overlapping region.  Bounds checks are required because the histograms may
  * be completely disjoint (e.g., all of X is below all of Y).
  */
-static double
+double
 calc_hist_join_selectivity(TypeCacheEntry *typcache,
 						   const RangeBound *hist1, int nhist1,
 						   const RangeBound *hist2, int nhist2)
diff --git a/src/include/utils/rangetypes_selfuncs.h b/src/include/utils/rangetypes_selfuncs.h
new file mode 100644
index 00000000000..be6bda9ab11
--- /dev/null
+++ b/src/include/utils/rangetypes_selfuncs.h
@@ -0,0 +1,54 @@
+/*-------------------------------------------------------------------------
+ *
+ * rangetypes_selfuncs.h
+ *	  Shared helper functions for range and multirange selectivity estimation.
+ *
+ * Defined in rangetypes_selfuncs.c; also used by multirangetypes_selfuncs.c.
+ * Note that calc_hist_selectivity_contained() modifies *upper in place.
+ *
+ * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/rangetypes_selfuncs.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef RANGETYPES_SELFUNCS_H
+#define RANGETYPES_SELFUNCS_H
+
+#include "utils/rangetypes.h"
+
+extern double calc_hist_selectivity_scalar(TypeCacheEntry *typcache,
+										   const RangeBound *constbound,
+										   const RangeBound *hist, int hist_nvalues,
+										   bool equal);
+extern int	rbound_bsearch(TypeCacheEntry *typcache,
+						   const RangeBound *value, const RangeBound *hist,
+						   int hist_length, bool equal);
+extern int	length_hist_bsearch(const Datum *length_hist_values,
+								int length_hist_nvalues,
+								double value, bool equal);
+extern float8 get_position(TypeCacheEntry *typcache,
+						   const RangeBound *value,
+						   const RangeBound *hist1, const RangeBound *hist2);
+extern double get_len_position(double value, double hist1, double hist2);
+extern float8 get_distance(TypeCacheEntry *typcache,
+						   const RangeBound *bound1, const RangeBound *bound2);
+extern double calc_length_hist_frac(const Datum *length_hist_values,
+									int length_hist_nvalues,
+									double length1, double length2, bool equal);
+extern double calc_hist_selectivity_contained(TypeCacheEntry *typcache,
+											  const RangeBound *lower, RangeBound *upper,
+											  const RangeBound *hist_lower, int hist_nvalues,
+											  const Datum *length_hist_values,
+											  int length_hist_nvalues);
+extern double calc_hist_selectivity_contains(TypeCacheEntry *typcache,
+											 const RangeBound *lower, const RangeBound *upper,
+											 const RangeBound *hist_lower, int hist_nvalues,
+											 const Datum *length_hist_values,
+											 int length_hist_nvalues);
+extern double calc_hist_join_selectivity(TypeCacheEntry *typcache,
+										 const RangeBound *hist1, int nhist1,
+										 const RangeBound *hist2, int nhist2);
+
+#endif							/* RANGETYPES_SELFUNCS_H */
-- 
2.50.1 (Apple Git-155)

