On Fri, Jan 19, 2024 at 7:03 PM Daniel Gustafsson <dan...@yesql.se> wrote:

> > On 19 Jan 2024, at 23:09, Kirk Wolak <wol...@gmail.com> wrote:
>
> >  From a FUTURE email, I noticed pg_jit_available() and it's set to f??
>
> Right, then this installation does not contain the necessary library to JIT
> compile the query.
>
> > Okay, so does this require a special BUILD command?
>
> Yes, it requires that you compile with --with-llvm.  If you don't have
> llvm in
> the PATH you might need to set LLVM_CONFIG to point to your llvm-config
> binary.
> With autotools that would be something like:
>
>     ./configure <other params> --with-llvm LLVM_CONFIG=/path/to/llvm-config
>
> --
> Daniel Gustafsson
>

Thank you, that made it possible to build and run...
UNFORTUNATELY this has a CLEAR memory leak (visible in htop)
I am watching it already consuming 6% of my system memory.

I am re-attaching my script.  WHICH includes the settings to FORCE JIT.
It also does an EXPLAIN so you can verify that JIT is on (this is what I
added/noticed!)
And it takes over 20 minutes to get this far.  It's still running.
I am re-attaching the script. (as I tweaked it).

This creates 90 million rows of data, so it takes a while.
I BELIEVE that it consumes far less memory if you do not fetch any rows (I
had problems reproducing it if no rows were fetched).
So, this may be beyond the planning stages.

Thanks,

Kirk Out!
CREATE TABLE pg_temp.parts
(
    seid             bigint,
    r_field_name_1   smallint,
    fr_field_name    smallint           NOT NULL,
    p1_field_name    varchar(4),
    qty_field_name   integer,
    p5_field_name    varchar(30),
    partnum          varchar(30),
    st_field_name    smallint DEFAULT 0 NOT NULL
); -- drop table pg_temp.parts;

INSERT INTO pg_temp.parts (seid, partnum, qty_field_name, fr_field_name, 
st_field_name)
SELECT (RANDOM() * 500 + 1)::bigint                                   AS seid,
       trunc(RANDOM() * 10000)::text                                         AS 
partnum,
       CASE
           WHEN q.rnd BETWEEN 0 AND 0.45 THEN FLOOR(RANDOM() * 900) + 100 -- 
Random number in the range [100, 999]
           WHEN q.rnd BETWEEN 0.46 AND 0.96 THEN LEAST(TRUNC(FLOOR(RANDOM() * 
999999) + 1000)::int, 999999::int) -- Random number in the range [1000, 9999]
           ELSE FLOOR(RANDOM() * 9000000) + 1000000 -- Random number in the 
range [100000, 999999]
           END                                                         AS 
qty_field_name,
       CASE WHEN RANDOM() < 0.72 THEN 0::smallint ELSE 1::smallint END AS 
fr_field_name,
       CASE WHEN RANDOM() < 0.46 THEN 1::smallint ELSE 2::smallint END AS 
st_field_name
  FROM (SELECT RANDOM() AS rnd, x FROM GENERATE_SERIES(1, 90_000_000) x) q;

CREATE INDEX idx_parts_supid ON pg_temp.parts USING btree (seid, p1_field_name, 
partnum, st_field_name, r_field_name_1, qty_field_name);
CREATE INDEX idx_parts_p5 ON pg_temp.parts USING btree (p5_field_name, seid, 
st_field_name, r_field_name_1, p1_field_name);
CREATE INDEX idx_parts_partnum ON pg_temp.parts USING btree (partnum, seid, 
st_field_name, r_field_name_1, p1_field_name);

CREATE OR REPLACE FUNCTION pg_temp.fx(asupplier bigint = 497 )
    RETURNS void
    LANGUAGE plpgsql
AS
$function$
DECLARE
    supplier_parts       CURSOR (sid bigint) FOR  -- Again, selecting with 
COUNT() would reduce 1 query per row!
        SELECT
            partnum, qty_field_name, st_field_name, sum(qty_field_name) as qty
            FROM pg_temp.parts
            WHERE seid = sid AND (st_field_name = 1)
            GROUP BY partnum, qty_field_name, st_field_name
            ORDER BY partnum, qty_field_name, st_field_name;
    supplier_part_qty_matches CURSOR (sid bigint, pnum varchar(30), pqty 
bigint) FOR
        SELECT DISTINCT
            seid, fr_field_name, partnum, st_field_name
            FROM pg_temp.parts
            WHERE seid <> sid AND partnum = pnum AND qty_field_name = pqty
            ORDER BY seid, partnum;

    a_partnum     varchar(30);
    a_qty         integer;
    a_st          smallint;
    a_cnt         integer = 0;
    b_partnum     varchar(30);
    b_fr          smallint;
    b_seid        bigint;
    b_st          smallint;
    b_cnt         bigint = 0;
BEGIN
    RAISE NOTICE '%', (SELECT (PG_SIZE_PRETTY(SUM(used_bytes)), 
PG_SIZE_PRETTY(SUM(total_bytes)), PG_SIZE_PRETTY(SUM(free_bytes))) FROM 
pg_get_backend_memory_contexts());
    OPEN supplier_parts (asupplier);
    LOOP
        FETCH supplier_parts INTO a_partnum, a_qty, a_st, a_qty;
        EXIT WHEN NOT FOUND;
        a_cnt := a_cnt + 1;
        OPEN supplier_part_qty_matches (sid := asupplier, pnum := a_partnum, 
pqty := a_qty);
        LOOP
            FETCH supplier_part_qty_matches INTO b_seid, b_fr, b_partnum, b_st;
            b_cnt := b_cnt + 1;
            EXIT WHEN TRUE;  -- no Need to loop here  One FETCH per query 
triggers the losses.
        END LOOP;
        CLOSE supplier_part_qty_matches;
    END LOOP;
    CLOSE supplier_parts;
    RAISE NOTICE '-----------after close, Count a: %, count b: %', a_cnt, b_cnt;
    RAISE NOTICE '%', (SELECT (PG_SIZE_PRETTY(SUM(used_bytes)), 
PG_SIZE_PRETTY(SUM(total_bytes)), PG_SIZE_PRETTY(SUM(free_bytes))) FROM 
pg_get_backend_memory_contexts());
    --perform meminfo();
END;
$function$;

SET jit = on;
SET jit_above_cost = 0;
SET jit_inline_above_cost = 0;
SET jit_optimize_above_cost = 0;

-- This will use JIT until the table is analyzed, which causes the problem
explain SELECT DISTINCT seid, fr_field_name, st_field_name
          FROM pg_temp.parts
         WHERE seid <> 497 AND partnum >= '1'
         ORDER BY seid;

-- But using JIT results in loss of some bytes.
SELECT pg_temp.fx(497);


Reply via email to