On Mon, 25 Aug 2003, Stephan Szabo wrote:

> Date: Mon, 25 Aug 2003 00:43:56 -0700 (PDT)
> From: Stephan Szabo <[EMAIL PROTECTED]>
> To: mike <[EMAIL PROTECTED]>
> Cc: [EMAIL PROTECTED]
> Subject: Re: [BUGS] index not used afer VACUUM ANALYZE
>
> On Thu, 21 Aug 2003, mike wrote:
>
> > Hi,
> > I hav a db as specified in nit.sql
> > flows has 763488 entries.
> >
> > After dropping/creating/loading the db and running auswert.sh I get
> > the attached result from query1.txt.
> > After 'VACUUM ANALYZE' I get the results from query2.txt
> >
> > As you can see, the indexes are not used any longer.
> > Why?
>
> It looks like the row estimates changed to say that a large % of the rows
> match the condition.  Is that true?  In any case, what does EXPLAIN

Partially.
I have statistical records (763488) - various IP-Traffic - collected for one
month.
After collection I try to condense the data for dayly statistics.

The EXPLAIN ANALYZE output is attached:
a1.txt is before, a2.txt after VACUUM ANALYZE run.



Bye/2
---
Michael Reifenberger, Business Unit Manager SAP-Basis, Plaut Consulting
Comp: [EMAIL PROTECTED] | Priv: [EMAIL PROTECTED]
      http://www.plaut.de           |       http://www.Reifenberger.com
#
# Anzahl Sätze in der DB, von Datum, bis Datum, Anzahl Tage
#
                                                     QUERY PLAN                        
                             
--------------------------------------------------------------------------------------------------------------------
 Aggregate  (cost=41037.49..41037.50 rows=1 width=4) (actual time=6307.81..6307.82 
rows=1 loops=1)
   ->  Seq Scan on flows  (cost=0.00..31493.88 rows=763488 width=4) (actual 
time=0.02..3134.26 rows=763488 loops=1)
 Total runtime: 6323.81 msec
(3 rows)

#
# Tagesstatistik je Protokoll
#
                                                                   QUERY PLAN          
                                                          
-------------------------------------------------------------------------------------------------------------------------------------------------
 Sort  (cost=14991.34..14991.66 rows=131 width=52) (actual time=16473.23..16473.77 
rows=159 loops=1)
   Sort Key: f.date, (sum(f.sdbytes) + sum(f.dsbytes))
   ->  HashAggregate  (cost=14984.76..14986.73 rows=131 width=52) (actual 
time=16471.51..16472.41 rows=159 loops=1)
         ->  Hash Join  (cost=2.62..14947.53 rows=2482 width=52) (actual 
time=1.53..11924.14 rows=753800 loops=1)
               Hash Cond: ("outer"."type" = "inner".pnum)
               ->  Index Scan using flows_2 on flows f  (cost=0.00..14786.46 rows=3818 
width=24) (actual time=0.15..4180.55 rows=753800 loops=1)
                     Index Cond: ((date >= '2003-07-01'::date) AND (date <= 
'2003-08-01'::date))
               ->  Hash  (cost=2.30..2.30 rows=130 width=36) (actual time=1.17..1.17 
rows=0 loops=1)
                     ->  Seq Scan on protos p  (cost=0.00..2.30 rows=130 width=36) 
(actual time=0.06..0.62 rows=130 loops=1)
 Total runtime: 16499.55 msec
(10 rows)

#
# Tagesstatistik gesamt
#
                                                                QUERY PLAN             
                                                    
-------------------------------------------------------------------------------------------------------------------------------------------
 Sort  (cost=14834.22..14834.23 rows=2 width=20) (actual time=8519.01..8519.12 rows=31 
loops=1)
   Sort Key: date, (sum(sdbytes) + sum(dsbytes))
   ->  GroupAggregate  (cost=0.00..14834.21 rows=2 width=20) (actual 
time=309.70..8518.71 rows=31 loops=1)
         ->  Index Scan using flows_2 on flows f  (cost=0.00..14786.46 rows=3818 
width=20) (actual time=0.19..3989.96 rows=753800 loops=1)
               Index Cond: ((date >= '2003-07-01'::date) AND (date <= 
'2003-08-01'::date))
 Total runtime: 8519.59 msec
(6 rows)

#
# Top 50: max( Bytes )
#
                                                                   QUERY PLAN          
                                                          
-------------------------------------------------------------------------------------------------------------------------------------------------
 Limit  (cost=14884.07..14884.20 rows=50 width=72) (actual time=15852.17..15852.70 
rows=50 loops=1)
   ->  Sort  (cost=14884.07..14884.58 rows=201 width=72) (actual 
time=15852.16..15852.34 rows=50 loops=1)
         Sort Key: (sum(sdbytes) + sum(dsbytes))
         ->  HashAggregate  (cost=14872.36..14876.38 rows=201 width=72) (actual 
time=15406.83..15551.54 rows=23410 loops=1)
               ->  Index Scan using flows_2 on flows f  (cost=0.00..14824.64 rows=3818 
width=72) (actual time=0.21..6820.67 rows=753800 loops=1)
                     Index Cond: ((date >= '2003-07-01'::date) AND (date <= 
'2003-08-01'::date))
 Total runtime: 15907.26 msec
(7 rows)

#
# Anzahl Sätze in der DB, von Datum, bis Datum, Anzahl Tage
#
                                                     QUERY PLAN                        
                             
--------------------------------------------------------------------------------------------------------------------
 Aggregate  (cost=41037.49..41037.50 rows=1 width=4) (actual time=6226.68..6226.69 
rows=1 loops=1)
   ->  Seq Scan on flows  (cost=0.00..31493.88 rows=763488 width=4) (actual 
time=0.02..3071.08 rows=763488 loops=1)
 Total runtime: 6227.08 msec
(3 rows)

#
# Tagesstatistik je Protokoll
#
                                                               QUERY PLAN              
                                                  
-----------------------------------------------------------------------------------------------------------------------------------------
 Sort  (cost=175003.67..175013.77 rows=4038 width=28) (actual time=30314.13..30314.68 
rows=159 loops=1)
   Sort Key: f.date, (sum(f.sdbytes) + sum(f.dsbytes))
   ->  GroupAggregate  (cost=161732.39..174761.81 rows=4038 width=28) (actual 
time=23452.70..30313.26 rows=159 loops=1)
         ->  Sort  (cost=161732.39..163585.08 rows=741077 width=28) (actual 
time=23452.30..26414.32 rows=753800 loops=1)
               Sort Key: f.date, p.pname
               ->  Hash Join  (cost=2.62..46430.11 rows=741077 width=28) (actual 
time=1.31..11960.64 rows=753800 loops=1)
                     Hash Cond: ("outer"."type" = "inner".pnum)
                     ->  Seq Scan on flows f  (cost=0.00..35311.32 rows=741077 
width=24) (actual time=0.07..3909.10 rows=753800 loops=1)
                           Filter: ((date >= '2003-07-01'::date) AND (date <= 
'2003-08-01'::date))
                     ->  Hash  (cost=2.30..2.30 rows=130 width=12) (actual 
time=1.13..1.13 rows=0 loops=1)
                           ->  Seq Scan on protos p  (cost=0.00..2.30 rows=130 
width=12) (actual time=0.05..0.58 rows=130 loops=1)
 Total runtime: 30319.64 msec
(12 rows)

#
# Tagesstatistik gesamt
#
                                                         QUERY PLAN                    
                                      
-----------------------------------------------------------------------------------------------------------------------------
 Sort  (cost=44576.06..44576.14 rows=32 width=20) (actual time=7703.97..7704.07 
rows=31 loops=1)
   Sort Key: date, (sum(sdbytes) + sum(dsbytes))
   ->  HashAggregate  (cost=44574.78..44575.26 rows=32 width=20) (actual 
time=7703.55..7703.72 rows=31 loops=1)
         ->  Seq Scan on flows f  (cost=0.00..35311.32 rows=741077 width=20) (actual 
time=0.07..3450.81 rows=753800 loops=1)
               Filter: ((date >= '2003-07-01'::date) AND (date <= '2003-08-01'::date))
 Total runtime: 7704.81 msec
(6 rows)

#
# Top 50: max( Bytes )
#
                                                               QUERY PLAN              
                                                  
-----------------------------------------------------------------------------------------------------------------------------------------
 Limit  (cost=306484.35..306484.47 rows=50 width=30) (actual time=39336.01..39336.64 
rows=50 loops=1)
   ->  Sort  (cost=306484.35..308337.04 rows=741077 width=30) (actual 
time=39336.00..39336.18 rows=50 loops=1)
         Sort Key: (sum(sdbytes) + sum(dsbytes))
         ->  GroupAggregate  (cost=161634.37..187572.07 rows=741077 width=30) (actual 
time=30577.11..39023.60 rows=23410 loops=1)
               ->  Sort  (cost=161634.37..163487.06 rows=741077 width=30) (actual 
time=30576.95..34875.71 rows=753800 loops=1)
                     Sort Key: network(set_masklen(sip, 16)), network(set_masklen(dip, 
16))
                     ->  Seq Scan on flows f  (cost=0.00..42722.09 rows=741077 
width=30) (actual time=0.12..6544.05 rows=753800 loops=1)
                           Filter: ((date >= '2003-07-01'::date) AND (date <= 
'2003-08-01'::date))
 Total runtime: 39399.83 msec
(9 rows)

vdat='2003-07-01'
bdat='2003-08-01'

# Auswertung tabelleninhalt
echo "#"
echo "# Anzahl Sätze in der DB, von Datum, bis Datum, Anzahl Tage"
echo "#"
psql -U pgsql nitpicker << EOF1
  EXPLAIN ANALYZE select count(*),
  min(date) as Von, max(date) as Bis, max(date)-min(date) as Tage
  from flows;
EOF1

# Auswertung Tage, Protokoll, Bytes, Pakete aufsummiert
echo "#"
echo "# Tagesstatistik je Protokoll"
echo "#"
psql -U pgsql nitpicker << EOF2
EXPLAIN ANALYZE  select f.date as datum, p.pname as Protokoll,
    sum(f.sdbytes) + sum(f.dsbytes) as Bytes,
    sum(f.sdpkgs) + sum(f.dspkgs) as Pakete
  from protos p, flows f
  where
  f.date >= '$vdat' and f.date <= '$bdat' and
  f.type = p.pnum
  group by datum, protokoll
  order by datum, Bytes desc
  ;
EOF2

echo "#"
echo "# Tagesstatistik gesamt"
echo "#"
psql -U pgsql nitpicker << EOF3
EXPLAIN ANALYZE  select f.date as datum,
    sum(f.sdbytes) + sum(f.dsbytes) as Bytes,
    sum(f.sdpkgs) + sum(f.dspkgs) as Pakete
  from flows f
  where
  f.date >= '$vdat' and f.date <= '$bdat'
  group by datum
  order by datum, Bytes desc
  ;
EOF3

echo "#"
echo "# Top 50: max( Bytes )"
echo "#"
psql -U pgsql nitpicker << EOF4
EXPLAIN ANALYZE  select count(*),
  network(set_masklen(sip,16)), network(set_masklen(dip,16)),
  sum(sdbytes) + sum(dsbytes) as bytes
  from flows f
  where
  f.date >= '$vdat' and f.date <= '$bdat'
  group by network(set_masklen(sip,16)), network(set_masklen(dip,16))
  order by bytes desc
  limit 50;
EOF4

#psql -U pgsql nitpicker << EOF
#  select
#  -- n1.nname as src, n2.nname as dst
#  count(*)
#  from flows f, networks n1, networks n2, protos p
#  where
#  f.sip << n1.nnet and
#  f.dip << n2.nnet
#EOF

#psql -U pgsql nitpicker -c "\
#select \
#  f.sip, f.dip, p.pname, \
#  f.sdpkgs, f.dspkgs \
#  from flows f, protos p where \
#  not f.sip << '194.39.177.0/24' and \
#  not f.sip << '194.99.75.0/24' and \
#  not f.sip = '195.212.179.2' and \
#  not f.dip << '194.39.177.0/24' and \
#  not f.dip << '194.99.75.0/24' and \
#  not f.dip = '195.212.179.2' and \
#  f.type = p.pnum \
#  order by sip, dip;"

#psql -U pgsql nitpicker << EOFX
#  drop table volumen;
#  create table volumen (
#    year int4,
#    week int4,
#    snet inet,
#    dnet inet
#  );
#  drop table n_temp;
#  create table n_temp (
#    year int4,
#    week int4,
#    snet inet,
#    dnet inet,
#    sdbytes int8,
#    dsbytes int8,
#    sdpkgs int8,
#    dspkgs int8
#  );
#EOFX
#date
#psql -U pgsql nitpicker << EOF
#  insert into n_temp
#  select
#    date_part( 'year', f.date ) ,
#    date_part( 'week', f.date ),
#    n.nnet, network( set_masklen( f.dip, 16 ) ),
#    sum( f.sdbytes ), sum( f.dsbytes ),
#    sum( f.sdpkgs ), sum( f.dspkgs )
#  from flows f, networks n
#  where
#    f.sip <<= n.nnet and
#    not f.dip <<= n.nnet
#  group by f.date, n.nnet, network( set_masklen( f.dip, 16 ) )
#  union
#  select
#    date_part( 'year', f.date ) ,
#    date_part( 'week', f.date ),
#    n.nnet, network( set_masklen( f.sip, 16 ) ),
#    sum( f.dsbytes ), sum( f.sdbytes ),
#    sum( f.dspkgs ), sum( f.sdpkgs )
#  from flows f, networks n
#  where
#    f.dip <<= n.nnet and
#    not f.sip <<= n.nnet
#  group by f.date, n.nnet, network( set_masklen( f.sip, 16 ) )
#  union
#  select
#    date_part( 'year', f.date ) ,
#    date_part( 'week', f.date ),
#    network( set_masklen( f.sip, 16 ) ), network( set_masklen( f.dip, 16 ) ),
#    sum( f.sdbytes ), sum( f.dsbytes ),
#    sum( f.sdpkgs ), sum( f.dspkgs )
#  from flows f, networks n
#  where
#    not f.sip <<= n.nnet and
#    not f.dip <<= n.nnet
#  group by f.date, network( set_masklen( f.sip, 16 ) ), network( set_masklen( f.dip, 
16 ) )
#EOF
#date
#  f.date >= '2003-03-28' and f.date <= '2003-04-01'
---------------------------(end of broadcast)---------------------------
TIP 3: if posting/reading through Usenet, please send an appropriate
      subscribe-nomail command to [EMAIL PROTECTED] so that your
      message can get through to the mailing list cleanly

Reply via email to