query using stats

Navdeep Agrawal Mon, 13 Oct 2014 00:52:39 -0700

Hi ,
I am trying to run query using stats with following flags as set,but it is 
always running map reduce job instead of getting direct result from metastore 
(Hive 0.13.0.2.1.2.1-471).can some please suggest me to run optimized query or 
a workaround it .
Thanks in advance


Set hive.stats.reliable=true
Set hive.fetch.column.stats=true
Set hive.stats.fetch.partition.stats=true
Set hive.compute.query.using.stats=true
query
explain select max(load_inst_id) from ndnslog_orc1 where query_date > 
'2012-01-01';//I have also tried explain select max(load_inst_id) from 
ndnslog_orc1 where query_date  = '2013-01-01' but same result as I have only 
one partition .
OK
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 is a root stage

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: ndnslog_orc1
            filterExpr: (query_date > '2012-01-01') (type: boolean)
            Statistics: Num rows: 331575 Data size: 2652601 Basic stats: 
COMPLETE Column stats: NONE
            Select Operator
              expressions: load_inst_id (type: bigint)
              outputColumnNames: load_inst_id
              Statistics: Num rows: 331575 Data size: 2652601 Basic stats: 
COMPLETE Column stats: NONE
              Group By Operator
                aggregations: max(load_inst_id)
                mode: hash
                outputColumnNames: _col0
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
                Reduce Output Operator
                  sort order:
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
                  value expressions: _col0 (type: bigint)
      Reduce Operator Tree:
        Group By Operator
          aggregations: max(VALUE._col0)
          mode: mergepartial
          outputColumnNames: _col0
          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE
          Select Operator
            expressions: _col0 (type: bigint)
            outputColumnNames: _col0
            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE
            File Output Operator
              compressed: false
              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1


where I had already run analyze statement as I can see statistics in hive meta 
store.

My insert statement is

set hive.stats.autogather=false;
set hive.fetch.column.stats=true;
set hive.stats.fetch.partition.stats=true;
set hive.compute.query.using.stats=true;
set hive.cbo.enable=true;
--set mapred.reduce.tasks=20;
--set hive.exec.reducers.max=100;
--set mapreduce.job.reduce.slowstart.completedmaps=0.95;
--set mapreduce.task.io.sort.mb=1600;
set mapred.job.queue.name=queue1;

use ${database};
--use ssa;

insert into table ${managed_table} partition (query_date)
select
${hivevar:LOADID} as load_inst_id,
src_filename,
from_unixtime(query_time) as query_time,
client_ip,
query_type,
query_domain,
response_domain,
ttl,
response_type,
response_info,
to_date(from_unixtime(query_time)) as query_date
from  ${external_table} ;

/Navdeep

query using stats

Reply via email to