----------------------------------------------------------- This is an automatically generated e-mail. To reply, visit: https://reviews.apache.org/r/785/#review719 -----------------------------------------------------------
trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStatsAggregator.java <https://reviews.apache.org/r/785/#comment1458> Isn't isValidStatics() should take "key" as a parameter rather than "rowID"? "key" should indicate which statistics this is right? trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStatsPublisher.java <https://reviews.apache.org/r/785/#comment1459> should be >= here - Ning On 2011-05-26 02:52:55, Tomasz Nykiel wrote: > > ----------------------------------------------------------- > This is an automatically generated e-mail. To reply, visit: > https://reviews.apache.org/r/785/ > ----------------------------------------------------------- > > (Updated 2011-05-26 02:52:55) > > > Review request for hive. > > > Summary > ------- > > Currently, when executing INSERT OVERWRITE and ANALYZE TABLE commands we > collect statistics about the number of rows per partition/table. > Other statistics (e.g., total table/partition size) are derived from the file > system. > > We introduce a new feature for collecting information about the sizes of > uncompressed data, to be able to determine the efficiency of compression. > On top of adding the new statistic collected, this patch extends the stats > collection mechanism, so any new statistics could be added easily. > > 1. serializer/deserializer classes are amended to accommodate collecting > sizes of uncompressed data, when serializing/deserializing objects. > We support: > > Columnar SerDe > LazySimpleSerDe > LazyBinarySerDe > > For other SerDe classes the uncompressed siez will be 0. > > 2. StatsPublisher / StatsAggregator interfaces are extended to support > multi-stats collection for both JDBC and HBase. > > 3. For both INSERT OVERWRITE and ANALYZE statements, FileSinkOperator and > TableScanOperator respectively are extended to support multi-stats collection. > > (2) and (3) enable easy extension for other types of statistics. > > 4. Collecting uncompressed size can be disabled by setting: > > hive.stats.collect.uncompressedsize = false > > > This addresses bug HIVE-2185. > https://issues.apache.org/jira/browse/HIVE-2185 > > > Diffs > ----- > > trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 1127756 > > trunk/contrib/src/java/org/apache/hadoop/hive/contrib/serde2/RegexSerDe.java > 1127756 > > trunk/contrib/src/java/org/apache/hadoop/hive/contrib/serde2/TypedBytesSerDe.java > 1127756 > > trunk/contrib/src/java/org/apache/hadoop/hive/contrib/serde2/s3/S3LogDeserializer.java > 1127756 > trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java > 1127756 > > trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStatsAggregator.java > 1127756 > > trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStatsPublisher.java > 1127756 > > trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStatsSetupConstants.java > 1127756 > > trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStatsUtils.java > PRE-CREATION > trunk/hbase-handler/src/test/queries/hbase_stats.q 1127756 > trunk/hbase-handler/src/test/results/hbase_stats.q.out 1127756 > trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java > 1127756 > trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java 1127756 > trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Stat.java 1127756 > trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java 1127756 > trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java > 1127756 > trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java > 1127756 > > trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java > 1127756 > trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java > 1127756 > trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java 1127756 > trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsAggregator.java > 1127756 > trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsPublisher.java > 1127756 > trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsSetupConst.java > 1127756 > > trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsAggregator.java > 1127756 > > trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java > 1127756 > > trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsSetupConstants.java > 1127756 > trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsUtils.java > PRE-CREATION > trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestStatsPublisher.java > 1127756 > > trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestStatsPublisherEnhanced.java > PRE-CREATION > trunk/ql/src/test/org/apache/hadoop/hive/serde2/TestSerDe.java 1127756 > trunk/ql/src/test/queries/clientpositive/stats14.q PRE-CREATION > trunk/ql/src/test/queries/clientpositive/stats15.q PRE-CREATION > trunk/ql/src/test/results/clientpositive/bucketmapjoin1.q.out 1127756 > trunk/ql/src/test/results/clientpositive/bucketmapjoin2.q.out 1127756 > trunk/ql/src/test/results/clientpositive/bucketmapjoin3.q.out 1127756 > trunk/ql/src/test/results/clientpositive/bucketmapjoin4.q.out 1127756 > trunk/ql/src/test/results/clientpositive/bucketmapjoin5.q.out 1127756 > trunk/ql/src/test/results/clientpositive/combine2.q.out 1127756 > trunk/ql/src/test/results/clientpositive/filter_join_breaktask.q.out > 1127756 > trunk/ql/src/test/results/clientpositive/join_map_ppr.q.out 1127756 > trunk/ql/src/test/results/clientpositive/merge3.q.out 1127756 > trunk/ql/src/test/results/clientpositive/merge4.q.out 1127756 > trunk/ql/src/test/results/clientpositive/pcr.q.out 1127756 > trunk/ql/src/test/results/clientpositive/sample10.q.out 1127756 > trunk/ql/src/test/results/clientpositive/stats11.q.out 1127756 > trunk/ql/src/test/results/clientpositive/stats14.q.out PRE-CREATION > trunk/ql/src/test/results/clientpositive/stats15.q.out PRE-CREATION > trunk/ql/src/test/results/clientpositive/union22.q.out 1127756 > trunk/serde/src/java/org/apache/hadoop/hive/serde2/Deserializer.java > 1127756 > > trunk/serde/src/java/org/apache/hadoop/hive/serde2/MetadataTypedColumnsetSerDe.java > 1127756 > trunk/serde/src/java/org/apache/hadoop/hive/serde2/SerDeStats.java > PRE-CREATION > trunk/serde/src/java/org/apache/hadoop/hive/serde2/SerDeStatsStruct.java > PRE-CREATION > trunk/serde/src/java/org/apache/hadoop/hive/serde2/Serializer.java 1127756 > trunk/serde/src/java/org/apache/hadoop/hive/serde2/TypedSerDe.java 1127756 > > trunk/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java > 1127756 > > trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java > 1127756 > > trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java > 1127756 > > trunk/serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/DynamicSerDe.java > 1127756 > > trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java > 1127756 > trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java > 1127756 > > trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java > 1127756 > > trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java > 1127756 > > trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftDeserializer.java > 1127756 > trunk/serde/src/test/org/apache/hadoop/hive/serde2/TestStatsSerde.java > PRE-CREATION > > Diff: https://reviews.apache.org/r/785/diff > > > Testing > ------- > > - additional JUnit test for Serializer/Deserializer amended classes > - additional queries for TestCliDriver over multi-partition tables > - all other JUnit tests > - standalone setup > > > Thanks, > > Tomasz > >