Google "Kylin aggregation group" and the first result is: http://kylin.apache.org/blog/2016/02/18/new-aggregation-group/
On Mon, Apr 3, 2017 at 12:03 PM, [email protected] <[email protected]> wrote: > 你好,少峰: > kylin cube中,无论是使用 aggregation group还是其他cube优化策略,最终得到的都是一系列组合(如:<day_time, > gender>),而这些组合实际上是与 cuboid 唯一对应的。 > 在使用sql查询的时候,如果没有对应的 cuobid,那么查询是失败的(排除 extend、derived的维度组合)。 > > 下图是,apache kylin官网对 aggregation group的解析。按同样的规则,在上封邮件中定义的Cube, > 应该只会产生10种维度组合,即 > <day_time, gender> 576 > <day_time, age> 544 > <day_time, brand> 528 > <day_time, model> 520 > <day_time, resolution> 516 > <day_time, os_version> 514 > <day_time, ntt> 513 > <ts_minute> 256 > <ts_hour> 128 > <day_time> 512 > 对应的 cuboid 为后面的数字。从cube_statistics任务中看,最后只有 1023,516,576,513,528,514, > 544,520 这些组合(查询Hbase Meta表也是这种情况)。 > > 在 《Apache Kylin 权威指南》一书中,有介绍在一些极端情况下(如:precisely define the > cuboids/combinations) aggregation group 的使用方法。 > 所以,我以为目前 kylin 是支持这种定义方法的。 > > > > > > > > > > > ------------------------------ > [email protected] > > > *From:* ShaoFeng Shi <[email protected]> > *Date:* 2017-04-02 22:17 > *To:* user <[email protected]> > *Subject:* Re: How Kylin Cuboid Scheduler Work With Aggregation Groups ? > Hi Bing, > > An aggregation group is a dimension group, or say a sub-cube; it is NOT a > cuboid. > > I guess you want to precisely define the cuboids/combinations, that isn't > supported as in many cases user couldn't list all the combinations they > use. But you can describe them with the agg group / mandatory / joint as > close as possible. > > 2017-03-31 15:49 GMT+08:00 [email protected] <[email protected]>: > >> Hi,all >> I have a Cube, the desc is : >> >> { >> "uuid": "bcf11be2-83e4-497e-9e35-a402460a6446", >> "last_modified": 1490860973892, >> "version": "1.6.0", >> "name": "adx_flow_insight", >> "model_name": "adx_operator", >> "description": "", >> "null_string": null, >> "dimensions": [ >> { >> "name": "GENDER", >> "table": "FLOW_INSIGHT.VIEW_FLOW_INSIGHT", >> "column": "GENDER", >> "derived": null >> }, >> { >> "name": "AGE", >> "table": "FLOW_INSIGHT.VIEW_FLOW_INSIGHT", >> "column": "AGE", >> "derived": null >> }, >> { >> "name": "BRAND", >> "table": "FLOW_INSIGHT.VIEW_FLOW_INSIGHT", >> "column": "BRAND", >> "derived": null >> }, >> { >> "name": "MODEL", >> "table": "FLOW_INSIGHT.VIEW_FLOW_INSIGHT", >> "column": "MODEL", >> "derived": null >> }, >> { >> "name": "RESOLUTION", >> "table": "FLOW_INSIGHT.VIEW_FLOW_INSIGHT", >> "column": "RESOLUTION", >> "derived": null >> }, >> { >> "name": "OS_VERSION", >> "table": "FLOW_INSIGHT.VIEW_FLOW_INSIGHT", >> "column": "OS_VERSION", >> "derived": null >> }, >> { >> "name": "NTT", >> "table": "FLOW_INSIGHT.VIEW_FLOW_INSIGHT", >> "column": "NTT", >> "derived": null >> }, >> { >> "name": "TS_MINUTE", >> "table": "FLOW_INSIGHT.VIEW_FLOW_INSIGHT", >> "column": "TS_MINUTE", >> "derived": null >> }, >> { >> "name": "TS_HOUR", >> "table": "FLOW_INSIGHT.VIEW_FLOW_INSIGHT", >> "column": "TS_HOUR", >> "derived": null >> }, >> { >> "name": "DAY_TIME", >> "table": "FLOW_INSIGHT.VIEW_FLOW_INSIGHT", >> "column": "DAY_TIME", >> "derived": null >> } >> ], >> "measures": [ >> { >> "name": "_COUNT_", >> "function": { >> "expression": "COUNT", >> "parameter": { >> "type": "constant", >> "value": "1", >> "next_parameter": null >> }, >> "returntype": "bigint" >> }, >> "dependent_measure_ref": null >> }, >> { >> "name": "REQUEST_PV", >> "function": { >> "expression": "SUM", >> "parameter": { >> "type": "column", >> "value": "REQUEST", >> "next_parameter": null >> }, >> "returntype": "bigint" >> }, >> "dependent_measure_ref": null >> }, >> { >> "name": "IMPRESS_PV", >> "function": { >> "expression": "SUM", >> "parameter": { >> "type": "column", >> "value": "IMPRESS", >> "next_parameter": null >> }, >> "returntype": "bigint" >> }, >> "dependent_measure_ref": null >> }, >> { >> "name": "CLICK_PV", >> "function": { >> "expression": "SUM", >> "parameter": { >> "type": "column", >> "value": "CLICK", >> "next_parameter": null >> }, >> "returntype": "bigint" >> }, >> "dependent_measure_ref": null >> }, >> { >> "name": "FILL_PV", >> "function": { >> "expression": "SUM", >> "parameter": { >> "type": "column", >> "value": "FILL", >> "next_parameter": null >> }, >> "returntype": "bigint" >> }, >> "dependent_measure_ref": null >> }, >> { >> "name": "UV_DID", >> "function": { >> "expression": "COUNT_DISTINCT", >> "parameter": { >> "type": "column", >> "value": "DID", >> "next_parameter": null >> }, >> "returntype": "hllc(10)" >> }, >> "dependent_measure_ref": null >> } >> ], >> "dictionaries": [], >> "rowkey": { >> "rowkey_columns": [ >> { >> "column": "DAY_TIME", >> "encoding": "date", >> "isShardBy": false >> }, >> { >> "column": "TS_MINUTE", >> "encoding": "integer:4", >> "isShardBy": false >> }, >> { >> "column": "TS_HOUR", >> "encoding": "integer:4", >> "isShardBy": false >> }, >> { >> "column": "GENDER", >> "encoding": "dict", >> "isShardBy": false >> }, >> { >> "column": "AGE", >> "encoding": "dict", >> "isShardBy": false >> }, >> { >> "column": "BRAND", >> "encoding": "dict", >> "isShardBy": false >> }, >> { >> "column": "MODEL", >> "encoding": "dict", >> "isShardBy": false >> }, >> { >> "column": "RESOLUTION", >> "encoding": "dict", >> "isShardBy": false >> }, >> { >> "column": "OS_VERSION", >> "encoding": "dict", >> "isShardBy": false >> }, >> { >> "column": "NTT", >> "encoding": "dict", >> "isShardBy": false >> } >> ] >> }, >> "hbase_mapping": { >> "column_family": [ >> { >> "name": "F1", >> "columns": [ >> { >> "qualifier": "M", >> "measure_refs": [ >> "_COUNT_", >> "REQUEST_PV", >> "IMPRESS_PV", >> "CLICK_PV", >> "FILL_PV" >> ] >> } >> ] >> }, >> { >> "name": "F2", >> "columns": [ >> { >> "qualifier": "M", >> "measure_refs": [ >> "UV_DID" >> ] >> } >> ] >> } >> ] >> }, >> "aggregation_groups": [ >> { >> "includes": [ >> "DAY_TIME", >> "GENDER" >> ], >> "select_rule": { >> "hierarchy_dims": [], >> "mandatory_dims": [ >> "DAY_TIME" >> ], >> "joint_dims": [] >> } >> }, >> { >> "includes": [ >> "DAY_TIME", >> "AGE" >> ], >> "select_rule": { >> "hierarchy_dims": [], >> "mandatory_dims": [ >> "DAY_TIME" >> ], >> "joint_dims": [] >> } >> }, >> { >> "includes": [ >> "DAY_TIME", >> "BRAND" >> ], >> "select_rule": { >> "hierarchy_dims": [], >> "mandatory_dims": [ >> "DAY_TIME" >> ], >> "joint_dims": [] >> } >> }, >> { >> "includes": [ >> "DAY_TIME", >> "MODEL" >> ], >> "select_rule": { >> "hierarchy_dims": [], >> "mandatory_dims": [ >> "DAY_TIME" >> ], >> "joint_dims": [] >> } >> }, >> { >> "includes": [ >> "DAY_TIME", >> "RESOLUTION" >> ], >> "select_rule": { >> "hierarchy_dims": [], >> "mandatory_dims": [ >> "RESOLUTION" >> ], >> "joint_dims": [] >> } >> }, >> { >> "includes": [ >> "DAY_TIME", >> "OS_VERSION" >> ], >> "select_rule": { >> "hierarchy_dims": [], >> "mandatory_dims": [ >> "DAY_TIME" >> ], >> "joint_dims": [] >> } >> }, >> { >> "includes": [ >> "DAY_TIME", >> "NTT" >> ], >> "select_rule": { >> "hierarchy_dims": [], >> "mandatory_dims": [ >> "DAY_TIME" >> ], >> "joint_dims": [] >> } >> }, >> { >> "includes": [ >> "TS_MINUTE" >> ], >> "select_rule": { >> "hierarchy_dims": [], >> "mandatory_dims": [ >> "TS_MINUTE" >> ], >> "joint_dims": [] >> } >> }, >> { >> "includes": [ >> "TS_HOUR" >> ], >> "select_rule": { >> "hierarchy_dims": [], >> "mandatory_dims": [ >> "TS_HOUR" >> ], >> "joint_dims": [] >> } >> } >> ], >> "signature": "DSSmByHn2sATiETlBdjANQ==", >> "notify_list": [], >> "status_need_notify": [ >> "ERROR", >> "DISCARDED", >> "SUCCEED" >> ], >> "partition_date_start": 1488326400000, >> "partition_date_end": 3153600000000, >> "auto_merge_time_ranges": [ >> 604800000, >> 2419200000 >> ], >> "retention_range": 0, >> "engine_type": 2, >> "storage_type": 2, >> "override_kylin_properties": { >> "kylin.job.mr.config.override.mapreduce.job.queuename": "ad" >> } >> >> } >> >> There have 10 dims, and use aggregation groups. I want Cube only >> contains 10 combs: >> <day_time, gender> 576 >> <day_time, age> 544 >> <day_time, brand> 528 >> <day_time, model> 520 >> <day_time, resolution> 516 >> <day_time, os_version> 514 >> <day_time, ntt> 513 >> <ts_minute> 256 >> <ts_hour> 128 >> <day_time> 512 >> >> But the Cuboid Scheduler parse as follower: >> >> 2017-03-31 15:32:47,735 (main) [INFO - org..apache.kylin.cub >> e.CubeManager.loadAllCubeInstance(CubeManager.java:908)] >> Loaded 4 cubes, fail on 0 cubes >> 1023 >> 516 >> 576 >> 513 >> 528 >> 514 >> 544 >> 520 >> 2017-03-31 15:32:47,742 (Thread-0) [INFO - org.apache.hadoop >> .hbase.client.ConnectionManager$HConnectionImplementation.cl >> oseMasterService(ConnectionManager.java:2259)] Closing >> master protocol: MasterService >> >> Question: >> How Aggregation Groups Work? I can not set single dim in >> aggregation? >> >> Thanks for you suggestion~~~ >> >> ------------------------------ >> [email protected] <[email protected]> >> > > > > -- > Best regards, > > Shaofeng Shi 史少锋 > >
