Hi Tongxin,

Are these dimensions’ cardinality very very small?
"PWDTYPE”,
“IS_NEW_CARD”,
"REQ_ORDER_YEWU_TYPE”,
"ORDER_YEWU_TYPE”,


If yes, try to put them in one joint aggr group. This may help to reduce the 
expansion rate.


Thanks,
Dong Li


Original Message
Sender:仇同心[email protected]
Recipient:[email protected][email protected]; 
[email protected][email protected]
Date:Monday, Nov 28, 2016 12:52
Subject:Cube optimization for help


Hi,all
There is a cube optimization for help.
Cuhe has 15 dimensions, including 14 normal dimensions and 1 derived dimension, 
and the cardinality of all dimensions is not high;And this cube also has 10 
measures, including 1 count expression,2 sum expressions and 7 COUNT_DISTINCT 
expressions, and the cardinality of all measures’s column is more than one 
hundred million, the Return Type of 7 COUNT_DISTINCT expressions is hllc(14).

I founded that the step of “Build N - Dimension Cuboid Data” cost most of the 
time, I hope you can provide me some optimization plan!

The info about the segment: Start Time: 2016-11-10 00:00:00 End Time: 
2016-11-24 00:00:00

Source Records: 201,027,127
Cube Size: 1.73 TB
Expansion Rate:14025.91%
The Duration of this job was 1,961.62 mins
The detailed build steps of job :
 2016-11-25 11:15:24 GMT+8
 #1 Step Name: Create Intermediate Flat Hive Table
 Duration: 1.57 mins

2016-11-25 11:16:58 GMT+8
 #2 Step Name: Redistribute Flat Hive Table
 Duration: 3.32 mins

2016-11-25 11:20:17 GMT+8
 #3 Step Name: Extract Fact Table Distinct Columns
 Data Size: 8.57 MB
Duration: 19.14 mins

2016-11-25 11:39:26 GMT+8
 #4 Step Name: Build Dimension Dictionary
 Duration: 0.03 mins
2016-11-25 11:39:27 GMT+8
 #5 Step Name: Save Cuboid Statistics
 Duration: 0.01 mins

2016-11-25 11:39:28 GMT+8
 #6 Step Name: Create HTable
 Duration: 0.07 mins
2016-11-25 11:39:32 GMT+8
 #7 Step Name: Build Base Cuboid Data
 Data Size: 2.53 GB
Duration: 14.59 mins

2016-11-25 11:54:08 GMT+8
 #8 Step Name: Build N-Dimension Cuboid Data : 13-Dimension
 Data Size: 2.31 GB
Duration: 31.27 mins

2016-11-25 12:25:24 GMT+8
 #9 Step Name: Build N-Dimension Cuboid Data : 12-Dimension
 Data Size: 17.81 GB
Duration: 430.54 mins

2016-11-25 19:35:56 GMT+8
#10 Step Name: Build N-Dimension Cuboid Data : 11-Dimension
 Data Size: 62.58 GB
Duration: 273.30 mins

2016-11-26 00:09:15 GMT+8
 #11 Step Name: Build N-Dimension Cuboid Data : 10-Dimension
 Data Size: 135.99 GB
Duration: 138.05 mins

2016-11-26 02:27:18 GMT+8
 #12 Step Name: Build N-Dimension Cuboid Data : 9-Dimension
 Data Size: 211.91 GB
Duration: 313.53 mins

2016-11-26 07:40:49 GMT+8
 #13 Step Name: Build N-Dimension Cuboid Data : 8-Dimension
 Data Size: 252.22 GB
Duration: 278.18 mins

2016-11-26 12:19:01 GMT+8
 #14 Step Name: Build N-Dimension Cuboid Data : 7-Dimension
 Data Size: 234.18 GB
Duration: 168.91 mins

2016-11-26 15:07:55 GMT+8
#15 Step Name: Build N-Dimension Cuboid Data : 6-Dimension
Data Size: 169.02 GB
Duration: 96.18 mins

2016-11-26 16:44:06 GMT+8
#16 Step Name: Build N-Dimension Cuboid Data : 5-Dimension
Data Size: 92.81 GB
Duration: 50.24 mins

2016-11-26 17:34:20 GMT+8
#17 Step Name: Build N-Dimension Cuboid Data : 4-Dimension
Data Size: 36.45 GB
Duration: 16.21 mins

2016-11-26 17:50:33 GMT+8
#18 Step Name: Build N-Dimension Cuboid Data : 3-Dimension
Data Size: 8.91 GB
Duration: 7.75 mins

2016-11-26 17:58:18 GMT+8
#19 Step Name: Build N-Dimension Cuboid Data : 2-Dimension
Data Size: 1.00 GB
Duration: 3.08 mins

2016-11-26 18:01:23 GMT+8
#20 Step Name: Build N-Dimension Cuboid Data : 1-Dimension
Data Size: 0.12 KB
Duration: 0.57 mins

2016-11-26 18:01:57 GMT+8
#21 Step Name: Build Cube
Duration: 0.00 mins
2016-11-26 18:01:57 GMT+8
#22 Step Name: Convert Cuboid Data to HFile
Data Size: 1.73 TB
Duration: 114.57 mins

2016-11-26 19:56:31 GMT+8
#23 Step Name: Load HFile to HBase Table
Duration: 0.38 mins
2016-11-26 19:56:54 GMT+8
#24 Step Name: Update Cube Info
Duration: 0.00 mins

2016-11-26 19:56:54 GMT+8
#25 Step Name: Garbage Collection
Duration: 0.12 mins


The following is the json of the cube:

{
 "uuid": "ab54fdc8-253d-430e-a193-4c17bbfd0872",
 "last_modified": 1480039756561,
 "version": "1.6.0",
 "name": "cube_pay_syt_order_det_analysis_new",
 "model_name": "pay_syt_order_det_analysis",
 "description": "",
 "null_string": null,
 "dimensions": [
 {
 "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.ETL_DT",
 "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
 "column": "ETL_DT",
 "derived": null
 },
 {
 "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.PWDTYPE",
 "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
 "column": "PWDTYPE",
 "derived": null
 },
 {
 "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.REAL_NAME",
 "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
 "column": "REAL_NAME",
 "derived": null
 },
 {
 "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.IS_NEW_CARD",
 "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
 "column": "IS_NEW_CARD",
 "derived": null
 },
 {
 "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.REQ_ORDER_YEWU_TYPE",
 "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
 "column": "REQ_ORDER_YEWU_TYPE",
 "derived": null
 },
 {
 "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.ORDER_YEWU_TYPE",
 "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
 "column": "ORDER_YEWU_TYPE",
 "derived": null
 },
 {
 "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.ITEM_FIRST_CATE_NAME",
 "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
 "column": "ITEM_FIRST_CATE_NAME",
 "derived": null
 },
 {
 "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.ITEM_SECOND_CATE_NAME",
 "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
 "column": "ITEM_SECOND_CATE_NAME",
 "derived": null
 },
 {
 "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.ITEM_THIRD_CATE_NAME",
 "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
 "column": "ITEM_THIRD_CATE_NAME",
 "derived": null
 },
 {
 "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.ITEM_BRAND_NAME",
 "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
 "column": "ITEM_BRAND_NAME",
 "derived": null
 },
 {
 "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.COUNTY_NAME",
 "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
 "column": "COUNTY_NAME",
 "derived": null
 },
 {
 "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.CITY_NAME",
 "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
 "column": "CITY_NAME",
 "derived": null
 },
 {
 "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.PROVINCE_NAME",
 "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
 "column": "PROVINCE_NAME",
 "derived": null
 },
 {
 "name": "DIM.DIM_DAY_DERIVED",
 "table": "DIM.DIM_DAY",
 "column": null,
 "derived": [
 "DIM_DAY_NAME",
 "DIM_DAY_TXDATE"
 ]
 },
 {
 "name": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D.PAY_TYPE",
 "table": "DMT.DMT_KYLIN_PAY_SYT_ORDR_DET_I_D",
 "column": "PAY_TYPE",
 "derived": null
 }
 ],
 "measures": [
 {
 "name": "_COUNT_",
 "function": {
 "expression": "COUNT",
 "parameter": {
 "type": "constant",
 "value": "1",
 "next_parameter": null
 },
 "returntype": "bigint"
 },
 "dependent_measure_ref": null
 },
 {
 "name": "应付金额",
 "function": {
 "expression": "SUM",
 "parameter": {
 "type": "column",
 "value": "AMOUNT",
 "next_parameter": null
 },
 "returntype": "decimal(25,4)"
 },
 "dependent_measure_ref": null
 },
 {
 "name": "商品成功支付金额",
 "function": {
 "expression": "SUM",
 "parameter": {
 "type": "column",
 "value": "USER_ACTUAL_PAY_AMOUNT",
"next_parameter": null
 },
 "returntype": "decimal(25,4)"
 },
 "dependent_measure_ref": null
 },
 {
 "name": "商城APP请求单",
 "function": {
 "expression": "COUNT_DISTINCT",
 "parameter": {
 "type": "column",
 "value": "SERIAL_NUM",
 "next_parameter": null
 },
 "returntype": "hllc(14)"
 },
 "dependent_measure_ref": null
 },
 {
 "name": "请求订单数",
 "function": {
 "expression": "COUNT_DISTINCT",
 "parameter": {
 "type": "column",
 "value": "REQ_OUTBIZNO",
 "next_parameter": null
 },
 "returntype": "hllc(14)"
 },
 "dependent_measure_ref": null
 },
 {
 "name": "支付订单数",
 "function": {
 "expression": "COUNT_DISTINCT",
 "parameter": {
 "type": "column",
 "value": "OUTBIZNO",
 "next_parameter": null
 },
 "returntype": "hllc(14)"
 },
 "dependent_measure_ref": null
 },
 {
 "name": "成功支付订单数",
 "function": {
 "expression": "COUNT_DISTINCT",
 "parameter": {
 "type": "column",
 "value": "SUC_OUTBIZNO",
 "next_parameter": null
 },
 "returntype": "hllc(14)"
 },
 "dependent_measure_ref": null
 },
 {
 "name": "支付单数",
 "function": {
 "expression": "COUNT_DISTINCT",
 "parameter": {
 "type": "column",
 "value": "PAYID",
 "next_parameter": null
 },
 "returntype": "hllc(15)"
 },
 "dependent_measure_ref": null
 },
 {
 "name": "成功支付单数",
 "function": {
 "expression": "COUNT_DISTINCT",
 "parameter": {
 "type": "column",
 "value": "SUC_PAYID",
 "next_parameter": null
 },
 "returntype": "hllc(14)"
 },
 "dependent_measure_ref": null
 },
 {
 "name": "用户数",
 "function": {
 "expression": "COUNT_DISTINCT",
 "parameter": {
 "type": "column",
 "value": "PIN",
 "next_parameter": null
 },
 "returntype": "hllc(14)"
 },
 "dependent_measure_ref": null
 }
 ],
 "dictionaries": [],
 "rowkey": {
 "rowkey_columns": [
 {
 "column": "ETL_DT",
 "encoding": "dict",
 "isShardBy": false
 },
 {
 "column": "COUNTY_NAME",
 "encoding": "dict",
 "isShardBy": false
 },
 {
 "column": "ITEM_BRAND_NAME",
 "encoding": "dict",
 "isShardBy": false
 },
 {
 "column": "ITEM_THIRD_CATE_NAME",
 "encoding": "dict",
 "isShardBy": false
 },
 {
 "column": "ITEM_SECOND_CATE_NAME",
 "encoding": "dict",
 "isShardBy": false
 },
 {
 "column": "ITEM_FIRST_CATE_NAME",
 "encoding": "dict",
 "isShardBy": false
 },
 {
 "column": "CITY_NAME",
 "encoding": "dict",
 "isShardBy": false
 },
 {
 "column": "PROVINCE_NAME",
 "encoding": "dict",
 "isShardBy": false
 },
 {
 "column": "REQ_ORDER_YEWU_TYPE",
 "encoding": "dict",
 "isShardBy": false
 },
 {
 "column": "ORDER_YEWU_TYPE",
 "encoding": "dict",
 "isShardBy": false
 },
 {
 "column": "PWDTYPE",
 "encoding": "dict",
 "isShardBy": false
 },
 {
 "column": "IS_NEW_CARD",
 "encoding": "dict",
 "isShardBy": false
 },
 {
 "column": "REAL_NAME",
 "encoding": "dict",
 "isShardBy": false
 },
 {
 "column": "PAY_TYPE",
 "encoding": "dict",
 "isShardBy": false
 }
 ]
 },
 "hbase_mapping": {
 "column_family": [
 {
 "name": "F1",
 "columns": [
 {
 "qualifier": "M",
 "measure_refs": [
 "_COUNT_",
 "应付金额",
 "商品成功支付金额"
 ]
 }
 ]
 },
 {
 "name": "F2",
 "columns": [
 {
 "qualifier": "M",
 "measure_refs": [
 "商城APP请求单",
 "请求订单数",
 "支付订单数",
 "成功支付订单数",
 "支付单数",
 "成功支付单数",
 "用户数"
 ]
 }
 ]
 }
 ]
 },
 "aggregation_groups": [
 {
 "includes": [
 "ETL_DT",
 "PWDTYPE",
 "REAL_NAME",
 "IS_NEW_CARD",
 "REQ_ORDER_YEWU_TYPE",
 "ORDER_YEWU_TYPE",
 "ITEM_FIRST_CATE_NAME",
 "ITEM_SECOND_CATE_NAME",
 "ITEM_THIRD_CATE_NAME",
 "ITEM_BRAND_NAME",
 "COUNTY_NAME",
 "CITY_NAME",
 "PROVINCE_NAME"
 ],
 "select_rule": {
 "hierarchy_dims": [
 [
 "COUNTY_NAME",
 "CITY_NAME",
 "PROVINCE_NAME"
 ],
 [
 "ITEM_FIRST_CATE_NAME",
 "ITEM_SECOND_CATE_NAME",
 "ITEM_THIRD_CATE_NAME"
 ]
 ],
 "mandatory_dims": [
 "ETL_DT"
 ],
 "joint_dims": []
 }
 }
 ],
 "signature": "Ukf/u2JnSO44TPQWDJFhyQ==",
 "notify_list": [],
 "status_need_notify": [
 "ERROR",
 "DISCARDED",
 "SUCCEED"
 ],
 "partition_date_start": 1478736000000,
 "partition_date_end": 3153600000000,
 "auto_merge_time_ranges": [
 604800000,
 2419200000
 ],
 "retention_range": 0,
 "engine_type": 2,
 "storage_type": 2,
 "override_kylin_properties": {}
}

Reply via email to