/tags/release-0.0.0-rc0/hive/external/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out
text | 313 lines | 305 code | 8 blank | 0 comment | 0 complexity | efaa452beddc46c6b9c00b998b976dd6 MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause, JSON, CPL-1.0
- PREHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, C3 INT, c4 INT) STORED AS TEXTFILE
- PREHOOK: type: CREATETABLE
- POSTHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, C3 INT, c4 INT) STORED AS TEXTFILE
- POSTHOOK: type: CREATETABLE
- POSTHOOK: Output: default@dest1
- PREHOOK: query: EXPLAIN EXTENDED
- FROM srcpart src
- INSERT OVERWRITE TABLE dest1
- SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value)
- WHERE src.ds = '2008-04-08'
- GROUP BY substr(src.key,1,1)
- PREHOOK: type: QUERY
- POSTHOOK: query: EXPLAIN EXTENDED
- FROM srcpart src
- INSERT OVERWRITE TABLE dest1
- SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value)
- WHERE src.ds = '2008-04-08'
- GROUP BY substr(src.key,1,1)
- POSTHOOK: type: QUERY
- ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI count (. (TOK_TABLE_OR_COL src) value)))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL src) ds) '2008-04-08')) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))))
- STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-0
- STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Alias -> Map Operator Tree:
- src
- TableScan
- alias: src
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate:
- expr: (ds = '2008-04-08')
- type: boolean
- Select Operator
- expressions:
- expr: key
- type: string
- expr: value
- type: string
- outputColumnNames: key, value
- Group By Operator
- aggregations:
- expr: count(DISTINCT substr(value, 5))
- expr: sum(substr(value, 5))
- expr: sum(DISTINCT substr(value, 5))
- expr: count(DISTINCT value)
- bucketGroup: false
- keys:
- expr: substr(key, 1, 1)
- type: string
- expr: substr(value, 5)
- type: string
- expr: value
- type: string
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: string
- expr: _col1
- type: string
- expr: _col2
- type: string
- sort order: +++
- Map-reduce partition columns:
- expr: _col0
- type: string
- tag: -1
- value expressions:
- expr: _col3
- type: bigint
- expr: _col4
- type: double
- expr: _col5
- type: double
- expr: _col6
- type: bigint
- Needs Tagging: false
- Path -> Alias:
- pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 [src]
- pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 [src]
- Path -> Partition:
- pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11
- Partition
- base file name: hr=11
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
- hr 11
- properties:
- bucket_count -1
- columns key,value
- columns.types string:string
- file.inputformat org.apache.hadoop.mapred.TextInputFormat
- file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11
- name default.srcpart
- partition_columns ds/hr
- serialization.ddl struct srcpart { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- transient_lastDdlTime 1297328959
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value
- columns.types string:string
- file.inputformat org.apache.hadoop.mapred.TextInputFormat
- file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/srcpart
- name default.srcpart
- partition_columns ds/hr
- serialization.ddl struct srcpart { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- transient_lastDdlTime 1297328959
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcpart
- name: default.srcpart
- pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12
- Partition
- base file name: hr=12
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
- hr 12
- properties:
- bucket_count -1
- columns key,value
- columns.types string:string
- file.inputformat org.apache.hadoop.mapred.TextInputFormat
- file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12
- name default.srcpart
- partition_columns ds/hr
- serialization.ddl struct srcpart { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- transient_lastDdlTime 1297328959
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value
- columns.types string:string
- file.inputformat org.apache.hadoop.mapred.TextInputFormat
- file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/srcpart
- name default.srcpart
- partition_columns ds/hr
- serialization.ddl struct srcpart { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- transient_lastDdlTime 1297328959
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcpart
- name: default.srcpart
- Reduce Operator Tree:
- Group By Operator
- aggregations:
- expr: count(DISTINCT KEY._col1:0._col0)
- expr: sum(VALUE._col1)
- expr: sum(DISTINCT KEY._col1:1._col0)
- expr: count(DISTINCT KEY._col1:2._col0)
- bucketGroup: false
- keys:
- expr: KEY._col0
- type: string
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Select Operator
- expressions:
- expr: _col0
- type: string
- expr: _col1
- type: bigint
- expr: concat(_col0, _col2)
- type: string
- expr: _col3
- type: double
- expr: _col4
- type: bigint
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Select Operator
- expressions:
- expr: _col0
- type: string
- expr: UDFToInteger(_col1)
- type: int
- expr: _col2
- type: string
- expr: UDFToInteger(_col3)
- type: int
- expr: UDFToInteger(_col4)
- type: int
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
- File Output Operator
- compressed: false
- GlobalTableId: 1
- directory: pfile:/data/users/sdong/www/open-source-hive1/build/ql/scratchdir/hive_2011-02-10_01-54-56_939_392063944204159853/-ext-10000
- NumFilesPerFileSink: 1
- Stats Publishing Key Prefix: pfile:/data/users/sdong/www/open-source-hive1/build/ql/scratchdir/hive_2011-02-10_01-54-56_939_392063944204159853/-ext-10000/
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,c1,c2,c3,c4
- columns.types string:int:string:int:int
- file.inputformat org.apache.hadoop.mapred.TextInputFormat
- file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/dest1
- name default.dest1
- serialization.ddl struct dest1 { string key, i32 c1, string c2, i32 c3, i32 c4}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- transient_lastDdlTime 1297331696
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
- Stage: Stage-0
- Move Operator
- tables:
- replace: true
- source: pfile:/data/users/sdong/www/open-source-hive1/build/ql/scratchdir/hive_2011-02-10_01-54-56_939_392063944204159853/-ext-10000
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,c1,c2,c3,c4
- columns.types string:int:string:int:int
- file.inputformat org.apache.hadoop.mapred.TextInputFormat
- file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/dest1
- name default.dest1
- serialization.ddl struct dest1 { string key, i32 c1, string c2, i32 c3, i32 c4}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- transient_lastDdlTime 1297331696
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
- tmp directory: pfile:/data/users/sdong/www/open-source-hive1/build/ql/scratchdir/hive_2011-02-10_01-54-56_939_392063944204159853/-ext-10001
- Stage: Stage-2
- Stats-Aggr Operator
- Stats Aggregation Key Prefix: pfile:/data/users/sdong/www/open-source-hive1/build/ql/scratchdir/hive_2011-02-10_01-54-56_939_392063944204159853/-ext-10000/
- PREHOOK: query: FROM srcpart src
- INSERT OVERWRITE TABLE dest1
- SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value)
- WHERE src.ds = '2008-04-08'
- GROUP BY substr(src.key,1,1)
- PREHOOK: type: QUERY
- PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
- PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
- PREHOOK: Output: default@dest1
- POSTHOOK: query: FROM srcpart src
- INSERT OVERWRITE TABLE dest1
- SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value)
- WHERE src.ds = '2008-04-08'
- GROUP BY substr(src.key,1,1)
- POSTHOOK: type: QUERY
- POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
- POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
- POSTHOOK: Output: default@dest1
- POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ]
- POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ]
- POSTHOOK: Lineage: dest1.c3 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ]
- POSTHOOK: Lineage: dest1.c4 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ]
- POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ]
- PREHOOK: query: SELECT dest1.* FROM dest1
- PREHOOK: type: QUERY
- PREHOOK: Input: default@dest1
- PREHOOK: Output: file:/tmp/sdong/hive_2011-02-10_01-55-03_112_7822131255872092269/-mr-10000
- POSTHOOK: query: SELECT dest1.* FROM dest1
- POSTHOOK: type: QUERY
- POSTHOOK: Input: default@dest1
- POSTHOOK: Output: file:/tmp/sdong/hive_2011-02-10_01-55-03_112_7822131255872092269/-mr-10000
- POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ]
- POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ]
- POSTHOOK: Lineage: dest1.c3 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ]
- POSTHOOK: Lineage: dest1.c4 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ]
- POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ]
- 0 1 00.0 0 1
- 1 71 132828.0 10044 71
- 2 69 251142.0 15780 69
- 3 62 364008.0 20119 62
- 4 74 4105526.0 30965 74
- 5 6 5794.0 278 6
- 6 5 6796.0 331 5
- 7 6 71470.0 447 6
- 8 8 81524.0 595 8
- 9 7 92094.0 577 7