PageRenderTime 50ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/tags/release-0.2.0-rc0/hive/external/ql/src/test/results/clientpositive/udtf_parse_url_tuple.q.out

#
text | 675 lines | 655 code | 20 blank | 0 comment | 0 complexity | 005fccf2150c64af1f21fae8ddd14650 MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause, JSON, CPL-1.0
  1. PREHOOK: query: create table url_t (key string, fullurl string)
  2. PREHOOK: type: CREATETABLE
  3. POSTHOOK: query: create table url_t (key string, fullurl string)
  4. POSTHOOK: type: CREATETABLE
  5. POSTHOOK: Output: default@url_t
  6. PREHOOK: query: insert overwrite table url_t
  7. select * from (
  8. select '1', 'http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1' from src limit 1
  9. union all
  10. select '2', 'https://www.socs.uts.edu.au:80/MosaicDocs-old/url-primer.html?k1=tps#chapter1' from src limit 1
  11. union all
  12. select '3', 'ftp://sites.google.com/a/example.com/site/page' from src limit 1
  13. union all
  14. select '4', cast(null as string) from src limit 1
  15. union all
  16. select '5', 'htttp://' from src limit 1
  17. union all
  18. select '6', '[invalid url string]' from src limit 1
  19. ) s
  20. PREHOOK: type: QUERY
  21. PREHOOK: Input: default@src
  22. PREHOOK: Output: default@url_t
  23. POSTHOOK: query: insert overwrite table url_t
  24. select * from (
  25. select '1', 'http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1' from src limit 1
  26. union all
  27. select '2', 'https://www.socs.uts.edu.au:80/MosaicDocs-old/url-primer.html?k1=tps#chapter1' from src limit 1
  28. union all
  29. select '3', 'ftp://sites.google.com/a/example.com/site/page' from src limit 1
  30. union all
  31. select '4', cast(null as string) from src limit 1
  32. union all
  33. select '5', 'htttp://' from src limit 1
  34. union all
  35. select '6', '[invalid url string]' from src limit 1
  36. ) s
  37. POSTHOOK: type: QUERY
  38. POSTHOOK: Input: default@src
  39. POSTHOOK: Output: default@url_t
  40. POSTHOOK: Lineage: url_t.fullurl EXPRESSION []
  41. POSTHOOK: Lineage: url_t.key EXPRESSION []
  42. PREHOOK: query: describe function parse_url_tuple
  43. PREHOOK: type: DESCFUNCTION
  44. POSTHOOK: query: describe function parse_url_tuple
  45. POSTHOOK: type: DESCFUNCTION
  46. POSTHOOK: Lineage: url_t.fullurl EXPRESSION []
  47. POSTHOOK: Lineage: url_t.key EXPRESSION []
  48. parse_url_tuple(url, partname1, partname2, ..., partnameN) - extracts N (N>=1) parts from a URL.
  49. It takes a URL and one or multiple partnames, and returns a tuple. All the input parameters and output column types are string.
  50. PREHOOK: query: describe function extended parse_url_tuple
  51. PREHOOK: type: DESCFUNCTION
  52. POSTHOOK: query: describe function extended parse_url_tuple
  53. POSTHOOK: type: DESCFUNCTION
  54. POSTHOOK: Lineage: url_t.fullurl EXPRESSION []
  55. POSTHOOK: Lineage: url_t.key EXPRESSION []
  56. parse_url_tuple(url, partname1, partname2, ..., partnameN) - extracts N (N>=1) parts from a URL.
  57. It takes a URL and one or multiple partnames, and returns a tuple. All the input parameters and output column types are string.
  58. Partname: HOST, PATH, QUERY, REF, PROTOCOL, AUTHORITY, FILE, USERINFO, QUERY:<KEY_NAME>
  59. Note: Partnames are case-sensitive, and should not contain unnecessary white spaces.
  60. Example:
  61. > SELECT b.* FROM src LATERAL VIEW parse_url_tuple(fullurl, 'HOST', 'PATH', 'QUERY', 'QUERY:id') b as host, path, query, query_id LIMIT 1;
  62. > SELECT parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') as (ho, pa, qu, re, pr, fi, au, us, qk1) from src a;
  63. PREHOOK: query: explain
  64. select a.key, b.* from url_t a lateral view parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') b as ho, pa, qu, re, pr, fi, au, us, qk1 order by a.key
  65. PREHOOK: type: QUERY
  66. POSTHOOK: query: explain
  67. select a.key, b.* from url_t a lateral view parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') b as ho, pa, qu, re, pr, fi, au, us, qk1 order by a.key
  68. POSTHOOK: type: QUERY
  69. POSTHOOK: Lineage: url_t.fullurl EXPRESSION []
  70. POSTHOOK: Lineage: url_t.key EXPRESSION []
  71. ABSTRACT SYNTAX TREE:
  72. (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION parse_url_tuple (. (TOK_TABLE_OR_COL a) fullurl) 'HOST' 'PATH' 'QUERY' 'REF' 'PROTOCOL' 'FILE' 'AUTHORITY' 'USERINFO' 'QUERY:k1') ho pa qu re pr fi au us qk1 (TOK_TABALIAS b))) (TOK_TABREF (TOK_TABNAME url_t) a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)))))
  73. STAGE DEPENDENCIES:
  74. Stage-1 is a root stage
  75. Stage-0 is a root stage
  76. STAGE PLANS:
  77. Stage: Stage-1
  78. Map Reduce
  79. Alias -> Map Operator Tree:
  80. a
  81. TableScan
  82. alias: a
  83. Lateral View Forward
  84. Select Operator
  85. SELECT * : (no compute)
  86. Lateral View Join Operator
  87. outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
  88. Select Operator
  89. expressions:
  90. expr: _col0
  91. type: string
  92. expr: _col2
  93. type: string
  94. expr: _col3
  95. type: string
  96. expr: _col4
  97. type: string
  98. expr: _col5
  99. type: string
  100. expr: _col6
  101. type: string
  102. expr: _col7
  103. type: string
  104. expr: _col8
  105. type: string
  106. expr: _col9
  107. type: string
  108. expr: _col10
  109. type: string
  110. outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
  111. Reduce Output Operator
  112. key expressions:
  113. expr: _col0
  114. type: string
  115. sort order: +
  116. tag: -1
  117. value expressions:
  118. expr: _col0
  119. type: string
  120. expr: _col1
  121. type: string
  122. expr: _col2
  123. type: string
  124. expr: _col3
  125. type: string
  126. expr: _col4
  127. type: string
  128. expr: _col5
  129. type: string
  130. expr: _col6
  131. type: string
  132. expr: _col7
  133. type: string
  134. expr: _col8
  135. type: string
  136. expr: _col9
  137. type: string
  138. Select Operator
  139. expressions:
  140. expr: fullurl
  141. type: string
  142. expr: 'HOST'
  143. type: string
  144. expr: 'PATH'
  145. type: string
  146. expr: 'QUERY'
  147. type: string
  148. expr: 'REF'
  149. type: string
  150. expr: 'PROTOCOL'
  151. type: string
  152. expr: 'FILE'
  153. type: string
  154. expr: 'AUTHORITY'
  155. type: string
  156. expr: 'USERINFO'
  157. type: string
  158. expr: 'QUERY:k1'
  159. type: string
  160. outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
  161. UDTF Operator
  162. function name: parse_url_tuple
  163. Lateral View Join Operator
  164. outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
  165. Select Operator
  166. expressions:
  167. expr: _col0
  168. type: string
  169. expr: _col2
  170. type: string
  171. expr: _col3
  172. type: string
  173. expr: _col4
  174. type: string
  175. expr: _col5
  176. type: string
  177. expr: _col6
  178. type: string
  179. expr: _col7
  180. type: string
  181. expr: _col8
  182. type: string
  183. expr: _col9
  184. type: string
  185. expr: _col10
  186. type: string
  187. outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
  188. Reduce Output Operator
  189. key expressions:
  190. expr: _col0
  191. type: string
  192. sort order: +
  193. tag: -1
  194. value expressions:
  195. expr: _col0
  196. type: string
  197. expr: _col1
  198. type: string
  199. expr: _col2
  200. type: string
  201. expr: _col3
  202. type: string
  203. expr: _col4
  204. type: string
  205. expr: _col5
  206. type: string
  207. expr: _col6
  208. type: string
  209. expr: _col7
  210. type: string
  211. expr: _col8
  212. type: string
  213. expr: _col9
  214. type: string
  215. Reduce Operator Tree:
  216. Extract
  217. File Output Operator
  218. compressed: false
  219. GlobalTableId: 0
  220. table:
  221. input format: org.apache.hadoop.mapred.TextInputFormat
  222. output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
  223. Stage: Stage-0
  224. Fetch Operator
  225. limit: -1
  226. PREHOOK: query: select a.key, b.* from url_t a lateral view parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') b as ho, pa, qu, re, pr, fi, au, us, qk1 order by a.key
  227. PREHOOK: type: QUERY
  228. PREHOOK: Input: default@url_t
  229. PREHOOK: Output: file:/tmp/sdong/hive_2011-02-10_17-48-08_686_2438339126682195604/-mr-10000
  230. POSTHOOK: query: select a.key, b.* from url_t a lateral view parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') b as ho, pa, qu, re, pr, fi, au, us, qk1 order by a.key
  231. POSTHOOK: type: QUERY
  232. POSTHOOK: Input: default@url_t
  233. POSTHOOK: Output: file:/tmp/sdong/hive_2011-02-10_17-48-08_686_2438339126682195604/-mr-10000
  234. POSTHOOK: Lineage: url_t.fullurl EXPRESSION []
  235. POSTHOOK: Lineage: url_t.key EXPRESSION []
  236. 1 facebook.com /path1/p.php k1=v1&k2=v2 Ref1 http /path1/p.php?k1=v1&k2=v2 facebook.com NULL v1
  237. 2 www.socs.uts.edu.au /MosaicDocs-old/url-primer.html k1=tps chapter1 https /MosaicDocs-old/url-primer.html?k1=tps www.socs.uts.edu.au:80 NULL tps
  238. 3 sites.google.com /a/example.com/site/page NULL NULL ftp /a/example.com/site/page sites.google.com NULL NULL
  239. 4 NULL NULL NULL NULL NULL NULL NULL NULL NULL
  240. 5 NULL NULL NULL NULL NULL NULL NULL NULL NULL
  241. 6 NULL NULL NULL NULL NULL NULL NULL NULL NULL
  242. PREHOOK: query: explain
  243. select parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') as (ho, pa, qu, re, pr, fi, au, us, qk1) from url_t a order by ho, pa, qu
  244. PREHOOK: type: QUERY
  245. POSTHOOK: query: explain
  246. select parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') as (ho, pa, qu, re, pr, fi, au, us, qk1) from url_t a order by ho, pa, qu
  247. POSTHOOK: type: QUERY
  248. POSTHOOK: Lineage: url_t.fullurl EXPRESSION []
  249. POSTHOOK: Lineage: url_t.key EXPRESSION []
  250. ABSTRACT SYNTAX TREE:
  251. (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME url_t) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION parse_url_tuple (. (TOK_TABLE_OR_COL a) fullurl) 'HOST' 'PATH' 'QUERY' 'REF' 'PROTOCOL' 'FILE' 'AUTHORITY' 'USERINFO' 'QUERY:k1') ho pa qu re pr fi au us qk1)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL ho)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL pa)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL qu)))))
  252. STAGE DEPENDENCIES:
  253. Stage-1 is a root stage
  254. Stage-0 is a root stage
  255. STAGE PLANS:
  256. Stage: Stage-1
  257. Map Reduce
  258. Alias -> Map Operator Tree:
  259. a
  260. TableScan
  261. alias: a
  262. Select Operator
  263. expressions:
  264. expr: fullurl
  265. type: string
  266. expr: 'HOST'
  267. type: string
  268. expr: 'PATH'
  269. type: string
  270. expr: 'QUERY'
  271. type: string
  272. expr: 'REF'
  273. type: string
  274. expr: 'PROTOCOL'
  275. type: string
  276. expr: 'FILE'
  277. type: string
  278. expr: 'AUTHORITY'
  279. type: string
  280. expr: 'USERINFO'
  281. type: string
  282. expr: 'QUERY:k1'
  283. type: string
  284. outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
  285. UDTF Operator
  286. function name: parse_url_tuple
  287. Reduce Output Operator
  288. key expressions:
  289. expr: c0
  290. type: string
  291. expr: c1
  292. type: string
  293. expr: c2
  294. type: string
  295. sort order: +++
  296. tag: -1
  297. value expressions:
  298. expr: c0
  299. type: string
  300. expr: c1
  301. type: string
  302. expr: c2
  303. type: string
  304. expr: c3
  305. type: string
  306. expr: c4
  307. type: string
  308. expr: c5
  309. type: string
  310. expr: c6
  311. type: string
  312. expr: c7
  313. type: string
  314. expr: c8
  315. type: string
  316. Reduce Operator Tree:
  317. Extract
  318. File Output Operator
  319. compressed: false
  320. GlobalTableId: 0
  321. table:
  322. input format: org.apache.hadoop.mapred.TextInputFormat
  323. output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
  324. Stage: Stage-0
  325. Fetch Operator
  326. limit: -1
  327. PREHOOK: query: select parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') as (ho, pa, qu, re, pr, fi, au, us, qk1) from url_t a order by ho, pa, qu
  328. PREHOOK: type: QUERY
  329. PREHOOK: Input: default@url_t
  330. PREHOOK: Output: file:/tmp/sdong/hive_2011-02-10_17-48-12_281_5919341324759760546/-mr-10000
  331. POSTHOOK: query: select parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') as (ho, pa, qu, re, pr, fi, au, us, qk1) from url_t a order by ho, pa, qu
  332. POSTHOOK: type: QUERY
  333. POSTHOOK: Input: default@url_t
  334. POSTHOOK: Output: file:/tmp/sdong/hive_2011-02-10_17-48-12_281_5919341324759760546/-mr-10000
  335. POSTHOOK: Lineage: url_t.fullurl EXPRESSION []
  336. POSTHOOK: Lineage: url_t.key EXPRESSION []
  337. NULL NULL NULL NULL NULL NULL NULL NULL NULL
  338. NULL NULL NULL NULL NULL NULL NULL NULL NULL
  339. NULL NULL NULL NULL NULL NULL NULL NULL NULL
  340. facebook.com /path1/p.php k1=v1&k2=v2 Ref1 http /path1/p.php?k1=v1&k2=v2 facebook.com NULL v1
  341. sites.google.com /a/example.com/site/page NULL NULL ftp /a/example.com/site/page sites.google.com NULL NULL
  342. www.socs.uts.edu.au /MosaicDocs-old/url-primer.html k1=tps chapter1 https /MosaicDocs-old/url-primer.html?k1=tps www.socs.uts.edu.au:80 NULL tps
  343. PREHOOK: query: -- should return null for 'host', 'query', 'QUERY:nonExistCol'
  344. explain
  345. select a.key, b.ho, b.qu, b.qk1, b.err1, b.err2, b.err3 from url_t a lateral view parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1', 'host', 'query', 'QUERY:nonExistCol') b as ho, pa, qu, re, pr, fi, au, us, qk1, err1, err2, err3 order by a.key
  346. PREHOOK: type: QUERY
  347. POSTHOOK: query: -- should return null for 'host', 'query', 'QUERY:nonExistCol'
  348. explain
  349. select a.key, b.ho, b.qu, b.qk1, b.err1, b.err2, b.err3 from url_t a lateral view parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1', 'host', 'query', 'QUERY:nonExistCol') b as ho, pa, qu, re, pr, fi, au, us, qk1, err1, err2, err3 order by a.key
  350. POSTHOOK: type: QUERY
  351. POSTHOOK: Lineage: url_t.fullurl EXPRESSION []
  352. POSTHOOK: Lineage: url_t.key EXPRESSION []
  353. ABSTRACT SYNTAX TREE:
  354. (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION parse_url_tuple (. (TOK_TABLE_OR_COL a) fullurl) 'HOST' 'PATH' 'QUERY' 'REF' 'PROTOCOL' 'FILE' 'AUTHORITY' 'USERINFO' 'QUERY:k1' 'host' 'query' 'QUERY:nonExistCol') ho pa qu re pr fi au us qk1 err1 err2 err3 (TOK_TABALIAS b))) (TOK_TABREF (TOK_TABNAME url_t) a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) ho)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) qu)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) qk1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) err1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) err2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) err3))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)))))
  355. STAGE DEPENDENCIES:
  356. Stage-1 is a root stage
  357. Stage-0 is a root stage
  358. STAGE PLANS:
  359. Stage: Stage-1
  360. Map Reduce
  361. Alias -> Map Operator Tree:
  362. a
  363. TableScan
  364. alias: a
  365. Lateral View Forward
  366. Select Operator
  367. SELECT * : (no compute)
  368. Lateral View Join Operator
  369. outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
  370. Select Operator
  371. expressions:
  372. expr: _col0
  373. type: string
  374. expr: _col2
  375. type: string
  376. expr: _col4
  377. type: string
  378. expr: _col10
  379. type: string
  380. expr: _col11
  381. type: string
  382. expr: _col12
  383. type: string
  384. expr: _col13
  385. type: string
  386. outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
  387. Reduce Output Operator
  388. key expressions:
  389. expr: _col0
  390. type: string
  391. sort order: +
  392. tag: -1
  393. value expressions:
  394. expr: _col0
  395. type: string
  396. expr: _col1
  397. type: string
  398. expr: _col2
  399. type: string
  400. expr: _col3
  401. type: string
  402. expr: _col4
  403. type: string
  404. expr: _col5
  405. type: string
  406. expr: _col6
  407. type: string
  408. Select Operator
  409. expressions:
  410. expr: fullurl
  411. type: string
  412. expr: 'HOST'
  413. type: string
  414. expr: 'PATH'
  415. type: string
  416. expr: 'QUERY'
  417. type: string
  418. expr: 'REF'
  419. type: string
  420. expr: 'PROTOCOL'
  421. type: string
  422. expr: 'FILE'
  423. type: string
  424. expr: 'AUTHORITY'
  425. type: string
  426. expr: 'USERINFO'
  427. type: string
  428. expr: 'QUERY:k1'
  429. type: string
  430. expr: 'host'
  431. type: string
  432. expr: 'query'
  433. type: string
  434. expr: 'QUERY:nonExistCol'
  435. type: string
  436. outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
  437. UDTF Operator
  438. function name: parse_url_tuple
  439. Lateral View Join Operator
  440. outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
  441. Select Operator
  442. expressions:
  443. expr: _col0
  444. type: string
  445. expr: _col2
  446. type: string
  447. expr: _col4
  448. type: string
  449. expr: _col10
  450. type: string
  451. expr: _col11
  452. type: string
  453. expr: _col12
  454. type: string
  455. expr: _col13
  456. type: string
  457. outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
  458. Reduce Output Operator
  459. key expressions:
  460. expr: _col0
  461. type: string
  462. sort order: +
  463. tag: -1
  464. value expressions:
  465. expr: _col0
  466. type: string
  467. expr: _col1
  468. type: string
  469. expr: _col2
  470. type: string
  471. expr: _col3
  472. type: string
  473. expr: _col4
  474. type: string
  475. expr: _col5
  476. type: string
  477. expr: _col6
  478. type: string
  479. Reduce Operator Tree:
  480. Extract
  481. File Output Operator
  482. compressed: false
  483. GlobalTableId: 0
  484. table:
  485. input format: org.apache.hadoop.mapred.TextInputFormat
  486. output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
  487. Stage: Stage-0
  488. Fetch Operator
  489. limit: -1
  490. PREHOOK: query: select a.key, b.ho, b.qu, b.qk1, b.err1, b.err2, b.err3 from url_t a lateral view parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1', 'host', 'query', 'QUERY:nonExistCol') b as ho, pa, qu, re, pr, fi, au, us, qk1, err1, err2, err3 order by a.key
  491. PREHOOK: type: QUERY
  492. PREHOOK: Input: default@url_t
  493. PREHOOK: Output: file:/tmp/sdong/hive_2011-02-10_17-48-16_477_4910097906781584970/-mr-10000
  494. POSTHOOK: query: select a.key, b.ho, b.qu, b.qk1, b.err1, b.err2, b.err3 from url_t a lateral view parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1', 'host', 'query', 'QUERY:nonExistCol') b as ho, pa, qu, re, pr, fi, au, us, qk1, err1, err2, err3 order by a.key
  495. POSTHOOK: type: QUERY
  496. POSTHOOK: Input: default@url_t
  497. POSTHOOK: Output: file:/tmp/sdong/hive_2011-02-10_17-48-16_477_4910097906781584970/-mr-10000
  498. POSTHOOK: Lineage: url_t.fullurl EXPRESSION []
  499. POSTHOOK: Lineage: url_t.key EXPRESSION []
  500. 1 facebook.com k1=v1&k2=v2 v1 NULL NULL NULL
  501. 2 www.socs.uts.edu.au k1=tps tps NULL NULL NULL
  502. 3 sites.google.com NULL NULL NULL NULL NULL
  503. 4 NULL NULL NULL NULL NULL NULL
  504. 5 NULL NULL NULL NULL NULL NULL
  505. 6 NULL NULL NULL NULL NULL NULL
  506. PREHOOK: query: explain
  507. select ho, count(*) from url_t a lateral view parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') b as ho, pa, qu, re, pr, fi, au, us, qk1 where qk1 is not null group by ho
  508. PREHOOK: type: QUERY
  509. POSTHOOK: query: explain
  510. select ho, count(*) from url_t a lateral view parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') b as ho, pa, qu, re, pr, fi, au, us, qk1 where qk1 is not null group by ho
  511. POSTHOOK: type: QUERY
  512. POSTHOOK: Lineage: url_t.fullurl EXPRESSION []
  513. POSTHOOK: Lineage: url_t.key EXPRESSION []
  514. ABSTRACT SYNTAX TREE:
  515. (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION parse_url_tuple (. (TOK_TABLE_OR_COL a) fullurl) 'HOST' 'PATH' 'QUERY' 'REF' 'PROTOCOL' 'FILE' 'AUTHORITY' 'USERINFO' 'QUERY:k1') ho pa qu re pr fi au us qk1 (TOK_TABALIAS b))) (TOK_TABREF (TOK_TABNAME url_t) a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL ho)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL qk1))) (TOK_GROUPBY (TOK_TABLE_OR_COL ho))))
  516. STAGE DEPENDENCIES:
  517. Stage-1 is a root stage
  518. Stage-0 is a root stage
  519. STAGE PLANS:
  520. Stage: Stage-1
  521. Map Reduce
  522. Alias -> Map Operator Tree:
  523. a
  524. TableScan
  525. alias: a
  526. Lateral View Forward
  527. Select Operator
  528. SELECT * : (no compute)
  529. Lateral View Join Operator
  530. outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
  531. Filter Operator
  532. predicate:
  533. expr: _col10 is not null
  534. type: boolean
  535. Select Operator
  536. expressions:
  537. expr: _col2
  538. type: string
  539. outputColumnNames: _col2
  540. Group By Operator
  541. aggregations:
  542. expr: count()
  543. bucketGroup: false
  544. keys:
  545. expr: _col2
  546. type: string
  547. mode: hash
  548. outputColumnNames: _col0, _col1
  549. Reduce Output Operator
  550. key expressions:
  551. expr: _col0
  552. type: string
  553. sort order: +
  554. Map-reduce partition columns:
  555. expr: _col0
  556. type: string
  557. tag: -1
  558. value expressions:
  559. expr: _col1
  560. type: bigint
  561. Select Operator
  562. expressions:
  563. expr: fullurl
  564. type: string
  565. expr: 'HOST'
  566. type: string
  567. expr: 'PATH'
  568. type: string
  569. expr: 'QUERY'
  570. type: string
  571. expr: 'REF'
  572. type: string
  573. expr: 'PROTOCOL'
  574. type: string
  575. expr: 'FILE'
  576. type: string
  577. expr: 'AUTHORITY'
  578. type: string
  579. expr: 'USERINFO'
  580. type: string
  581. expr: 'QUERY:k1'
  582. type: string
  583. outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
  584. UDTF Operator
  585. function name: parse_url_tuple
  586. Lateral View Join Operator
  587. outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
  588. Filter Operator
  589. predicate:
  590. expr: _col10 is not null
  591. type: boolean
  592. Select Operator
  593. expressions:
  594. expr: _col2
  595. type: string
  596. outputColumnNames: _col2
  597. Group By Operator
  598. aggregations:
  599. expr: count()
  600. bucketGroup: false
  601. keys:
  602. expr: _col2
  603. type: string
  604. mode: hash
  605. outputColumnNames: _col0, _col1
  606. Reduce Output Operator
  607. key expressions:
  608. expr: _col0
  609. type: string
  610. sort order: +
  611. Map-reduce partition columns:
  612. expr: _col0
  613. type: string
  614. tag: -1
  615. value expressions:
  616. expr: _col1
  617. type: bigint
  618. Reduce Operator Tree:
  619. Group By Operator
  620. aggregations:
  621. expr: count(VALUE._col0)
  622. bucketGroup: false
  623. keys:
  624. expr: KEY._col0
  625. type: string
  626. mode: mergepartial
  627. outputColumnNames: _col0, _col1
  628. Select Operator
  629. expressions:
  630. expr: _col0
  631. type: string
  632. expr: _col1
  633. type: bigint
  634. outputColumnNames: _col0, _col1
  635. File Output Operator
  636. compressed: false
  637. GlobalTableId: 0
  638. table:
  639. input format: org.apache.hadoop.mapred.TextInputFormat
  640. output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
  641. Stage: Stage-0
  642. Fetch Operator
  643. limit: -1
  644. PREHOOK: query: select ho, count(*) from url_t a lateral view parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') b as ho, pa, qu, re, pr, fi, au, us, qk1 where qk1 is not null group by ho
  645. PREHOOK: type: QUERY
  646. PREHOOK: Input: default@url_t
  647. PREHOOK: Output: file:/tmp/sdong/hive_2011-02-10_17-48-20_142_8723043944221910244/-mr-10000
  648. POSTHOOK: query: select ho, count(*) from url_t a lateral view parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') b as ho, pa, qu, re, pr, fi, au, us, qk1 where qk1 is not null group by ho
  649. POSTHOOK: type: QUERY
  650. POSTHOOK: Input: default@url_t
  651. POSTHOOK: Output: file:/tmp/sdong/hive_2011-02-10_17-48-20_142_8723043944221910244/-mr-10000
  652. POSTHOOK: Lineage: url_t.fullurl EXPRESSION []
  653. POSTHOOK: Lineage: url_t.key EXPRESSION []
  654. facebook.com 1
  655. www.socs.uts.edu.au 1