PageRenderTime 48ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/tags/release-0.1-rc2/hive/external/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java

#
Java | 419 lines | 284 code | 53 blank | 82 comment | 23 complexity | a7072f90cd4866f86583914fe1047e36 MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause, JSON, CPL-1.0
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.hive.ql.parse;
  19. import java.util.ArrayList;
  20. import java.util.Collection;
  21. import java.util.HashMap;
  22. import org.antlr.runtime.ANTLRStringStream;
  23. import org.antlr.runtime.BitSet;
  24. import org.antlr.runtime.CharStream;
  25. import org.antlr.runtime.IntStream;
  26. import org.antlr.runtime.MismatchedTokenException;
  27. import org.antlr.runtime.NoViableAltException;
  28. import org.antlr.runtime.RecognitionException;
  29. import org.antlr.runtime.Token;
  30. import org.antlr.runtime.TokenRewriteStream;
  31. import org.antlr.runtime.TokenStream;
  32. import org.antlr.runtime.tree.CommonTreeAdaptor;
  33. import org.antlr.runtime.tree.TreeAdaptor;
  34. import org.apache.commons.logging.Log;
  35. import org.apache.commons.logging.LogFactory;
  36. import org.apache.hadoop.hive.ql.Context;
  37. /**
  38. * ParseDriver.
  39. *
  40. */
  41. public class ParseDriver {
  42. private static final Log LOG = LogFactory.getLog("hive.ql.parse.ParseDriver");
  43. private static HashMap<String, String> xlateMap;
  44. static {
  45. xlateMap = new HashMap<String, String>();
  46. // Keywords
  47. xlateMap.put("KW_TRUE", "TRUE");
  48. xlateMap.put("KW_FALSE", "FALSE");
  49. xlateMap.put("KW_ALL", "ALL");
  50. xlateMap.put("KW_AND", "AND");
  51. xlateMap.put("KW_OR", "OR");
  52. xlateMap.put("KW_NOT", "NOT");
  53. xlateMap.put("KW_LIKE", "LIKE");
  54. xlateMap.put("KW_ASC", "ASC");
  55. xlateMap.put("KW_DESC", "DESC");
  56. xlateMap.put("KW_ORDER", "ORDER");
  57. xlateMap.put("KW_BY", "BY");
  58. xlateMap.put("KW_GROUP", "GROUP");
  59. xlateMap.put("KW_WHERE", "WHERE");
  60. xlateMap.put("KW_FROM", "FROM");
  61. xlateMap.put("KW_AS", "AS");
  62. xlateMap.put("KW_SELECT", "SELECT");
  63. xlateMap.put("KW_DISTINCT", "DISTINCT");
  64. xlateMap.put("KW_INSERT", "INSERT");
  65. xlateMap.put("KW_OVERWRITE", "OVERWRITE");
  66. xlateMap.put("KW_OUTER", "OUTER");
  67. xlateMap.put("KW_JOIN", "JOIN");
  68. xlateMap.put("KW_LEFT", "LEFT");
  69. xlateMap.put("KW_RIGHT", "RIGHT");
  70. xlateMap.put("KW_FULL", "FULL");
  71. xlateMap.put("KW_ON", "ON");
  72. xlateMap.put("KW_PARTITION", "PARTITION");
  73. xlateMap.put("KW_PARTITIONS", "PARTITIONS");
  74. xlateMap.put("KW_TABLE", "TABLE");
  75. xlateMap.put("KW_TABLES", "TABLES");
  76. xlateMap.put("KW_SHOW", "SHOW");
  77. xlateMap.put("KW_MSCK", "MSCK");
  78. xlateMap.put("KW_DIRECTORY", "DIRECTORY");
  79. xlateMap.put("KW_LOCAL", "LOCAL");
  80. xlateMap.put("KW_TRANSFORM", "TRANSFORM");
  81. xlateMap.put("KW_USING", "USING");
  82. xlateMap.put("KW_CLUSTER", "CLUSTER");
  83. xlateMap.put("KW_DISTRIBUTE", "DISTRIBUTE");
  84. xlateMap.put("KW_SORT", "SORT");
  85. xlateMap.put("KW_UNION", "UNION");
  86. xlateMap.put("KW_LOAD", "LOAD");
  87. xlateMap.put("KW_DATA", "DATA");
  88. xlateMap.put("KW_INPATH", "INPATH");
  89. xlateMap.put("KW_IS", "IS");
  90. xlateMap.put("KW_NULL", "NULL");
  91. xlateMap.put("KW_CREATE", "CREATE");
  92. xlateMap.put("KW_EXTERNAL", "EXTERNAL");
  93. xlateMap.put("KW_ALTER", "ALTER");
  94. xlateMap.put("KW_DESCRIBE", "DESCRIBE");
  95. xlateMap.put("KW_DROP", "DROP");
  96. xlateMap.put("KW_REANME", "REANME");
  97. xlateMap.put("KW_TO", "TO");
  98. xlateMap.put("KW_COMMENT", "COMMENT");
  99. xlateMap.put("KW_BOOLEAN", "BOOLEAN");
  100. xlateMap.put("KW_TINYINT", "TINYINT");
  101. xlateMap.put("KW_SMALLINT", "SMALLINT");
  102. xlateMap.put("KW_INT", "INT");
  103. xlateMap.put("KW_BIGINT", "BIGINT");
  104. xlateMap.put("KW_FLOAT", "FLOAT");
  105. xlateMap.put("KW_DOUBLE", "DOUBLE");
  106. xlateMap.put("KW_DATE", "DATE");
  107. xlateMap.put("KW_DATETIME", "DATETIME");
  108. xlateMap.put("KW_TIMESTAMP", "TIMESTAMP");
  109. xlateMap.put("KW_STRING", "STRING");
  110. xlateMap.put("KW_ARRAY", "ARRAY");
  111. xlateMap.put("KW_MAP", "MAP");
  112. xlateMap.put("KW_REDUCE", "REDUCE");
  113. xlateMap.put("KW_PARTITIONED", "PARTITIONED");
  114. xlateMap.put("KW_CLUSTERED", "CLUSTERED");
  115. xlateMap.put("KW_SORTED", "SORTED");
  116. xlateMap.put("KW_INTO", "INTO");
  117. xlateMap.put("KW_BUCKETS", "BUCKETS");
  118. xlateMap.put("KW_ROW", "ROW");
  119. xlateMap.put("KW_FORMAT", "FORMAT");
  120. xlateMap.put("KW_DELIMITED", "DELIMITED");
  121. xlateMap.put("KW_FIELDS", "FIELDS");
  122. xlateMap.put("KW_TERMINATED", "TERMINATED");
  123. xlateMap.put("KW_COLLECTION", "COLLECTION");
  124. xlateMap.put("KW_ITEMS", "ITEMS");
  125. xlateMap.put("KW_KEYS", "KEYS");
  126. xlateMap.put("KW_KEY_TYPE", "$KEY$");
  127. xlateMap.put("KW_LINES", "LINES");
  128. xlateMap.put("KW_STORED", "STORED");
  129. xlateMap.put("KW_SEQUENCEFILE", "SEQUENCEFILE");
  130. xlateMap.put("KW_TEXTFILE", "TEXTFILE");
  131. xlateMap.put("KW_INPUTFORMAT", "INPUTFORMAT");
  132. xlateMap.put("KW_OUTPUTFORMAT", "OUTPUTFORMAT");
  133. xlateMap.put("KW_LOCATION", "LOCATION");
  134. xlateMap.put("KW_TABLESAMPLE", "TABLESAMPLE");
  135. xlateMap.put("KW_BUCKET", "BUCKET");
  136. xlateMap.put("KW_OUT", "OUT");
  137. xlateMap.put("KW_OF", "OF");
  138. xlateMap.put("KW_CAST", "CAST");
  139. xlateMap.put("KW_ADD", "ADD");
  140. xlateMap.put("KW_REPLACE", "REPLACE");
  141. xlateMap.put("KW_COLUMNS", "COLUMNS");
  142. xlateMap.put("KW_RLIKE", "RLIKE");
  143. xlateMap.put("KW_REGEXP", "REGEXP");
  144. xlateMap.put("KW_TEMPORARY", "TEMPORARY");
  145. xlateMap.put("KW_FUNCTION", "FUNCTION");
  146. xlateMap.put("KW_EXPLAIN", "EXPLAIN");
  147. xlateMap.put("KW_EXTENDED", "EXTENDED");
  148. xlateMap.put("KW_SERDE", "SERDE");
  149. xlateMap.put("KW_WITH", "WITH");
  150. xlateMap.put("KW_SERDEPROPERTIES", "SERDEPROPERTIES");
  151. xlateMap.put("KW_LIMIT", "LIMIT");
  152. xlateMap.put("KW_SET", "SET");
  153. xlateMap.put("KW_PROPERTIES", "TBLPROPERTIES");
  154. xlateMap.put("KW_VALUE_TYPE", "$VALUE$");
  155. xlateMap.put("KW_ELEM_TYPE", "$ELEM$");
  156. // Operators
  157. xlateMap.put("DOT", ".");
  158. xlateMap.put("COLON", ":");
  159. xlateMap.put("COMMA", ",");
  160. xlateMap.put("SEMICOLON", ");");
  161. xlateMap.put("LPAREN", "(");
  162. xlateMap.put("RPAREN", ")");
  163. xlateMap.put("LSQUARE", "[");
  164. xlateMap.put("RSQUARE", "]");
  165. xlateMap.put("EQUAL", "=");
  166. xlateMap.put("NOTEQUAL", "<>");
  167. xlateMap.put("LESSTHANOREQUALTO", "<=");
  168. xlateMap.put("LESSTHAN", "<");
  169. xlateMap.put("GREATERTHANOREQUALTO", ">=");
  170. xlateMap.put("GREATERTHAN", ">");
  171. xlateMap.put("DIVIDE", "/");
  172. xlateMap.put("PLUS", "+");
  173. xlateMap.put("MINUS", "-");
  174. xlateMap.put("STAR", "*");
  175. xlateMap.put("MOD", "%");
  176. xlateMap.put("AMPERSAND", "&");
  177. xlateMap.put("TILDE", "~");
  178. xlateMap.put("BITWISEOR", "|");
  179. xlateMap.put("BITWISEXOR", "^");
  180. }
  181. public static Collection<String> getKeywords() {
  182. return xlateMap.values();
  183. }
  184. private static String xlate(String name) {
  185. String ret = xlateMap.get(name);
  186. if (ret == null) {
  187. ret = name;
  188. }
  189. return ret;
  190. }
  191. /**
  192. * ANTLRNoCaseStringStream.
  193. *
  194. */
  195. //This class provides and implementation for a case insensitive token checker
  196. //for the lexical analysis part of antlr. By converting the token stream into
  197. //upper case at the time when lexical rules are checked, this class ensures that the
  198. //lexical rules need to just match the token with upper case letters as opposed to
  199. //combination of upper case and lower case characteres. This is purely used for matching lexical
  200. //rules. The actual token text is stored in the same way as the user input without
  201. //actually converting it into an upper case. The token values are generated by the consume()
  202. //function of the super class ANTLRStringStream. The LA() function is the lookahead funtion
  203. //and is purely used for matching lexical rules. This also means that the grammar will only
  204. //accept capitalized tokens in case it is run from other tools like antlrworks which
  205. //do not have the ANTLRNoCaseStringStream implementation.
  206. public class ANTLRNoCaseStringStream extends ANTLRStringStream {
  207. public ANTLRNoCaseStringStream(String input) {
  208. super(input);
  209. }
  210. public int LA(int i) {
  211. int returnChar = super.LA(i);
  212. if (returnChar == CharStream.EOF) {
  213. return returnChar;
  214. } else if (returnChar == 0) {
  215. return returnChar;
  216. }
  217. return Character.toUpperCase((char) returnChar);
  218. }
  219. }
  220. /**
  221. * HiveLexerX.
  222. *
  223. */
  224. public class HiveLexerX extends HiveLexer {
  225. private final ArrayList<ParseError> errors;
  226. public HiveLexerX() {
  227. super();
  228. errors = new ArrayList<ParseError>();
  229. }
  230. public HiveLexerX(CharStream input) {
  231. super(input);
  232. errors = new ArrayList<ParseError>();
  233. }
  234. public void displayRecognitionError(String[] tokenNames,
  235. RecognitionException e) {
  236. errors.add(new ParseError(this, e, tokenNames));
  237. }
  238. public String getErrorMessage(RecognitionException e, String[] tokenNames) {
  239. String msg = null;
  240. if (e instanceof NoViableAltException) {
  241. @SuppressWarnings("unused")
  242. NoViableAltException nvae = (NoViableAltException) e;
  243. // for development, can add
  244. // "decision=<<"+nvae.grammarDecisionDescription+">>"
  245. // and "(decision="+nvae.decisionNumber+") and
  246. // "state "+nvae.stateNumber
  247. msg = "character " + getCharErrorDisplay(e.c) + " not supported here";
  248. } else {
  249. msg = super.getErrorMessage(e, tokenNames);
  250. }
  251. return msg;
  252. }
  253. public ArrayList<ParseError> getErrors() {
  254. return errors;
  255. }
  256. }
  257. /**
  258. * HiveParserX.
  259. *
  260. */
  261. public class HiveParserX extends HiveParser {
  262. private final ArrayList<ParseError> errors;
  263. public HiveParserX(TokenStream input) {
  264. super(input);
  265. errors = new ArrayList<ParseError>();
  266. }
  267. protected void mismatch(IntStream input, int ttype, BitSet follow)
  268. throws RecognitionException {
  269. throw new MismatchedTokenException(ttype, input);
  270. }
  271. public void recoverFromMismatchedSet(IntStream input,
  272. RecognitionException re, BitSet follow) throws RecognitionException {
  273. throw re;
  274. }
  275. public void displayRecognitionError(String[] tokenNames,
  276. RecognitionException e) {
  277. errors.add(new ParseError(this, e, tokenNames));
  278. }
  279. public String getErrorMessage(RecognitionException e, String[] tokenNames) {
  280. String msg = null;
  281. // Transalate the token names to something that the user can understand
  282. String[] xlateNames = new String[tokenNames.length];
  283. for (int i = 0; i < tokenNames.length; ++i) {
  284. xlateNames[i] = ParseDriver.xlate(tokenNames[i]);
  285. }
  286. if (e instanceof NoViableAltException) {
  287. @SuppressWarnings("unused")
  288. NoViableAltException nvae = (NoViableAltException) e;
  289. // for development, can add
  290. // "decision=<<"+nvae.grammarDecisionDescription+">>"
  291. // and "(decision="+nvae.decisionNumber+") and
  292. // "state "+nvae.stateNumber
  293. msg = "cannot recognize input " + getTokenErrorDisplay(e.token);
  294. } else {
  295. msg = super.getErrorMessage(e, xlateNames);
  296. }
  297. if (msgs.size() > 0) {
  298. msg = msg + " in " + msgs.peek();
  299. }
  300. return msg;
  301. }
  302. public ArrayList<ParseError> getErrors() {
  303. return errors;
  304. }
  305. }
  306. /**
  307. * Tree adaptor for making antlr return ASTNodes instead of CommonTree nodes
  308. * so that the graph walking algorithms and the rules framework defined in
  309. * ql.lib can be used with the AST Nodes.
  310. */
  311. static final TreeAdaptor adaptor = new CommonTreeAdaptor() {
  312. /**
  313. * Creates an ASTNode for the given token. The ASTNode is a wrapper around
  314. * antlr's CommonTree class that implements the Node interface.
  315. *
  316. * @param payload
  317. * The token.
  318. * @return Object (which is actually an ASTNode) for the token.
  319. */
  320. @Override
  321. public Object create(Token payload) {
  322. return new ASTNode(payload);
  323. }
  324. };
  325. public ASTNode parse(String command) throws ParseException {
  326. return parse(command, null);
  327. }
  328. /**
  329. * Parses a command, optionally assigning the parser's token stream to the
  330. * given context.
  331. *
  332. * @param command
  333. * command to parse
  334. *
  335. * @param ctx
  336. * context with which to associate this parser's token stream, or
  337. * null if either no context is available or the context already has
  338. * an existing stream
  339. *
  340. * @return parsed AST
  341. */
  342. public ASTNode parse(String command, Context ctx) throws ParseException {
  343. LOG.info("Parsing command: " + command);
  344. HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command));
  345. TokenRewriteStream tokens = new TokenRewriteStream(lexer);
  346. if (ctx != null) {
  347. ctx.setTokenRewriteStream(tokens);
  348. }
  349. HiveParserX parser = new HiveParserX(tokens);
  350. parser.setTreeAdaptor(adaptor);
  351. HiveParser.statement_return r = null;
  352. try {
  353. r = parser.statement();
  354. } catch (RecognitionException e) {
  355. throw new ParseException(parser.getErrors());
  356. }
  357. if (lexer.getErrors().size() == 0 && parser.getErrors().size() == 0) {
  358. LOG.info("Parse Completed");
  359. } else if (lexer.getErrors().size() != 0) {
  360. throw new ParseException(lexer.getErrors());
  361. } else {
  362. throw new ParseException(parser.getErrors());
  363. }
  364. return (ASTNode) r.getTree();
  365. }
  366. }