
/tags/release-0.1-rc2/hive/external/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.parse;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;

import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.BitSet;
import org.antlr.runtime.CharStream;
import org.antlr.runtime.IntStream;
import org.antlr.runtime.MismatchedTokenException;
import org.antlr.runtime.NoViableAltException;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.Token;
import org.antlr.runtime.TokenRewriteStream;
import org.antlr.runtime.TokenStream;
import org.antlr.runtime.tree.CommonTreeAdaptor;
import org.antlr.runtime.tree.TreeAdaptor;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.Context;

/**
 * ParseDriver.
 *
 */
public class ParseDriver {

  private static final Log LOG = LogFactory.getLog("hive.ql.parse.ParseDriver");

  private static HashMap<String, String> xlateMap;
  static {
    xlateMap = new HashMap<String, String>();

    // Keywords
    xlateMap.put("KW_TRUE", "TRUE");
    xlateMap.put("KW_FALSE", "FALSE");
    xlateMap.put("KW_ALL", "ALL");
    xlateMap.put("KW_AND", "AND");
    xlateMap.put("KW_OR", "OR");
    xlateMap.put("KW_NOT", "NOT");
    xlateMap.put("KW_LIKE", "LIKE");

    xlateMap.put("KW_ASC", "ASC");
    xlateMap.put("KW_DESC", "DESC");
    xlateMap.put("KW_ORDER", "ORDER");
    xlateMap.put("KW_BY", "BY");
    xlateMap.put("KW_GROUP", "GROUP");
    xlateMap.put("KW_WHERE", "WHERE");
    xlateMap.put("KW_FROM", "FROM");
    xlateMap.put("KW_AS", "AS");
    xlateMap.put("KW_SELECT", "SELECT");
    xlateMap.put("KW_DISTINCT", "DISTINCT");
    xlateMap.put("KW_INSERT", "INSERT");
    xlateMap.put("KW_OVERWRITE", "OVERWRITE");
    xlateMap.put("KW_OUTER", "OUTER");
    xlateMap.put("KW_JOIN", "JOIN");
    xlateMap.put("KW_LEFT", "LEFT");
    xlateMap.put("KW_RIGHT", "RIGHT");
    xlateMap.put("KW_FULL", "FULL");
    xlateMap.put("KW_ON", "ON");
    xlateMap.put("KW_PARTITION", "PARTITION");
    xlateMap.put("KW_PARTITIONS", "PARTITIONS");
    xlateMap.put("KW_TABLE", "TABLE");
    xlateMap.put("KW_TABLES", "TABLES");
    xlateMap.put("KW_SHOW", "SHOW");
    xlateMap.put("KW_MSCK", "MSCK");
    xlateMap.put("KW_DIRECTORY", "DIRECTORY");
    xlateMap.put("KW_LOCAL", "LOCAL");
    xlateMap.put("KW_TRANSFORM", "TRANSFORM");
    xlateMap.put("KW_USING", "USING");
    xlateMap.put("KW_CLUSTER", "CLUSTER");
    xlateMap.put("KW_DISTRIBUTE", "DISTRIBUTE");
    xlateMap.put("KW_SORT", "SORT");
    xlateMap.put("KW_UNION", "UNION");
    xlateMap.put("KW_LOAD", "LOAD");
    xlateMap.put("KW_DATA", "DATA");
    xlateMap.put("KW_INPATH", "INPATH");
    xlateMap.put("KW_IS", "IS");
    xlateMap.put("KW_NULL", "NULL");
    xlateMap.put("KW_CREATE", "CREATE");
    xlateMap.put("KW_EXTERNAL", "EXTERNAL");
    xlateMap.put("KW_ALTER", "ALTER");
    xlateMap.put("KW_DESCRIBE", "DESCRIBE");
    xlateMap.put("KW_DROP", "DROP");
104    xlateMap.put("KW_REANME", "REANME");
    xlateMap.put("KW_TO", "TO");
    xlateMap.put("KW_COMMENT", "COMMENT");
    xlateMap.put("KW_BOOLEAN", "BOOLEAN");
    xlateMap.put("KW_TINYINT", "TINYINT");
    xlateMap.put("KW_SMALLINT", "SMALLINT");
    xlateMap.put("KW_INT", "INT");
    xlateMap.put("KW_BIGINT", "BIGINT");
    xlateMap.put("KW_FLOAT", "FLOAT");
    xlateMap.put("KW_DOUBLE", "DOUBLE");
    xlateMap.put("KW_DATE", "DATE");
    xlateMap.put("KW_DATETIME", "DATETIME");
    xlateMap.put("KW_TIMESTAMP", "TIMESTAMP");
    xlateMap.put("KW_STRING", "STRING");
    xlateMap.put("KW_ARRAY", "ARRAY");
    xlateMap.put("KW_MAP", "MAP");
    xlateMap.put("KW_REDUCE", "REDUCE");
    xlateMap.put("KW_PARTITIONED", "PARTITIONED");
    xlateMap.put("KW_CLUSTERED", "CLUSTERED");
    xlateMap.put("KW_SORTED", "SORTED");
    xlateMap.put("KW_INTO", "INTO");
    xlateMap.put("KW_BUCKETS", "BUCKETS");
    xlateMap.put("KW_ROW", "ROW");
    xlateMap.put("KW_FORMAT", "FORMAT");
    xlateMap.put("KW_DELIMITED", "DELIMITED");
    xlateMap.put("KW_FIELDS", "FIELDS");
    xlateMap.put("KW_TERMINATED", "TERMINATED");
    xlateMap.put("KW_COLLECTION", "COLLECTION");
    xlateMap.put("KW_ITEMS", "ITEMS");
    xlateMap.put("KW_KEYS", "KEYS");
    xlateMap.put("KW_KEY_TYPE", "$KEY$");
    xlateMap.put("KW_LINES", "LINES");
    xlateMap.put("KW_STORED", "STORED");
    xlateMap.put("KW_SEQUENCEFILE", "SEQUENCEFILE");
    xlateMap.put("KW_TEXTFILE", "TEXTFILE");
    xlateMap.put("KW_INPUTFORMAT", "INPUTFORMAT");
    xlateMap.put("KW_OUTPUTFORMAT", "OUTPUTFORMAT");
    xlateMap.put("KW_LOCATION", "LOCATION");
    xlateMap.put("KW_TABLESAMPLE", "TABLESAMPLE");
    xlateMap.put("KW_BUCKET", "BUCKET");
    xlateMap.put("KW_OUT", "OUT");
    xlateMap.put("KW_OF", "OF");
    xlateMap.put("KW_CAST", "CAST");
    xlateMap.put("KW_ADD", "ADD");
    xlateMap.put("KW_REPLACE", "REPLACE");
    xlateMap.put("KW_COLUMNS", "COLUMNS");
    xlateMap.put("KW_RLIKE", "RLIKE");
    xlateMap.put("KW_REGEXP", "REGEXP");
    xlateMap.put("KW_TEMPORARY", "TEMPORARY");
    xlateMap.put("KW_FUNCTION", "FUNCTION");
    xlateMap.put("KW_EXPLAIN", "EXPLAIN");
    xlateMap.put("KW_EXTENDED", "EXTENDED");
    xlateMap.put("KW_SERDE", "SERDE");
    xlateMap.put("KW_WITH", "WITH");
    xlateMap.put("KW_SERDEPROPERTIES", "SERDEPROPERTIES");
    xlateMap.put("KW_LIMIT", "LIMIT");
    xlateMap.put("KW_SET", "SET");
    xlateMap.put("KW_PROPERTIES", "TBLPROPERTIES");
    xlateMap.put("KW_VALUE_TYPE", "$VALUE$");
    xlateMap.put("KW_ELEM_TYPE", "$ELEM$");

    // Operators
    xlateMap.put("DOT", ".");
    xlateMap.put("COLON", ":");
    xlateMap.put("COMMA", ",");
169    xlateMap.put("SEMICOLON", ");");

    xlateMap.put("LPAREN", "(");
    xlateMap.put("RPAREN", ")");
    xlateMap.put("LSQUARE", "[");
    xlateMap.put("RSQUARE", "]");

    xlateMap.put("EQUAL", "=");
    xlateMap.put("NOTEQUAL", "<>");
    xlateMap.put("LESSTHANOREQUALTO", "<=");
    xlateMap.put("LESSTHAN", "<");
    xlateMap.put("GREATERTHANOREQUALTO", ">=");
    xlateMap.put("GREATERTHAN", ">");

    xlateMap.put("DIVIDE", "/");
    xlateMap.put("PLUS", "+");
    xlateMap.put("MINUS", "-");
    xlateMap.put("STAR", "*");
    xlateMap.put("MOD", "%");

    xlateMap.put("AMPERSAND", "&");
    xlateMap.put("TILDE", "~");
    xlateMap.put("BITWISEOR", "|");
    xlateMap.put("BITWISEXOR", "^");
  }

  public static Collection<String> getKeywords() {
    return xlateMap.values();
  }

  private static String xlate(String name) {

    String ret = xlateMap.get(name);
    if (ret == null) {
      ret = name;
    }

    return ret;
  }
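
  // Added commentary (not part of the original file): xlate() maps an ANTLR
  // token name to the text a user would recognize, falling back to the raw
  // token name when no mapping exists. For example, given the map above:
  //   xlate("KW_SELECT")   returns "SELECT"
  //   xlate("GREATERTHAN") returns ">"
  //   xlate("Identifier")  returns "Identifier" (no entry, name returned as-is)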

  /**
   * ANTLRNoCaseStringStream.
   *
   */
  // This class provides an implementation of a case-insensitive token checker
  // for the lexical analysis part of ANTLR. By converting the token stream to
  // upper case at the time lexical rules are checked, this class ensures that
  // the lexical rules need to match only upper-case letters rather than a
  // combination of upper-case and lower-case characters. This is used purely
  // for matching lexical rules. The actual token text is stored exactly as the
  // user typed it, without being converted to upper case. The token values are
  // generated by the consume() function of the superclass ANTLRStringStream.
  // The LA() function is the lookahead function and is used purely for matching
  // lexical rules. This also means that the grammar will only accept
  // capitalized keywords when it is run from other tools, such as ANTLRWorks,
  // which do not have the ANTLRNoCaseStringStream implementation.
  public class ANTLRNoCaseStringStream extends ANTLRStringStream {

    public ANTLRNoCaseStringStream(String input) {
      super(input);
    }

    public int LA(int i) {

      int returnChar = super.LA(i);
      if (returnChar == CharStream.EOF) {
        return returnChar;
      } else if (returnChar == 0) {
        return returnChar;
      }

      return Character.toUpperCase((char) returnChar);
    }
  }
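
  // Illustrative sketch (added commentary, not part of the original file):
  // only the lookahead used for rule matching is upper-cased, so lexer rules
  // written against upper-case keywords match input of any case, while the
  // token text keeps the user's original spelling. Run from inside a
  // ParseDriver instance (HiveLexerX is an inner class), something like:
  //   HiveLexerX lexer =
  //       new HiveLexerX(new ANTLRNoCaseStringStream("select key from src"));
  //   Token t = lexer.nextToken(); // matched by the upper-case keyword rule
  //   String text = t.getText();   // still the original lower-case "select"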

  /**
   * HiveLexerX.
   *
   */
  public class HiveLexerX extends HiveLexer {

    private final ArrayList<ParseError> errors;

    public HiveLexerX() {
      super();
      errors = new ArrayList<ParseError>();
    }

    public HiveLexerX(CharStream input) {
      super(input);
      errors = new ArrayList<ParseError>();
    }

    public void displayRecognitionError(String[] tokenNames,
        RecognitionException e) {

      errors.add(new ParseError(this, e, tokenNames));
    }

    public String getErrorMessage(RecognitionException e, String[] tokenNames) {
      String msg = null;

      if (e instanceof NoViableAltException) {
        @SuppressWarnings("unused")
        NoViableAltException nvae = (NoViableAltException) e;
        // for development, can add
        // "decision=<<"+nvae.grammarDecisionDescription+">>"
        // and "(decision="+nvae.decisionNumber+") and
        // "state "+nvae.stateNumber
        msg = "character " + getCharErrorDisplay(e.c) + " not supported here";
      } else {
        msg = super.getErrorMessage(e, tokenNames);
      }

      return msg;
    }

    public ArrayList<ParseError> getErrors() {
      return errors;
    }

  }

  /**
   * HiveParserX.
   *
   */
  public class HiveParserX extends HiveParser {

    private final ArrayList<ParseError> errors;

    public HiveParserX(TokenStream input) {
      super(input);
      errors = new ArrayList<ParseError>();
    }

    protected void mismatch(IntStream input, int ttype, BitSet follow)
        throws RecognitionException {

      throw new MismatchedTokenException(ttype, input);
    }

    public void recoverFromMismatchedSet(IntStream input,
        RecognitionException re, BitSet follow) throws RecognitionException {
      throw re;
    }

    public void displayRecognitionError(String[] tokenNames,
        RecognitionException e) {

      errors.add(new ParseError(this, e, tokenNames));
    }

    public String getErrorMessage(RecognitionException e, String[] tokenNames) {
      String msg = null;

      // Translate the token names to something that the user can understand
      String[] xlateNames = new String[tokenNames.length];
      for (int i = 0; i < tokenNames.length; ++i) {
        xlateNames[i] = ParseDriver.xlate(tokenNames[i]);
      }

      if (e instanceof NoViableAltException) {
        @SuppressWarnings("unused")
        NoViableAltException nvae = (NoViableAltException) e;
        // for development, can add
        // "decision=<<"+nvae.grammarDecisionDescription+">>"
        // and "(decision="+nvae.decisionNumber+") and
        // "state "+nvae.stateNumber
        msg = "cannot recognize input " + getTokenErrorDisplay(e.token);
      } else {
        msg = super.getErrorMessage(e, xlateNames);
      }

      if (msgs.size() > 0) {
        msg = msg + " in " + msgs.peek();
      }
      return msg;
    }

    public ArrayList<ParseError> getErrors() {
      return errors;
    }

  }
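
  // Added commentary (not part of the original file): both HiveLexerX and
  // HiveParserX override displayRecognitionError() so that errors are
  // collected as ParseError objects instead of being printed by ANTLR's
  // default error handler; parse() below checks getErrors() on each and
  // wraps anything collected in a ParseException.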

  /**
   * Tree adaptor for making antlr return ASTNodes instead of CommonTree nodes
   * so that the graph walking algorithms and the rules framework defined in
   * ql.lib can be used with the AST Nodes.
   */
  static final TreeAdaptor adaptor = new CommonTreeAdaptor() {
    /**
     * Creates an ASTNode for the given token. The ASTNode is a wrapper around
     * antlr's CommonTree class that implements the Node interface.
     *
     * @param payload
     *          The token.
     * @return Object (which is actually an ASTNode) for the token.
     */
    @Override
    public Object create(Token payload) {
      return new ASTNode(payload);
    }
  };
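
  // Illustrative sketch (added commentary, not part of the original file):
  // because the adaptor creates every node as an ASTNode, the tree returned
  // by the parser can be cast and walked through the Node interface, e.g.:
  //   ASTNode root = (ASTNode) r.getTree();          // r from parser.statement()
  //   for (int i = 0; i < root.getChildCount(); i++) {
  //     ASTNode child = (ASTNode) root.getChild(i);  // safe: adaptor created it
  //   }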

  public ASTNode parse(String command) throws ParseException {
    return parse(command, null);
  }

  /**
   * Parses a command, optionally assigning the parser's token stream to the
   * given context.
   *
   * @param command
   *          command to parse
   *
   * @param ctx
   *          context with which to associate this parser's token stream, or
   *          null if either no context is available or the context already has
   *          an existing stream
   *
   * @return parsed AST
   */
  public ASTNode parse(String command, Context ctx) throws ParseException {
    LOG.info("Parsing command: " + command);

    HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command));
    TokenRewriteStream tokens = new TokenRewriteStream(lexer);
    if (ctx != null) {
      ctx.setTokenRewriteStream(tokens);
    }
    HiveParserX parser = new HiveParserX(tokens);
    parser.setTreeAdaptor(adaptor);
    HiveParser.statement_return r = null;
    try {
      r = parser.statement();
    } catch (RecognitionException e) {
      throw new ParseException(parser.getErrors());
    }

    if (lexer.getErrors().size() == 0 && parser.getErrors().size() == 0) {
      LOG.info("Parse Completed");
    } else if (lexer.getErrors().size() != 0) {
      throw new ParseException(lexer.getErrors());
    } else {
      throw new ParseException(parser.getErrors());
    }

    return (ASTNode) r.getTree();
  }
}
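
// Example usage (illustrative sketch, not part of the original file). The
// query string here is hypothetical; any HiveQL statement would do:
//   ParseDriver pd = new ParseDriver();
//   ASTNode tree = pd.parse("SELECT key, value FROM src WHERE key > 10");
//   System.out.println(tree.toStringTree());
// Lexer and parser errors are not printed by ANTLR; they surface as a
// ParseException carrying the collected ParseError list.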