PageRenderTime 31ms CodeModel.GetById 10ms app.highlight 18ms RepoModel.GetById 1ms app.codeStats 0ms

/tags/release-0.0.0-rc0/hive/external/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java

#
Java | 173 lines | 99 code | 26 blank | 48 comment | 14 complexity | 65ae37543d629474d56c170e7fbff08d MD5 | raw file
  1/**
  2 * Licensed to the Apache Software Foundation (ASF) under one
  3 * or more contributor license agreements.  See the NOTICE file
  4 * distributed with this work for additional information
  5 * regarding copyright ownership.  The ASF licenses this file
  6 * to you under the Apache License, Version 2.0 (the
  7 * "License"); you may not use this file except in compliance
  8 * with the License.  You may obtain a copy of the License at
  9 *
 10 *     http://www.apache.org/licenses/LICENSE-2.0
 11 *
 12 * Unless required by applicable law or agreed to in writing, software
 13 * distributed under the License is distributed on an "AS IS" BASIS,
 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 * See the License for the specific language governing permissions and
 16 * limitations under the License.
 17 */
 18
 19package org.apache.hadoop.hive.ql.exec.errors;
 20
 21import java.io.BufferedReader;
 22import java.io.IOException;
 23import java.io.InputStreamReader;
 24import java.net.MalformedURLException;
 25import java.net.URL;
 26import java.util.ArrayList;
 27import java.util.HashMap;
 28import java.util.List;
 29import java.util.Map;
 30import java.util.Map.Entry;
 31
 32import org.apache.hadoop.hive.conf.HiveConf;
 33import org.apache.hadoop.mapred.JobConf;
 34
 35/**
 36 * TaskLogProcessor reads the logs from failed task attempts and tries to figure
 37 * out what the cause of the error was using various heuristics.
 38 */
 39public class TaskLogProcessor {
 40
 41  private final Map<ErrorHeuristic, HeuristicStats> heuristics =
 42    new HashMap<ErrorHeuristic, HeuristicStats>();
 43  private final List<String> taskLogUrls = new ArrayList<String>();
 44
 45  private JobConf conf = null;
 46  // Query is the hive query string i.e. "SELECT * FROM src;" associated with
 47  // this set of tasks logs
 48  private String query = null;
 49
 50  public TaskLogProcessor(JobConf conf) {
 51    this.conf = conf;
 52    query = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYSTRING);
 53
 54    heuristics.put(new ScriptErrorHeuristic(), new HeuristicStats());
 55    heuristics.put(new MapAggrMemErrorHeuristic(), new HeuristicStats());
 56    heuristics.put(new DataCorruptErrorHeuristic(), new HeuristicStats());
 57    for(ErrorHeuristic e : heuristics.keySet()) {
 58      e.init(query, conf);
 59    }
 60  }
 61
 62  /**
 63   * Adds a task log URL for the heuristics to read through.
 64   * @param url
 65   */
 66  public void addTaskAttemptLogUrl(String url) {
 67    taskLogUrls.add(url);
 68  }
 69
 70  private static class HeuristicStats {
 71
 72    // The number of times eh has returned non-null errors
 73    private int triggerCount = 0;
 74    // All ErrorAndSolutions that ErrorHeuristic has generated. For the same error, they
 75    // should be the same though it's possible that different file paths etc
 76    // could generate different error messages
 77    private final List<ErrorAndSolution> ens = new ArrayList<ErrorAndSolution>();
 78
 79    HeuristicStats() {
 80    }
 81
 82    int getTriggerCount() {
 83      return triggerCount;
 84    }
 85
 86    void incTriggerCount() {
 87      triggerCount++;
 88    }
 89
 90    List<ErrorAndSolution> getErrorAndSolutions() {
 91      return ens;
 92    }
 93
 94    void addErrorAndSolution(ErrorAndSolution e) {
 95      ens.add(e);
 96    }
 97  }
 98
 99  /**
100   * Processes the provided task logs using the known error heuristics to get
101   * the matching errors.
102   * @return A ErrorAndSolution from the ErrorHeuristic that most frequently
103   * generated matches. In case of ties, multiple ErrorAndSolutions will be
104   * returned.
105   */
106  public List<ErrorAndSolution> getErrors() {
107
108    for(String urlString : taskLogUrls) {
109
110      // Open the log file, and read in a line. Then feed the line into
111      // each of the ErrorHeuristics. Repeat for all the lines in the log.
112      URL taskAttemptLogUrl;
113      try {
114        taskAttemptLogUrl = new URL(urlString);
115      } catch(MalformedURLException e) {
116        throw new RuntimeException("Bad task log url", e);
117      }
118      BufferedReader in;
119      try {
120        in = new BufferedReader(
121            new InputStreamReader(taskAttemptLogUrl.openStream()));
122        String inputLine;
123        while ((inputLine = in.readLine()) != null) {
124          for(ErrorHeuristic e : heuristics.keySet()) {
125            e.processLogLine(inputLine);
126          }
127        }
128        in.close();
129      } catch (IOException e) {
130        throw new RuntimeException("Error while reading from task log url", e);
131      }
132
133      // Once the lines of the log file have been fed into the ErrorHeuristics,
134      // see if they have detected anything. If any has, record
135      // what ErrorAndSolution it gave so we can later return the most
136      // frequently occurring error
137      for(Entry<ErrorHeuristic, HeuristicStats> ent : heuristics.entrySet()) {
138        ErrorHeuristic eh = ent.getKey();
139        HeuristicStats hs = ent.getValue();
140
141        ErrorAndSolution es = eh.getErrorAndSolution();
142        if(es != null) {
143          hs.incTriggerCount();
144          hs.addErrorAndSolution(es);
145        }
146      }
147
148    }
149
150    // Return the errors that occur the most frequently
151    int max = 0;
152    for(HeuristicStats hs : heuristics.values()) {
153      if(hs.getTriggerCount() > max) {
154        max = hs.getTriggerCount();
155      }
156    }
157
158    List<ErrorAndSolution> errors = new ArrayList<ErrorAndSolution>();
159    for(HeuristicStats hs : heuristics.values()) {
160      if(hs.getTriggerCount() == max) {
161        if(hs.getErrorAndSolutions().size() > 0) {
162          // An error heuristic could have generated different ErrorAndSolution
163          // for each task attempt, but most likely they are the same. Plus,
164          // one of those is probably good enough for debugging
165          errors.add(hs.getErrorAndSolutions().get(0));
166        }
167      }
168    }
169
170    return errors;
171  }
172
173}