/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
- /**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.apache.hadoop.hive.ql.exec;
- import java.beans.DefaultPersistenceDelegate;
- import java.beans.Encoder;
- import java.beans.ExceptionListener;
- import java.beans.Expression;
- import java.beans.PersistenceDelegate;
- import java.beans.Statement;
- import java.beans.XMLDecoder;
- import java.beans.XMLEncoder;
- import java.io.BufferedReader;
- import java.io.ByteArrayInputStream;
- import java.io.ByteArrayOutputStream;
- import java.io.DataInput;
- import java.io.EOFException;
- import java.io.File;
- import java.io.FileInputStream;
- import java.io.FileNotFoundException;
- import java.io.IOException;
- import java.io.InputStream;
- import java.io.InputStreamReader;
- import java.io.OutputStream;
- import java.io.PrintStream;
- import java.io.Serializable;
- import java.io.UnsupportedEncodingException;
- import java.net.URI;
- import java.net.URL;
- import java.net.URLClassLoader;
- import java.security.MessageDigest;
- import java.security.NoSuchAlgorithmException;
- import java.sql.Connection;
- import java.sql.DriverManager;
- import java.sql.PreparedStatement;
- import java.sql.SQLException;
- import java.sql.SQLTransientException;
- import java.sql.Timestamp;
- import java.text.SimpleDateFormat;
- import java.util.ArrayList;
- import java.util.Arrays;
- import java.util.Calendar;
- import java.util.Collection;
- import java.util.Collections;
- import java.util.Date;
- import java.util.HashMap;
- import java.util.HashSet;
- import java.util.Iterator;
- import java.util.LinkedHashMap;
- import java.util.LinkedList;
- import java.util.List;
- import java.util.Map;
- import java.util.Properties;
- import java.util.Random;
- import java.util.Set;
- import java.util.UUID;
- import java.util.concurrent.ConcurrentHashMap;
- import java.util.concurrent.ExecutionException;
- import java.util.concurrent.Future;
- import java.util.concurrent.LinkedBlockingQueue;
- import java.util.concurrent.ThreadPoolExecutor;
- import java.util.concurrent.TimeUnit;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
- import java.util.zip.Deflater;
- import java.util.zip.DeflaterOutputStream;
- import java.util.zip.InflaterInputStream;
- import org.antlr.runtime.CommonToken;
- import org.apache.commons.codec.binary.Base64;
- import org.apache.commons.lang.StringUtils;
- import org.apache.commons.lang.WordUtils;
- import org.apache.commons.logging.Log;
- import org.apache.commons.logging.LogFactory;
- import org.apache.hadoop.conf.Configuration;
- import org.apache.hadoop.filecache.DistributedCache;
- import org.apache.hadoop.fs.ContentSummary;
- import org.apache.hadoop.fs.FileStatus;
- import org.apache.hadoop.fs.FileSystem;
- import org.apache.hadoop.fs.Path;
- import org.apache.hadoop.fs.PathFilter;
- import org.apache.hadoop.fs.permission.FsPermission;
- import org.apache.hadoop.hive.common.HiveInterruptCallback;
- import org.apache.hadoop.hive.common.HiveInterruptUtils;
- import org.apache.hadoop.hive.common.HiveStatsUtils;
- import org.apache.hadoop.hive.conf.HiveConf;
- import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
- import org.apache.hadoop.hive.metastore.Warehouse;
- import org.apache.hadoop.hive.metastore.api.FieldSchema;
- import org.apache.hadoop.hive.metastore.api.Order;
- import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
- import org.apache.hadoop.hive.ql.Context;
- import org.apache.hadoop.hive.ql.ErrorMsg;
- import org.apache.hadoop.hive.ql.QueryPlan;
- import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
- import org.apache.hadoop.hive.ql.exec.mr.ExecDriver;
- import org.apache.hadoop.hive.ql.exec.mr.ExecMapper;
- import org.apache.hadoop.hive.ql.exec.mr.ExecReducer;
- import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
- import org.apache.hadoop.hive.ql.exec.tez.TezTask;
- import org.apache.hadoop.hive.ql.io.ContentSummaryInputFormat;
- import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
- import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
- import org.apache.hadoop.hive.ql.io.HiveInputFormat;
- import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
- import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat;
- import org.apache.hadoop.hive.ql.io.OneNullRowInputFormat;
- import org.apache.hadoop.hive.ql.io.RCFile;
- import org.apache.hadoop.hive.ql.io.ReworkMapredInputFormat;
- import org.apache.hadoop.hive.ql.io.rcfile.merge.MergeWork;
- import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileMergeMapper;
- import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanMapper;
- import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork;
- import org.apache.hadoop.hive.ql.io.rcfile.truncate.ColumnTruncateMapper;
- import org.apache.hadoop.hive.ql.io.rcfile.truncate.ColumnTruncateWork;
- import org.apache.hadoop.hive.ql.log.PerfLogger;
- import org.apache.hadoop.hive.ql.metadata.HiveException;
- import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
- import org.apache.hadoop.hive.ql.metadata.HiveUtils;
- import org.apache.hadoop.hive.ql.metadata.InputEstimator;
- import org.apache.hadoop.hive.ql.metadata.Partition;
- import org.apache.hadoop.hive.ql.metadata.Table;
- import org.apache.hadoop.hive.ql.parse.SemanticException;
- import org.apache.hadoop.hive.ql.plan.BaseWork;
- import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
- import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
- import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
- import org.apache.hadoop.hive.ql.plan.GroupByDesc;
- import org.apache.hadoop.hive.ql.plan.MapWork;
- import org.apache.hadoop.hive.ql.plan.MapredWork;
- import org.apache.hadoop.hive.ql.plan.OperatorDesc;
- import org.apache.hadoop.hive.ql.plan.PartitionDesc;
- import org.apache.hadoop.hive.ql.plan.PlanUtils;
- import org.apache.hadoop.hive.ql.plan.PlanUtils.ExpressionTypes;
- import org.apache.hadoop.hive.ql.plan.ReduceWork;
- import org.apache.hadoop.hive.ql.plan.TableDesc;
- import org.apache.hadoop.hive.ql.plan.api.Adjacency;
- import org.apache.hadoop.hive.ql.plan.api.Graph;
- import org.apache.hadoop.hive.ql.session.SessionState;
- import org.apache.hadoop.hive.ql.stats.StatsFactory;
- import org.apache.hadoop.hive.ql.stats.StatsPublisher;
- import org.apache.hadoop.hive.serde.serdeConstants;
- import org.apache.hadoop.hive.serde2.SerDeException;
- import org.apache.hadoop.hive.serde2.SerDeUtils;
- import org.apache.hadoop.hive.serde2.Serializer;
- import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
- import org.apache.hadoop.hive.shims.ShimLoader;
- import org.apache.hadoop.io.IOUtils;
- import org.apache.hadoop.io.SequenceFile;
- import org.apache.hadoop.io.SequenceFile.CompressionType;
- import org.apache.hadoop.io.Text;
- import org.apache.hadoop.io.Writable;
- import org.apache.hadoop.io.WritableComparable;
- import org.apache.hadoop.io.compress.CompressionCodec;
- import org.apache.hadoop.io.compress.DefaultCodec;
- import org.apache.hadoop.mapred.FileInputFormat;
- import org.apache.hadoop.mapred.FileOutputFormat;
- import org.apache.hadoop.mapred.InputFormat;
- import org.apache.hadoop.mapred.JobConf;
- import org.apache.hadoop.mapred.RecordReader;
- import org.apache.hadoop.mapred.Reporter;
- import org.apache.hadoop.mapred.SequenceFileInputFormat;
- import org.apache.hadoop.mapred.SequenceFileOutputFormat;
- import org.apache.hadoop.util.Progressable;
- import org.apache.hadoop.util.ReflectionUtils;
- import org.apache.hadoop.util.Shell;
- import com.esotericsoftware.kryo.Kryo;
- import com.esotericsoftware.kryo.io.Input;
- import com.esotericsoftware.kryo.io.Output;
- import com.esotericsoftware.kryo.serializers.FieldSerializer;
- import com.esotericsoftware.shaded.org.objenesis.strategy.StdInstantiatorStrategy;
- /**
- * Utilities.
- *
- */
- @SuppressWarnings("nls")
- public final class Utilities {
- /**
- * The objects in the reducer are composed of these top-level fields.
- */
- public static String HADOOP_LOCAL_FS = "file:///";
- public static String MAP_PLAN_NAME = "map.xml";
- public static String REDUCE_PLAN_NAME = "reduce.xml";
- public static final String MAPRED_MAPPER_CLASS = "mapred.mapper.class";
- public static final String MAPRED_REDUCER_CLASS = "mapred.reducer.class";
- /**
- * ReduceField:
- * KEY: record key
- * VALUE: record value
- */
- public static enum ReduceField {
- KEY, VALUE
- };
- public static List<String> reduceFieldNameList;
- static {
- reduceFieldNameList = new ArrayList<String>();
- for (ReduceField r : ReduceField.values()) {
- reduceFieldNameList.add(r.toString());
- }
- }
- public static String removeValueTag(String column) {
- if (column.startsWith(ReduceField.VALUE + ".")) {
- return column.substring(6);
- }
- return column;
- }
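- // Illustrative example: reducer-side columns carry the VALUE tag as a
- // prefix, so a hypothetical input of "VALUE._col0" comes back as "_col0",
- // while an untagged name is returned unchanged.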
- private Utilities() {
- // prevent instantiation
- }
- private static Map<Path, BaseWork> gWorkMap = Collections
- .synchronizedMap(new HashMap<Path, BaseWork>());
- private static final String CLASS_NAME = Utilities.class.getName();
- private static final Log LOG = LogFactory.getLog(CLASS_NAME);
- public static void clearWork(Configuration conf) {
- Path mapPath = getPlanPath(conf, MAP_PLAN_NAME);
- Path reducePath = getPlanPath(conf, REDUCE_PLAN_NAME);
- // if the plan path hasn't been initialized just return, nothing to clean.
- if (mapPath == null && reducePath == null) {
- return;
- }
- try {
- FileSystem fs = mapPath.getFileSystem(conf);
- if (fs.exists(mapPath)) {
- fs.delete(mapPath, true);
- }
- if (fs.exists(reducePath)) {
- fs.delete(reducePath, true);
- }
- } catch (Exception e) {
- LOG.warn("Failed to clean-up tmp directories.", e);
- } finally {
- // Where a single process works with multiple plans, we must clear
- // the cache before working with the next plan.
- clearWorkMapForConf(conf);
- }
- }
- public static MapredWork getMapRedWork(Configuration conf) {
- MapredWork w = new MapredWork();
- w.setMapWork(getMapWork(conf));
- w.setReduceWork(getReduceWork(conf));
- return w;
- }
- public static void setMapWork(Configuration conf, MapWork work) {
- setBaseWork(conf, MAP_PLAN_NAME, work);
- }
- public static MapWork getMapWork(Configuration conf) {
- return (MapWork) getBaseWork(conf, MAP_PLAN_NAME);
- }
- public static void setReduceWork(Configuration conf, ReduceWork work) {
- setBaseWork(conf, REDUCE_PLAN_NAME, work);
- }
- public static ReduceWork getReduceWork(Configuration conf) {
- return (ReduceWork) getBaseWork(conf, REDUCE_PLAN_NAME);
- }
- /**
- * Pushes work into the global work map
- */
- public static void setBaseWork(Configuration conf, String name, BaseWork work) {
- Path path = getPlanPath(conf, name);
- gWorkMap.put(path, work);
- }
- /**
- * Returns the Map or Reduce plan
- * Side effect: the BaseWork returned is also placed in the gWorkMap
- * @param conf
- * @param name
- * @return the BaseWork for the supplied name; null if the plan cannot be found
- * @throws RuntimeException if the configuration is invalid or the plan cannot be loaded
- */
- private static BaseWork getBaseWork(Configuration conf, String name) {
- BaseWork gWork = null;
- Path path = null;
- InputStream in = null;
- try {
- path = getPlanPath(conf, name);
- assert path != null;
- if (!gWorkMap.containsKey(path)) {
- Path localPath;
- if (ShimLoader.getHadoopShims().isLocalMode(conf)) {
- localPath = path;
- } else {
- localPath = new Path(name);
- }
- if (HiveConf.getBoolVar(conf, ConfVars.HIVE_RPC_QUERY_PLAN)) {
- LOG.debug("Loading plan from string: "+path.toUri().getPath());
- String planString = conf.get(path.toUri().getPath());
- if (planString == null) {
- LOG.info("Could not find plan string in conf");
- return null;
- }
- byte[] planBytes = Base64.decodeBase64(planString);
- in = new ByteArrayInputStream(planBytes);
- in = new InflaterInputStream(in);
- } else {
- in = new FileInputStream(localPath.toUri().getPath());
- }
- if(MAP_PLAN_NAME.equals(name)){
- if (ExecMapper.class.getName().equals(conf.get(MAPRED_MAPPER_CLASS))){
- gWork = deserializePlan(in, MapWork.class, conf);
- } else if(RCFileMergeMapper.class.getName().equals(conf.get(MAPRED_MAPPER_CLASS))) {
- gWork = deserializePlan(in, MergeWork.class, conf);
- } else if(ColumnTruncateMapper.class.getName().equals(conf.get(MAPRED_MAPPER_CLASS))) {
- gWork = deserializePlan(in, ColumnTruncateWork.class, conf);
- } else if(PartialScanMapper.class.getName().equals(conf.get(MAPRED_MAPPER_CLASS))) {
- gWork = deserializePlan(in, PartialScanWork.class,conf);
- } else {
- throw new RuntimeException("unable to determine work from configuration ."
- + MAPRED_MAPPER_CLASS + " was "+ conf.get(MAPRED_MAPPER_CLASS)) ;
- }
- } else if (REDUCE_PLAN_NAME.equals(name)) {
- if(ExecReducer.class.getName().equals(conf.get(MAPRED_REDUCER_CLASS))) {
- gWork = deserializePlan(in, ReduceWork.class, conf);
- } else {
- throw new RuntimeException("unable to determine work from configuration ."
- + MAPRED_REDUCER_CLASS +" was "+ conf.get(MAPRED_REDUCER_CLASS)) ;
- }
- }
- gWorkMap.put(path, gWork);
- } else {
- LOG.debug("Found plan in cache.");
- gWork = gWorkMap.get(path);
- }
- return gWork;
- } catch (FileNotFoundException fnf) {
- // happens. e.g.: no reduce work.
- LOG.info("No plan file found: "+path);
- return null;
- } catch (Exception e) {
- LOG.error("Failed to load plan: "+path, e);
- throw new RuntimeException(e);
- } finally {
- if (in != null) {
- try {
- in.close();
- } catch (IOException cantBlameMeForTrying) { }
- }
- }
- }
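- // Sketch of the HIVE_RPC_QUERY_PLAN transport handled above, using names
- // from this file: setBaseWork() deflates and Base64-encodes the serialized
- // plan under the plan-path key, and getBaseWork() reverses it:
- //
- //   String planString = conf.get(path.toUri().getPath());
- //   InputStream in = new InflaterInputStream(
- //       new ByteArrayInputStream(Base64.decodeBase64(planString)));
- //   MapWork work = deserializePlan(in, MapWork.class, conf);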
- public static void setWorkflowAdjacencies(Configuration conf, QueryPlan plan) {
- try {
- Graph stageGraph = plan.getQueryPlan().getStageGraph();
- if (stageGraph == null) {
- return;
- }
- List<Adjacency> adjList = stageGraph.getAdjacencyList();
- if (adjList == null) {
- return;
- }
- for (Adjacency adj : adjList) {
- List<String> children = adj.getChildren();
- if (children == null || children.isEmpty()) {
- return;
- }
- conf.setStrings("mapreduce.workflow.adjacency."+adj.getNode(),
- children.toArray(new String[children.size()]));
- }
- } catch (IOException e) {
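- // best-effort only: the stage graph is advisory, so failures are ignored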
- }
- }
- public static List<String> getFieldSchemaString(List<FieldSchema> fl) {
- if (fl == null) {
- return null;
- }
- ArrayList<String> ret = new ArrayList<String>();
- for (FieldSchema f : fl) {
- ret.add(f.getName() + " " + f.getType()
- + (f.getComment() != null ? (" " + f.getComment()) : ""));
- }
- return ret;
- }
- /**
- * Java 1.5 workaround. From http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=5015403
- */
- public static class EnumDelegate extends DefaultPersistenceDelegate {
- @Override
- protected Expression instantiate(Object oldInstance, Encoder out) {
- return new Expression(Enum.class, "valueOf", new Object[] {oldInstance.getClass(),
- ((Enum<?>) oldInstance).name()});
- }
- @Override
- protected boolean mutatesTo(Object oldInstance, Object newInstance) {
- return oldInstance == newInstance;
- }
- }
- public static class MapDelegate extends DefaultPersistenceDelegate {
- @Override
- protected Expression instantiate(Object oldInstance, Encoder out) {
- Map oldMap = (Map) oldInstance;
- HashMap newMap = new HashMap(oldMap);
- return new Expression(newMap, HashMap.class, "new", new Object[] {});
- }
- @Override
- protected boolean mutatesTo(Object oldInstance, Object newInstance) {
- return false;
- }
- @Override
- protected void initialize(Class<?> type, Object oldInstance, Object newInstance, Encoder out) {
- // a Map is not a Collection: replay the contents with put() so the
- // decoded instance matches the original
- Map oldMap = (Map) oldInstance;
- Map newMap = (Map) newInstance;
- if (newMap.size() != 0) {
- out.writeStatement(new Statement(oldInstance, "clear", new Object[] {}));
- }
- for (Iterator i = oldMap.entrySet().iterator(); i.hasNext();) {
- Map.Entry entry = (Map.Entry) i.next();
- out.writeStatement(new Statement(oldInstance, "put", new Object[] {entry.getKey(), entry.getValue()}));
- }
- }
- }
- public static class SetDelegate extends DefaultPersistenceDelegate {
- @Override
- protected Expression instantiate(Object oldInstance, Encoder out) {
- Set oldSet = (Set) oldInstance;
- HashSet newSet = new HashSet(oldSet);
- return new Expression(newSet, HashSet.class, "new", new Object[] {});
- }
- @Override
- protected boolean mutatesTo(Object oldInstance, Object newInstance) {
- return false;
- }
- @Override
- protected void initialize(Class<?> type, Object oldInstance, Object newInstance, Encoder out) {
- java.util.Collection oldO = (java.util.Collection) oldInstance;
- java.util.Collection newO = (java.util.Collection) newInstance;
- if (newO.size() != 0) {
- out.writeStatement(new Statement(oldInstance, "clear", new Object[] {}));
- }
- for (Iterator i = oldO.iterator(); i.hasNext();) {
- out.writeStatement(new Statement(oldInstance, "add", new Object[] {i.next()}));
- }
- }
- }
- public static class ListDelegate extends DefaultPersistenceDelegate {
- @Override
- protected Expression instantiate(Object oldInstance, Encoder out) {
- List oldList = (List) oldInstance;
- ArrayList newList = new ArrayList(oldList);
- return new Expression(newList, ArrayList.class, "new", new Object[] {});
- }
- @Override
- protected boolean mutatesTo(Object oldInstance, Object newInstance) {
- return false;
- }
- @Override
- protected void initialize(Class<?> type, Object oldInstance, Object newInstance, Encoder out) {
- java.util.Collection oldO = (java.util.Collection) oldInstance;
- java.util.Collection newO = (java.util.Collection) newInstance;
- if (newO.size() != 0) {
- out.writeStatement(new Statement(oldInstance, "clear", new Object[] {}));
- }
- for (Iterator i = oldO.iterator(); i.hasNext();) {
- out.writeStatement(new Statement(oldInstance, "add", new Object[] {i.next()}));
- }
- }
- }
- /**
- * DatePersistenceDelegate. Needed to serialize java.util.Date
- * since it is not serialization friendly.
- * Also works for java.sql.Date since it derives from java.util.Date.
- */
- public static class DatePersistenceDelegate extends PersistenceDelegate {
- @Override
- protected Expression instantiate(Object oldInstance, Encoder out) {
- Date dateVal = (Date)oldInstance;
- Object[] args = { dateVal.getTime() };
- return new Expression(dateVal, dateVal.getClass(), "new", args);
- }
- @Override
- protected boolean mutatesTo(Object oldInstance, Object newInstance) {
- if (oldInstance == null || newInstance == null) {
- return false;
- }
- return oldInstance.getClass() == newInstance.getClass();
- }
- }
- /**
- * TimestampPersistenceDelegate. Needed to serialize java.sql.Timestamp since
- * it is not serialization friendly.
- */
- public static class TimestampPersistenceDelegate extends DatePersistenceDelegate {
- @Override
- protected void initialize(Class<?> type, Object oldInstance, Object newInstance, Encoder out) {
- Timestamp ts = (Timestamp)oldInstance;
- Object[] args = { ts.getNanos() };
- Statement stmt = new Statement(oldInstance, "setNanos", args);
- out.writeStatement(stmt);
- }
- }
- /**
- * Need to serialize org.antlr.runtime.CommonToken
- */
- public static class CommonTokenDelegate extends PersistenceDelegate {
- @Override
- protected Expression instantiate(Object oldInstance, Encoder out) {
- CommonToken ct = (CommonToken)oldInstance;
- Object[] args = {ct.getType(), ct.getText()};
- return new Expression(ct, ct.getClass(), "new", args);
- }
- }
- public static class PathDelegate extends PersistenceDelegate {
- @Override
- protected Expression instantiate(Object oldInstance, Encoder out) {
- Path p = (Path)oldInstance;
- Object[] args = {p.toString()};
- return new Expression(p, p.getClass(), "new", args);
- }
- }
- public static void setMapRedWork(Configuration conf, MapredWork w, Path hiveScratchDir) {
- setMapWork(conf, w.getMapWork(), hiveScratchDir, true);
- if (w.getReduceWork() != null) {
- setReduceWork(conf, w.getReduceWork(), hiveScratchDir, true);
- }
- }
- public static Path setMapWork(Configuration conf, MapWork w, Path hiveScratchDir, boolean useCache) {
- return setBaseWork(conf, w, hiveScratchDir, MAP_PLAN_NAME, useCache);
- }
- public static Path setReduceWork(Configuration conf, ReduceWork w, Path hiveScratchDir, boolean useCache) {
- return setBaseWork(conf, w, hiveScratchDir, REDUCE_PLAN_NAME, useCache);
- }
- private static Path setBaseWork(Configuration conf, BaseWork w, Path hiveScratchDir, String name, boolean useCache) {
- try {
- setPlanPath(conf, hiveScratchDir);
- Path planPath = getPlanPath(conf, name);
- OutputStream out;
- if (HiveConf.getBoolVar(conf, ConfVars.HIVE_RPC_QUERY_PLAN)) {
- // add it to the conf
- ByteArrayOutputStream byteOut = new ByteArrayOutputStream();
- out = new DeflaterOutputStream(byteOut, new Deflater(Deflater.BEST_SPEED));
- serializePlan(w, out, conf);
- LOG.info("Setting plan: "+planPath.toUri().getPath());
- conf.set(planPath.toUri().getPath(),
- Base64.encodeBase64String(byteOut.toByteArray()));
- } else {
- // use the default file system of the conf
- FileSystem fs = planPath.getFileSystem(conf);
- out = fs.create(planPath);
- serializePlan(w, out, conf);
- // Serialize the plan to the default hdfs instance
- // Except for hadoop local mode execution where we should be
- // able to get the plan directly from the cache
- if (useCache && !ShimLoader.getHadoopShims().isLocalMode(conf)) {
- // Set up distributed cache
- if (!DistributedCache.getSymlink(conf)) {
- DistributedCache.createSymlink(conf);
- }
- String uriWithLink = planPath.toUri().toString() + "#" + name;
- DistributedCache.addCacheFile(new URI(uriWithLink), conf);
- // set replication of the plan file to a high number. we use the same
- // replication factor as used by the hadoop jobclient for job.xml etc.
- short replication = (short) conf.getInt("mapred.submit.replication", 10);
- fs.setReplication(planPath, replication);
- }
- }
- // Cache the plan in this process
- gWorkMap.put(planPath, w);
- return planPath;
- } catch (Exception e) {
- LOG.error("Failed to set up plan " + name, e);
- throw new RuntimeException(e);
- }
- }
- private static Path getPlanPath(Configuration conf, String name) {
- Path planPath = getPlanPath(conf);
- if (planPath == null) {
- return null;
- }
- return new Path(planPath, name);
- }
- private static void setPlanPath(Configuration conf, Path hiveScratchDir) throws IOException {
- if (getPlanPath(conf) == null) {
- // this is the unique conf ID, which is kept in JobConf as part of the plan file name
- String jobID = UUID.randomUUID().toString();
- Path planPath = new Path(hiveScratchDir, jobID);
- FileSystem fs = planPath.getFileSystem(conf);
- fs.mkdirs(planPath);
- HiveConf.setVar(conf, HiveConf.ConfVars.PLAN, planPath.toUri().toString());
- }
- }
- public static Path getPlanPath(Configuration conf) {
- String plan = HiveConf.getVar(conf, HiveConf.ConfVars.PLAN);
- if (plan != null && !plan.isEmpty()) {
- return new Path(plan);
- }
- return null;
- }
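- // Plan-path layout, for reference: setPlanPath() creates
- // <hiveScratchDir>/<random-UUID>/ and records it under HiveConf.ConfVars.PLAN;
- // the individual plans then live at <planPath>/map.xml and <planPath>/reduce.xml.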
- /**
- * Serializes expression via Kryo.
- * @param expr Expression.
- * @return Bytes.
- */
- public static byte[] serializeExpressionToKryo(ExprNodeGenericFuncDesc expr) {
- return serializeObjectToKryo(expr);
- }
- /**
- * Deserializes expression from Kryo.
- * @param bytes Bytes containing the expression.
- * @return the deserialized expression.
- */
- public static ExprNodeGenericFuncDesc deserializeExpressionFromKryo(byte[] bytes) {
- return deserializeObjectFromKryo(bytes, ExprNodeGenericFuncDesc.class);
- }
- public static String serializeExpression(ExprNodeGenericFuncDesc expr) {
- try {
- return new String(Base64.encodeBase64(serializeExpressionToKryo(expr)), "UTF-8");
- } catch (UnsupportedEncodingException ex) {
- throw new RuntimeException("UTF-8 support required", ex);
- }
- }
- public static ExprNodeGenericFuncDesc deserializeExpression(String s) {
- byte[] bytes;
- try {
- bytes = Base64.decodeBase64(s.getBytes("UTF-8"));
- } catch (UnsupportedEncodingException ex) {
- throw new RuntimeException("UTF-8 support required", ex);
- }
- return deserializeExpressionFromKryo(bytes);
- }
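- // Usage sketch (hypothetical caller and expression): round-trip a predicate
- // through the Base64 string form, e.g. to ship it in a job conf:
- //
- //   ExprNodeGenericFuncDesc pred = ...;  // some filter expression
- //   String encoded = Utilities.serializeExpression(pred);
- //   ExprNodeGenericFuncDesc decoded = Utilities.deserializeExpression(encoded);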
- private static byte[] serializeObjectToKryo(Serializable object) {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- Output output = new Output(baos);
- runtimeSerializationKryo.get().writeObject(output, object);
- output.close();
- return baos.toByteArray();
- }
- private static <T extends Serializable> T deserializeObjectFromKryo(byte[] bytes, Class<T> clazz) {
- Input inp = new Input(new ByteArrayInputStream(bytes));
- T func = runtimeSerializationKryo.get().readObject(inp, clazz);
- inp.close();
- return func;
- }
- public static String serializeObject(Serializable expr) {
- try {
- return new String(Base64.encodeBase64(serializeObjectToKryo(expr)), "UTF-8");
- } catch (UnsupportedEncodingException ex) {
- throw new RuntimeException("UTF-8 support required", ex);
- }
- }
- public static <T extends Serializable> T deserializeObject(String s, Class<T> clazz) {
- try {
- return deserializeObjectFromKryo(Base64.decodeBase64(s.getBytes("UTF-8")), clazz);
- } catch (UnsupportedEncodingException ex) {
- throw new RuntimeException("UTF-8 support required", ex);
- }
- }
- public static class CollectionPersistenceDelegate extends DefaultPersistenceDelegate {
- @Override
- protected Expression instantiate(Object oldInstance, Encoder out) {
- return new Expression(oldInstance, oldInstance.getClass(), "new", null);
- }
- @Override
- protected void initialize(Class type, Object oldInstance, Object newInstance, Encoder out) {
- Iterator ite = ((Collection) oldInstance).iterator();
- while (ite.hasNext()) {
- out.writeStatement(new Statement(oldInstance, "add", new Object[] {ite.next()}));
- }
- }
- }
- /**
- * Kryo serializer for timestamp.
- */
- private static class TimestampSerializer extends
- com.esotericsoftware.kryo.Serializer<Timestamp> {
- @Override
- public Timestamp read(Kryo kryo, Input input, Class<Timestamp> clazz) {
- Timestamp ts = new Timestamp(input.readLong());
- ts.setNanos(input.readInt());
- return ts;
- }
- @Override
- public void write(Kryo kryo, Output output, Timestamp ts) {
- output.writeLong(ts.getTime());
- output.writeInt(ts.getNanos());
- }
- }
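- // Note: java.sql.Timestamp keeps sub-second precision in a separate nanos
- // field, which is why the serializer above writes both getTime() and
- // getNanos() to reproduce the value exactly.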
- /**
- * Custom Kryo serializer for java.sql.Date; without it, Kryo gets confused
- * between java.sql.Date and java.util.Date while deserializing.
- */
- private static class SqlDateSerializer extends
- com.esotericsoftware.kryo.Serializer<java.sql.Date> {
- @Override
- public java.sql.Date read(Kryo kryo, Input input, Class<java.sql.Date> clazz) {
- return new java.sql.Date(input.readLong());
- }
- @Override
- public void write(Kryo kryo, Output output, java.sql.Date sqlDate) {
- output.writeLong(sqlDate.getTime());
- }
- }
- private static class CommonTokenSerializer extends com.esotericsoftware.kryo.Serializer<CommonToken> {
- @Override
- public CommonToken read(Kryo kryo, Input input, Class<CommonToken> clazz) {
- return new CommonToken(input.readInt(), input.readString());
- }
- @Override
- public void write(Kryo kryo, Output output, CommonToken token) {
- output.writeInt(token.getType());
- output.writeString(token.getText());
- }
- }
- private static class PathSerializer extends com.esotericsoftware.kryo.Serializer<Path> {
- @Override
- public void write(Kryo kryo, Output output, Path path) {
- output.writeString(path.toUri().toString());
- }
- @Override
- public Path read(Kryo kryo, Input input, Class<Path> type) {
- return new Path(URI.create(input.readString()));
- }
- }
- public static Set<Operator<?>> cloneOperatorTree(Configuration conf, Set<Operator<?>> roots) {
- ByteArrayOutputStream baos = new ByteArrayOutputStream(4096);
- serializePlan(roots, baos, conf, true);
- Set<Operator<?>> result = deserializePlan(new ByteArrayInputStream(baos.toByteArray()),
- roots.getClass(), conf, true);
- return result;
- }
- private static void serializePlan(Object plan, OutputStream out, Configuration conf, boolean cloningPlan) {
- PerfLogger perfLogger = PerfLogger.getPerfLogger();
- perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SERIALIZE_PLAN);
- String serializationType = conf.get(HiveConf.ConfVars.PLAN_SERIALIZATION.varname, "kryo");
- LOG.info("Serializing " + plan.getClass().getSimpleName() + " via " + serializationType);
- if("javaXML".equalsIgnoreCase(serializationType)) {
- serializeObjectByJavaXML(plan, out);
- } else {
- if(cloningPlan) {
- serializeObjectByKryo(cloningQueryPlanKryo.get(), plan, out);
- } else {
- serializeObjectByKryo(runtimeSerializationKryo.get(), plan, out);
- }
- }
- perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SERIALIZE_PLAN);
- }
- /**
- * Serializes the plan.
- * @param plan The plan, such as QueryPlan, MapredWork, etc.
- * @param out The stream to write to.
- * @param conf to pick which serialization format is desired.
- */
- public static void serializePlan(Object plan, OutputStream out, Configuration conf) {
- serializePlan(plan, out, conf, false);
- }
- private static <T> T deserializePlan(InputStream in, Class<T> planClass, Configuration conf, boolean cloningPlan) {
- PerfLogger perfLogger = PerfLogger.getPerfLogger();
- perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DESERIALIZE_PLAN);
- T plan;
- String serializationType = conf.get(HiveConf.ConfVars.PLAN_SERIALIZATION.varname, "kryo");
- LOG.info("Deserializing " + planClass.getSimpleName() + " via " + serializationType);
- if("javaXML".equalsIgnoreCase(serializationType)) {
- plan = deserializeObjectByJavaXML(in);
- } else {
- if(cloningPlan) {
- plan = deserializeObjectByKryo(cloningQueryPlanKryo.get(), in, planClass);
- } else {
- plan = deserializeObjectByKryo(runtimeSerializationKryo.get(), in, planClass);
- }
- }
- perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DESERIALIZE_PLAN);
- return plan;
- }
- /**
- * Deserializes the plan.
- * @param in The stream to read from.
- * @param planClass class of plan
- * @param conf configuration
- * @return The plan, such as QueryPlan, MapredWork, etc.
- */
- public static <T> T deserializePlan(InputStream in, Class<T> planClass, Configuration conf) {
- return deserializePlan(in, planClass, conf, false);
- }
- /**
- * Clones a plan by serializing and deserializing it (Kryo by default, or
- * java XML when hive.plan.serialization.format is javaXML). Do not use unless necessary.
- * @param plan The plan.
- * @return The clone.
- */
- public static MapredWork clonePlan(MapredWork plan) {
- // TODO: need proper clone. Meanwhile, let's at least keep this horror in one place
- PerfLogger perfLogger = PerfLogger.getPerfLogger();
- perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.CLONE_PLAN);
- ByteArrayOutputStream baos = new ByteArrayOutputStream(4096);
- Configuration conf = new HiveConf();
- serializePlan(plan, baos, conf, true);
- MapredWork newPlan = deserializePlan(new ByteArrayInputStream(baos.toByteArray()),
- MapredWork.class, conf, true);
- perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.CLONE_PLAN);
- return newPlan;
- }
- /**
- * Serialize the object. This helper function mainly makes sure that enums,
- * counters, etc are handled properly.
- */
- private static void serializeObjectByJavaXML(Object plan, OutputStream out) {
- XMLEncoder e = new XMLEncoder(out);
- e.setExceptionListener(new ExceptionListener() {
- @Override
- public void exceptionThrown(Exception e) {
- LOG.warn(org.apache.hadoop.util.StringUtils.stringifyException(e));
- throw new RuntimeException("Cannot serialize object", e);
- }
- });
- // workaround for java 1.5
- e.setPersistenceDelegate(ExpressionTypes.class, new EnumDelegate());
- e.setPersistenceDelegate(GroupByDesc.Mode.class, new EnumDelegate());
- e.setPersistenceDelegate(java.sql.Date.class, new DatePersistenceDelegate());
- e.setPersistenceDelegate(Timestamp.class, new TimestampPersistenceDelegate());
- e.setPersistenceDelegate(org.datanucleus.store.types.backed.Map.class, new MapDelegate());
- e.setPersistenceDelegate(org.datanucleus.store.types.backed.List.class, new ListDelegate());
- e.setPersistenceDelegate(CommonToken.class, new CommonTokenDelegate());
- e.setPersistenceDelegate(Path.class, new PathDelegate());
- e.writeObject(plan);
- e.close();
- }
- /**
- * @param plan Usually of type MapredWork, MapredLocalWork etc.
- * @param out stream in which serialized plan is written into
- */
- private static void serializeObjectByKryo(Kryo kryo, Object plan, OutputStream out) {
- Output output = new Output(out);
- kryo.writeObject(output, plan);
- output.close();
- }
- /**
- * De-serialize an object. This helper function mainly makes sure that enums,
- * counters, etc are handled properly.
- */
- @SuppressWarnings("unchecked")
- private static <T> T deserializeObjectByJavaXML(InputStream in) {
- XMLDecoder d = null;
- try {
- d = new XMLDecoder(in, null, null);
- return (T) d.readObject();
- } finally {
- if (null != d) {
- d.close();
- }
- }
- }
- private static <T> T deserializeObjectByKryo(Kryo kryo, InputStream in, Class<T> clazz ) {
- Input inp = new Input(in);
- T t = kryo.readObject(inp,clazz);
- inp.close();
- return t;
- }
- // Kryo is not thread-safe, and creating a new Kryo instance is expensive,
- // so we keep one fully-configured instance per thread.
- public static ThreadLocal<Kryo> runtimeSerializationKryo = new ThreadLocal<Kryo>() {
- @Override
- protected synchronized Kryo initialValue() {
- Kryo kryo = new Kryo();
- kryo.setClassLoader(Thread.currentThread().getContextClassLoader());
- kryo.register(java.sql.Date.class, new SqlDateSerializer());
- kryo.register(java.sql.Timestamp.class, new TimestampSerializer());
- kryo.register(Path.class, new PathSerializer());
- kryo.setInstantiatorStrategy(new StdInstantiatorStrategy());
- removeField(kryo, Operator.class, "colExprMap");
- removeField(kryo, ColumnInfo.class, "objectInspector");
- removeField(kryo, MapWork.class, "opParseCtxMap");
- removeField(kryo, MapWork.class, "joinTree");
- return kryo;
- };
- };
- @SuppressWarnings("rawtypes")
- protected static void removeField(Kryo kryo, Class type, String fieldName) {
- FieldSerializer fld = new FieldSerializer(kryo, type);
- fld.removeField(fieldName);
- kryo.register(type, fld);
- }
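- // Effect of the registrations above: e.g. removeField(kryo, Operator.class,
- // "colExprMap") installs a FieldSerializer that skips colExprMap entirely,
- // so the field is neither written during serialization nor restored on read.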
- private static ThreadLocal<Kryo> cloningQueryPlanKryo = new ThreadLocal<Kryo>() {
- @Override
- protected synchronized Kryo initialValue() {
- Kryo kryo = new Kryo();
- kryo.setClassLoader(Thread.currentThread().getContextClassLoader());
- kryo.register(CommonToken.class, new CommonTokenSerializer());
- kryo.register(java.sql.Date.class, new SqlDateSerializer());
- kryo.register(java.sql.Timestamp.class, new TimestampSerializer());
- kryo.register(Path.class, new PathSerializer());
- kryo.setInstantiatorStrategy(new StdInstantiatorStrategy());
- return kryo;
- };
- };
- public static TableDesc defaultTd;
- static {
- // by default we expect ^A separated strings
- // This tableDesc does not provide column names. We should always use
- // PlanUtils.getDefaultTableDesc(String separatorCode, String columns)
- // or getBinarySortableTableDesc(List<FieldSchema> fieldSchemas) when
- // we know the column names.
- defaultTd = PlanUtils.getDefaultTableDesc("" + Utilities.ctrlaCode);
- }
- public static final int carriageReturnCode = 13;
- public static final int newLineCode = 10;
- public static final int tabCode = 9;
- public static final int ctrlaCode = 1;
- public static final String INDENT = " ";
- // Note: When DDL supports specifying what string to represent null,
- // we should specify "NULL" to represent null in the temp table, and then
- // we can make the following translation deprecated.
- public static String nullStringStorage = "\\N";
- public static String nullStringOutput = "NULL";
- public static Random randGen = new Random();
- /**
- * Gets the task id if we are running as a Hadoop job. Gets a random number otherwise.
- */
- public static String getTaskId(Configuration hconf) {
- String taskid = (hconf == null) ? null : hconf.get("mapred.task.id");
- if ((taskid == null) || taskid.equals("")) {
- return ("" + Math.abs(randGen.nextInt()));
- } else {
- /*
- * extract the task and attempt id from the hadoop taskid. in version 17 the leading component
- * was 'task_'. thereafter the leading component is 'attempt_'. in 17 - hadoop also seems to
- * have used _map_ and _reduce_ to denote map/reduce task types
- */
- String ret = taskid.replaceAll(".*_[mr]_", "").replaceAll(".*_(map|reduce)_", "");
- return (ret);
- }
- }
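- // Example (hypothetical attempt id): "attempt_200707121733_0003_m_000005_0"
- // has its leading "attempt_..._m_" component stripped, yielding "000005_0".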
- public static HashMap makeMap(Object... olist) {
- HashMap ret = new HashMap();
- for (int i = 0; i < olist.length; i += 2) {
- ret.put(olist[i], olist[i + 1]);
- }
- return (ret);
- }
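- // Illustrative call: makeMap("a", 1, "b", 2) pairs consecutive arguments
- // into {a=1, b=2}; the argument list is assumed to have even length.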
- public static Properties makeProperties(String... olist) {
- Properties ret = new Properties();
- for (int i = 0; i < olist.length; i += 2) {
- ret.setProperty(olist[i], olist[i + 1]);
- }
- return (ret);
- }
- public static ArrayList makeList(Object... olist) {
- ArrayList ret = new ArrayList();
- for (Object element : olist) {
- ret.add(element);
- }
- return (ret);
- }
- /**
- * StreamPrinter.
- *
- */
- public static class StreamPrinter extends Thread {
- InputStream is;
- String type;
- PrintStream os;
- public StreamPrinter(InputStream is, String type, PrintStream os) {
- this.is = is;
- this.type = type;
- this.os = os;
- }
- @Override
- public void run() {
- BufferedReader br = null;
- try {
- InputStreamReader isr = new InputStreamReader(is);
- br = new BufferedReader(isr);
- String line = null;
- if (type != null) {
- while ((line = br.readLine()) != null) {
- os.println(type + ">" + line);
- }
- } else {
- while ((line = br.readLine()) != null) {
- os.println(line);
- }
- }
- br.close();
- br = null;
- } catch (IOException ioe) {
- ioe.printStackTrace();
- } finally {
- IOUtils.closeStream(br);
- }
- }
- }
- public static TableDesc getTableDesc(Table tbl) {
- Properties props = tbl.getMetadata();
- props.put(serdeConstants.SERIALIZATION_LIB, tbl.getDeserializer().getClass().getName());
- return (new TableDesc(tbl.getInputFormatClass(), tbl
- .getOutputFormatClass(), props));
- }
- // column names and column types are all delimited by comma
- public static TableDesc getTableDesc(String cols, String colTypes) {
- return (new TableDesc(SequenceFileInputFormat.class,
- HiveSequenceFileOutputFormat.class, Utilities.makeProperties(
- serdeConstants.SERIALIZATION_FORMAT, "" + Utilities.ctrlaCode,
- serdeConstants.LIST_COLUMNS, cols,
- serdeConstants.LIST_COLUMN_TYPES, colTypes,
- serdeConstants.SERIALIZATION_LIB,LazySimpleSerDe.class.getName())));
- }
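- // Illustrative call: getTableDesc("key,value", "string,int") yields a
- // LazySimpleSerDe-backed descriptor with comma-delimited column names and
- // types, per the serde properties set above.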
- public static PartitionDesc getPartitionDesc(Partition part) throws HiveException {
- return (new PartitionDesc(part));
- }
- public static PartitionDesc getPartitionDescFromTableDesc(TableDesc tblDesc, Partition part)
- throws HiveException {
- return new PartitionDesc(part, tblDesc);
- }
- private static String getOpTreeSkel_helper(Operator<?> op, String indent) {
- if (op == null) {
- return "";
- }
- StringBuilder sb = new StringBuilder();
- sb.append(indent);
- sb.append(op.toString());
- sb.append("\n");
- if (op.getChildOperators() != null) {
- for (Object child : op.getChildOperators()) {
- sb.append(getOpTreeSkel_helper((Operator<?>) child, indent + " "));
- }
- }
- return sb.toString();
- }
- public static String getOpTreeSkel(Operator<?> op) {
- return getOpTreeSkel_helper(op, "");
- }
- private static boolean isWhitespace(int c) {
- if (c == -1) {
- return false;
- }
- return Character.isWhitespace((char) c);
- }
- public static boolean contentsEqual(InputStream is1, InputStream is2, boolean ignoreWhitespace)
- throws IOException {
- try {
- if ((is1 == is2) || (is1 == null && is2 == null)) {
- return true;
- }
- if (is1 == null || is2 == null) {
- return false;
- }
- while (true) {
- int c1 = is1.read();
- while (ignoreWhitespace && isWhitespace(c1)) {
- c1 = is1.read();
- }
- int c2 = is2.read();
- while (ignoreWhitespace && isWhitespace(c2)) {
- c2 = is2.read();
- }
- if (c1 == -1 && c2 == -1) {
- return true;
- }
- if (c1 != c2) {
- break;
- }
- }
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- }
- return false;
- }
- /**
- * convert "From src insert blah blah" to "From src insert ... blah"
- */
- public static String abbreviate(String str, int max) {
- str = str.trim();
- int len = str.length();
- int suffixlength = 20;
- if (len <= max) {
- return str;
- }
- suffixlength = Math.min(suffixlength, (max - 3) / 2);
- String rev = StringUtils.reverse(str);
- // get the last few words
- String suffix = WordUtils.abbreviate(rev, 0, suffixlength, "");
- suffix = StringUtils.reverse(suffix);
- // first few ..
- String prefix = StringUtils.abbreviate(str, max - suffix.length());
- return prefix + suffix;
- }
- public static final String NSTR = "";
- /**
- * StreamStatus.
- *
- */
- public static enum StreamStatus {
- EOF, TERMINATED
- }
- public static StreamStatus readColumn(DataInput in, OutputStream out) throws IOException {
- boolean foundCrChar = false;
- while (true) {
- int b;
- try {
- b = in.readByte();
- } catch (EOFException e) {
- return StreamStatus.EOF;
- }
- // Default new line characters on windows are "CRLF" so detect if there are any windows
- // native newline characters and handle them.
- if (Shell.WINDOWS) {
- // if the CR is not followed by the LF on windows then add it back to the stream and
- // proceed with next characters in the input stream.
- if (foundCrChar && b != Utilities.newLineCode) {
- out.write(Utilities.carriageReturnCode);
- foundCrChar = false;
- }
- if (b == Utilities.carriageReturnCode) {
- foundCrChar = true;
- continue;
- }
- }
- if (b == Utilities.newLineCode) {
- return StreamStatus.TERMINATED;
- }
- out.write(b);
- }
- // Unreachable
- }
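- // Example (assumed input bytes, Windows): "ab\r\n" writes 'a' and 'b' and
- // returns TERMINATED with the CR swallowed; a CR not followed by LF is
- // written back to the output before processing continues.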
- /**
- * Convert an output stream to a compressed output stream based on codecs and compression options
- * specified in the Job Configuration.
- *
- * @param jc
- * Job Configuration
- * @param out
- * Output Stream to be converted into compressed output stream
- * @return compressed output stream
- */
- public static OutputStream createCompressedStream(JobConf jc, OutputStream out)
- throws IOException {
- boolean isCompressed = FileOutputFormat.getCompressOutput(jc);
- return createCompressedStream(jc, out, isCompressed);
- }
- /**
- * Convert an output stream to a compressed output stream based on codecs in the Job
- * Configuration. The caller specifies directly whether the file is compressed or not.
- *
- * @param jc
- * Job Configuration
- * @param out
- * Output Stream to be converted into compressed output stream
- * @param isCompressed
- * whether the output stream needs to be compressed or not
- * @return compressed output stream
- */
- public static OutputStream createCompressedStream(JobConf jc, OutputStream out,
- boolean isCompressed) throws IOException {
- if (isCompressed) {
- Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(jc,
- DefaultCodec.class);
- CompressionCodec codec = ReflectionUtils.newInstance(codecClass, jc);
- return codec.createOutputStream(out);
- } else {
- return (out);
- }
- }
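- // Usage sketch (hypothetical fs and outPath): wrap a raw stream before
- // writing text rows:
- //
- //   OutputStream raw = fs.create(outPath);
- //   OutputStream out = Utilities.createCompressedStream(jc, raw);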
- /**
- * Based on compression option and configured output codec - get extension for output file. This
- * is only required for text files - not sequencefiles
- *
- * @param jc
- * Job Configuration
- * @param isCompressed
- * Whether the output file is compressed or not
- * @return the required file extension (example: .gz)
- * @deprecated Use {@link #getFileExtension(JobConf, boolean, HiveOutputFormat)}
- */
- @Deprecated
- public static String getFileExtension(JobConf jc, boolean isCompressed) {
- return getFileExtension(jc, isCompressed, new HiveIgnoreKeyTextOutputFormat());
- }
- /**
- * Based on compression option, output format, and configured output codec -
- * get extension for output file. Text files require an extension, whereas
- * others, like sequence files, do not.
- * <p>
- * The property <code>hive.output.file.extension</code> is used to determine
- * the extension - if set, it will override other logic for choosing an
- * extension.
- *
- * @param jc
- * Job Configuration
- * @param isCompressed
- * Whether the output file is compressed or not
- * @param hiveOutputFormat
- * The output format, used to detect if the format is text
- * @return the required file extension (example: .gz)
- */
- public static String getFileExtension(JobConf jc, boolean isCompressed,
- HiveOutputFormat<?, ?> hiveOutputFormat) {
- String extension = HiveConf.getVar(jc, HiveConf.ConfVars.OUTPUT_FILE_EXTENSION);
- if (!StringUtils.isEmpty(extension)) {
- return extension;
- }
- if ((hiveOutputFormat instanceof HiveIgnoreKeyTextOutputFormat) && isCompressed) {
- Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(jc,
- DefaultCodec.class);
- CompressionCodec codec = ReflectionUtils.newInstance(codecClass, jc);
- return codec.getDefaultExtension();
- }
- return "";
- }
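- // Example (assumed job settings): compressed text output with the default
- // DefaultCodec yields ".deflate"; a non-empty hive.output.file.extension,
- // when set, overrides the codec-derived extension entirely.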
- /**
- * Create a sequencefile output stream based on job configuration.
- *
- * @param jc
- * Job configuration
- * @param fs
- * File System to create file in
- * @param file
- * Path to be created
- * @param keyClass
- * Java Class for key
- * @param valClass
- * Java Class for value
- * @return output stream over the created sequencefile
- */
- public static SequenceFile.Writer createSequenceWriter(JobConf jc, FileSystem fs, Path file,
- Class<?> keyClass, Class<?> valClass, Progressable progressable) throws IOException {
- boolean isCompressed = FileOutputFormat.getCompressOutput(jc);
- return createSequenceWriter(jc, fs, file, keyClass, valClass, isCompressed, progressable);
- }
- /**
- * Create a sequencefile output stream based on job configuration. Uses the user-supplied compression
- * flag (rather than obtaining it from the Job Configuration).
- *
- * @param jc
- * Job configuration
- * @param fs
- * File System to create file in
- * @param file
- * Path to be created
- * @param keyClass
- * Java Class for key
- * @param valClass
- * Java Class for value
- * @return output stream over the created sequencefile
- */
- public static SequenceFile.Writer createSequenceWriter(JobConf jc, FileSystem fs, Path file,
- Class<?> keyClass, Class<?> valClass, boolean isCompressed, Progressable progressable)
- throws IOException {
- CompressionCodec codec = null;
- CompressionType compressionType = CompressionType.NONE;
- Class codecClass = null;
- if (isCompressed) {
- compressionType = SequenceFileOutputFormat.getOutputCompressionType(jc);
- codecClass = FileOutputFormat.getOutputCompressorClass(jc, DefaultCodec.class);
- codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, jc);
- }
- return (SequenceFile.createWriter(fs, jc, file, keyClass, valClass, compressionType, codec,
- progressable));
- }
- /**
- * Create an RCFile output stream based on job configuration. Uses the user-supplied compression flag
- * (rather than obtaining it from the Job Configuration).
- *
- * @param jc
- * Job configuration
- * @par…