
/tags/release-0.2.0-rc0/src/java/org/apache/hcatalog/common/HCatUtil.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hcatalog.common;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.hive.thrift.DelegationTokenIdentifier;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier;
import org.apache.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hcatalog.data.schema.HCatSchema;
import org.apache.hcatalog.data.schema.HCatSchemaUtils;
import org.apache.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.thrift.TException;

public class HCatUtil {

//  static final private Log LOG = LogFactory.getLog(HCatUtil.class);

  /**
   * Returns true if this JobContext belongs to a running task (i.e. "mapred.task.id" is set),
   * meaning the code is executing on the backend rather than on the client at job-submission time.
   */
  public static boolean checkJobContextIfRunningFromBackend(JobContext j){
    if (j.getConfiguration().get("mapred.task.id", "").equals("")){
      return false;
    }
    return true;
  }

  /**
   * Serialize an object into a printable string: Java serialization followed by the
   * {@link #encodeBytes(byte[])} encoding. Returns "" for a null object.
   */
  public static String serialize(Serializable obj) throws IOException {
    if (obj == null) {
      return "";
    }
    try {
      ByteArrayOutputStream serialObj = new ByteArrayOutputStream();
      ObjectOutputStream objStream = new ObjectOutputStream(serialObj);
      objStream.writeObject(obj);
      objStream.close();
      return encodeBytes(serialObj.toByteArray());
    } catch (Exception e) {
      throw new IOException("Serialization error: " + e.getMessage(), e);
    }
  }

  /**
   * Inverse of {@link #serialize(Serializable)}: decode the string and deserialize the object.
   * Returns null for a null or empty string.
   */
  public static Object deserialize(String str) throws IOException {
    if (str == null || str.length() == 0) {
      return null;
    }
    try {
      ByteArrayInputStream serialObj = new ByteArrayInputStream(decodeBytes(str));
      ObjectInputStream objStream = new ObjectInputStream(serialObj);
      return objStream.readObject();
    } catch (Exception e) {
      throw new IOException("Deserialization error: " + e.getMessage(), e);
    }
  }
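
  // Illustrative usage sketch: round-trip an arbitrary Serializable value (a HashMap here)
  // through serialize()/deserialize(). The resulting string is plain lower-case text, so it
  // can be stored in a Configuration value.
  public static void exampleSerializeRoundTrip() throws IOException {
    HashMap<String, Integer> original = new HashMap<String, Integer>();
    original.put("answer", 42);
    String encoded = serialize(original);
    @SuppressWarnings("unchecked")
    HashMap<String, Integer> copy = (HashMap<String, Integer>) deserialize(encoded);
    assert original.equals(copy);
  }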

  /**
   * Encode a byte array as a printable string: each byte becomes two characters in the
   * range 'a'..'p', one per 4-bit nibble (high nibble first).
   */
  public static String encodeBytes(byte[] bytes) {
    StringBuffer strBuf = new StringBuffer();

    for (int i = 0; i < bytes.length; i++) {
      strBuf.append((char) (((bytes[i] >> 4) & 0xF) + ('a')));
      strBuf.append((char) (((bytes[i]) & 0xF) + ('a')));
    }

    return strBuf.toString();
  }

  /**
   * Decode a string produced by {@link #encodeBytes(byte[])} back into the original byte array.
   */
  public static byte[] decodeBytes(String str) {
    byte[] bytes = new byte[str.length() / 2];
    for (int i = 0; i < str.length(); i+=2) {
      char c = str.charAt(i);
      bytes[i/2] = (byte) ((c - 'a') << 4);
      c = str.charAt(i+1);
      bytes[i/2] += (c - 'a');
    }
    return bytes;
  }
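
  // Illustrative sketch of the nibble encoding: 0x2F has high nibble 0x2 ('a' + 2 = 'c') and
  // low nibble 0xF ('a' + 15 = 'p'), so the single byte 0x2F encodes to "cp" and decodes back.
  public static void exampleNibbleEncoding() {
    String encoded = encodeBytes(new byte[] { 0x2F });
    assert encoded.equals("cp");
    assert decodeBytes(encoded)[0] == 0x2F;
  }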

  public static List<HCatFieldSchema> getHCatFieldSchemaList(FieldSchema ... fields) throws HCatException {
    List<HCatFieldSchema> result = new ArrayList<HCatFieldSchema>(fields.length);

    for(FieldSchema f : fields) {
      result.add(HCatSchemaUtils.getHCatFieldSchema(f));
    }

    return result;
  }

  public static List<HCatFieldSchema> getHCatFieldSchemaList(List<FieldSchema> fields) throws HCatException {
      if(fields == null) {
          return null;
      } else {
          List<HCatFieldSchema> result = new ArrayList<HCatFieldSchema>();
          for(FieldSchema f: fields) {
              result.add(HCatSchemaUtils.getHCatFieldSchema(f));
          }
          return result;
      }
  }

  public static HCatSchema extractSchemaFromStorageDescriptor(StorageDescriptor sd) throws HCatException {
      if (sd == null){
          throw new HCatException("Cannot construct partition info from an empty storage descriptor.");
      }
      HCatSchema schema = new HCatSchema(HCatUtil.getHCatFieldSchemaList(sd.getCols()));
      return schema;
  }

  public static List<FieldSchema> getFieldSchemaList(List<HCatFieldSchema> hcatFields) {
      if(hcatFields == null) {
          return null;
      } else {
          List<FieldSchema> result = new ArrayList<FieldSchema>();
          for(HCatFieldSchema f: hcatFields) {
              result.add(HCatSchemaUtils.getFieldSchema(f));
          }
          return result;
      }
  }

  public static Table getTable(HiveMetaStoreClient client, String dbName, String tableName) throws Exception{
    return client.getTable(dbName,tableName);
  }

  public static HCatSchema getTableSchemaWithPtnCols(Table table) throws IOException{
      HCatSchema tableSchema = extractSchemaFromStorageDescriptor(table.getSd());

      if( table.getPartitionKeys().size() != 0 ) {

        // add partition keys to table schema
        // NOTE : this assumes that we do not ever have ptn keys as columns inside the table schema as well!
        for (FieldSchema fs : table.getPartitionKeys()){
            tableSchema.append(HCatSchemaUtils.getHCatFieldSchema(fs));
        }
      }
      return tableSchema;
  }
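
  // Illustrative usage sketch (assumes a reachable Hive metastore; the database and table names
  // are hypothetical): fetch the Thrift table definition and build an HCatSchema that has the
  // partition columns appended after the data columns.
  public static HCatSchema exampleGetSchemaWithPartitionCols(HiveMetaStoreClient client) throws Exception {
    Table table = getTable(client, "mydb", "mytable");
    return getTableSchemaWithPtnCols(table);
  }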

  /**
   * Validate a partition schema against the table schema: for every column the two have in
   * common, the types must match. Returns the list of columns present in the partition but
   * not in the table.
   * @param table the table
   * @param partitionSchema the partition schema
   * @return the list of newly added fields
   * @throws IOException Signals that an I/O exception has occurred.
   */
  public static List<FieldSchema> validatePartitionSchema(Table table, HCatSchema partitionSchema) throws IOException {
    Map<String, FieldSchema> partitionKeyMap = new HashMap<String, FieldSchema>();

    for(FieldSchema field : table.getPartitionKeys()) {
      partitionKeyMap.put(field.getName().toLowerCase(), field);
    }

    List<FieldSchema> tableCols = table.getSd().getCols();
    List<FieldSchema> newFields = new ArrayList<FieldSchema>();

    for(int i = 0; i < partitionSchema.getFields().size(); i++) {

      FieldSchema field = HCatSchemaUtils.getFieldSchema(partitionSchema.getFields().get(i));

      FieldSchema tableField;
      if( i < tableCols.size() ) {
        tableField = tableCols.get(i);

        if( ! tableField.getName().equalsIgnoreCase(field.getName())) {
          throw new HCatException(ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, "Expected column <" + tableField.getName() +
              "> at position " + (i + 1) + ", found column <" + field.getName() + ">");
        }
      } else {
        tableField = partitionKeyMap.get(field.getName().toLowerCase());

        if( tableField != null ) {
          throw new HCatException(ErrorType.ERROR_SCHEMA_PARTITION_KEY, "Key <" +  field.getName() + ">");
        }
      }

      if( tableField == null ) {
        //field present in partition but not in table
        newFields.add(field);
      } else {
        //field present in both. validate type has not changed
        TypeInfo partitionType = TypeInfoUtils.getTypeInfoFromTypeString(field.getType());
        TypeInfo tableType = TypeInfoUtils.getTypeInfoFromTypeString(tableField.getType());

        if( ! partitionType.equals(tableType) ) {
          throw new HCatException(ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, "Column <" + field.getName() + ">, expected <" +
              tableType.getTypeName() + ">, got <" + partitionType.getTypeName() + ">");
        }
      }
    }

    return newFields;
  }
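
  // Illustrative sketch (hypothetical column layout): validate a partition whose layout carries
  // one trailing column beyond the table's own columns. Matching columns are type-checked; the
  // extra column comes back in the returned list so the caller can decide how to handle it.
  public static List<FieldSchema> exampleValidatePartition(Table table) throws IOException {
    // Suppose the table columns are (a string, b int) and the partition adds (c string).
    HCatSchema partitionSchema = new HCatSchema(getHCatFieldSchemaList(
        new FieldSchema("a", "string", null),
        new FieldSchema("b", "int", null),
        new FieldSchema("c", "string", null)));
    return validatePartitionSchema(table, partitionSchema);
  }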

  /**
   * Test if the first FsAction is more permissive than (or equal to) the second. This is useful,
   * for example, when we want to ensure that a file's owner has at least the permissions of the
   * group it belongs to. More completely (but potentially more cryptically):
   *  owner-r >= group-r >= world-r : bitwise and-masked with 0444 => 444 >= 440 >= 400 >= 000
   *  owner-w >= group-w >= world-w : bitwise and-masked with 0222 => 222 >= 220 >= 200 >= 000
   *  owner-x >= group-x >= world-x : bitwise and-masked with 0111 => 111 >= 110 >= 100 >= 000
   * @return true if the first FsAction is at least as permissive as the second, false if not.
   */
  public static boolean validateMorePermissive(FsAction first, FsAction second) {
    if ((first == FsAction.ALL) ||
        (second == FsAction.NONE) ||
        (first == second)) {
      return true;
    }
    switch (first){
      case READ_EXECUTE : return ((second == FsAction.READ) || (second == FsAction.EXECUTE));
      case READ_WRITE : return ((second == FsAction.READ) || (second == FsAction.WRITE));
      case WRITE_EXECUTE : return ((second == FsAction.WRITE) || (second == FsAction.EXECUTE));
    }
    return false;
  }
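
  // Illustrative sketch of a few comparisons, checked against the rules above: ALL covers
  // everything, READ_WRITE covers READ but not EXECUTE, and NONE covers nothing but itself.
  public static void exampleMorePermissive() {
    assert validateMorePermissive(FsAction.ALL, FsAction.READ);
    assert validateMorePermissive(FsAction.READ_WRITE, FsAction.READ);
    assert !validateMorePermissive(FsAction.READ_WRITE, FsAction.EXECUTE);
    assert !validateMorePermissive(FsAction.NONE, FsAction.READ);
  }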

  /**
   * Ensure that read or write permissions are not granted without also granting execute
   * permissions. Essentially, r--, rw- and -w- are invalid; ---, --x, r-x, -wx and rwx are valid.
   *
   * @param perms The FsAction to verify
   * @return true if any read or write permission is accompanied by execute permission
   */
  public static boolean validateExecuteBitPresentIfReadOrWrite(FsAction perms){
    if ((perms == FsAction.READ) || (perms == FsAction.WRITE) || (perms == FsAction.READ_WRITE)){
      return false;
    }
    return true;
  }

  public static Token<org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier> getJobTrackerDelegationToken(Configuration conf, String userName) throws Exception {
//    LOG.info("getJobTrackerDelegationToken("+conf+","+userName+")");
    JobClient jcl = new JobClient(new JobConf(conf, HCatOutputFormat.class));
    Token<org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier> t = jcl.getDelegationToken(new Text(userName));
//    LOG.info("got "+t);
    return t;

//    return null;
  }

  public static void cancelJobTrackerDelegationToken(String tokenStrForm, String tokenSignature) throws Exception {
//    LOG.info("cancelJobTrackerDelegationToken("+tokenStrForm+","+tokenSignature+")");
    JobClient jcl = new JobClient(new JobConf(new Configuration(), HCatOutputFormat.class));
    Token<org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier> t = extractJobTrackerToken(tokenStrForm,tokenSignature);
//    LOG.info("canceling "+t);
    try {
      jcl.cancelDelegationToken(t);
    } catch (Exception e) {
//      HCatUtil.logToken(LOG, "jcl token to cancel", t);
      // ignore if token has already been invalidated.
    }
  }
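
  // Illustrative usage sketch (assumes a secured cluster; the user name and the service string
  // used as the token signature are hypothetical): obtain a JobTracker delegation token, keep
  // its URL-safe string form, and cancel it once it is no longer needed.
  public static void exampleJobTrackerTokenLifecycle(Configuration conf) throws Exception {
    Token<org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier> t =
        getJobTrackerDelegationToken(conf, "hcatuser");
    String tokenSignature = "hcat-jt-token";
    t.setService(new Text(tokenSignature));
    String tokenStrForm = t.encodeToUrlString();
    // ... pass tokenStrForm and tokenSignature to whatever needs the token ...
    cancelJobTrackerDelegationToken(tokenStrForm, tokenSignature);
  }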

  public static Token<? extends AbstractDelegationTokenIdentifier>
      extractThriftToken(String tokenStrForm, String tokenSignature) throws MetaException, TException, IOException {
//    LOG.info("extractThriftToken("+tokenStrForm+","+tokenSignature+")");
    Token<? extends AbstractDelegationTokenIdentifier> t = new Token<DelegationTokenIdentifier>();
    t.decodeFromUrlString(tokenStrForm);
    t.setService(new Text(tokenSignature));
//    LOG.info("returning "+t);
    return t;
  }

  public static Token<org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier>
      extractJobTrackerToken(String tokenStrForm, String tokenSignature) throws MetaException, TException, IOException {
//    LOG.info("extractJobTrackerToken("+tokenStrForm+","+tokenSignature+")");
    Token<org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier> t =
        new Token<org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier>();
    t.decodeFromUrlString(tokenStrForm);
    t.setService(new Text(tokenSignature));
//    LOG.info("returning "+t);
    return t;
  }

  /**
   * Log the current stack trace at INFO level, one frame per line.
   * @param logger the log to write to
   */
  public static void logStackTrace(Log logger) {
    StackTraceElement[] stackTrace = new Exception().getStackTrace();
    for (int i = 1 ; i < stackTrace.length ; i++){
      logger.info("\t"+stackTrace[i].toString());
    }
  }

  /**
   * Debug-log every property of the given HiveConf at INFO level.
   * @param logger the log to write to
   * @param hc the HiveConf to dump
   */
  public static void logHiveConf(Log logger, HiveConf hc){
    logEntrySet(logger,"logging hiveconf:",hc.getAllProperties().entrySet());
  }

  public static void logList(Log logger, String itemName, List<? extends Object> list){
      logger.info(itemName+":");
      for (Object item : list){
          logger.info("\t["+item+"]");
      }
  }

  public static void logMap(Log logger, String itemName, Map<? extends Object,? extends Object> map){
    logEntrySet(logger,itemName,map.entrySet());
  }

  public static void logEntrySet(Log logger, String itemName, Set<? extends Entry> entrySet) {
    logger.info(itemName+":");
    for (Entry e : entrySet){
      logger.info("\t["+e.getKey()+"]=>["+e.getValue()+"]");
    }
  }

  public static void logAllTokens(Log logger, JobContext context) throws IOException {
    for (Token<? extends TokenIdentifier> t : context.getCredentials().getAllTokens()){
      logToken(logger,"token",t);
    }
  }

  public static void logToken(Log logger, String itemName, Token<? extends TokenIdentifier> t) throws IOException {
    logger.info(itemName+":");
    logger.info("\tencodeToUrlString : "+t.encodeToUrlString());
    logger.info("\ttoString : "+t.toString());
    logger.info("\tkind : "+t.getKind());
    logger.info("\tservice : "+t.getService());
  }
}