
/tags/release-0.2.0-rc0/src/java/org/apache/hcatalog/common/HCatUtil.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hcatalog.common;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.hive.thrift.DelegationTokenIdentifier;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier;
import org.apache.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hcatalog.data.schema.HCatSchema;
import org.apache.hcatalog.data.schema.HCatSchemaUtils;
import org.apache.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.thrift.TException;

public class HCatUtil {

    // static final private Log LOG = LogFactory.getLog(HCatUtil.class);

    // Returns true when running inside a launched map/reduce task, i.e. when
    // "mapred.task.id" is set in the job configuration; false on the client side.
    public static boolean checkJobContextIfRunningFromBackend(JobContext j) {
        if (j.getConfiguration().get("mapred.task.id", "").equals("")) {
            return false;
        }
        return true;
    }

    /**
     * Serialize an object into a string, using Java serialization plus the
     * printable encoding from {@link #encodeBytes(byte[])}.
     */
    public static String serialize(Serializable obj) throws IOException {
        if (obj == null) {
            return "";
        }
        try {
            ByteArrayOutputStream serialObj = new ByteArrayOutputStream();
            ObjectOutputStream objStream = new ObjectOutputStream(serialObj);
            objStream.writeObject(obj);
            objStream.close();
            return encodeBytes(serialObj.toByteArray());
        } catch (Exception e) {
            throw new IOException("Serialization error: " + e.getMessage(), e);
        }
    }

    /**
     * Deserialize a string produced by {@link #serialize(Serializable)};
     * returns null for a null or empty input.
     */
    public static Object deserialize(String str) throws IOException {
        if (str == null || str.length() == 0) {
            return null;
        }
        try {
            ByteArrayInputStream serialObj = new ByteArrayInputStream(decodeBytes(str));
            ObjectInputStream objStream = new ObjectInputStream(serialObj);
            return objStream.readObject();
        } catch (Exception e) {
            throw new IOException("Deserialization error: " + e.getMessage(), e);
        }
    }
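
    // Minimal round-trip sketch for the two methods above (illustrative only, not part
    // of the original file): any Serializable object can be packed into a printable
    // string, e.g. for storage in a job configuration value, and unpacked again later.
    //
    //     HashMap<String, String> props = new HashMap<String, String>();
    //     props.put("hcat.key", "value");
    //     String packed = HCatUtil.serialize(props);
    //     Map<?, ?> unpacked = (Map<?, ?>) HCatUtil.deserialize(packed);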

    /**
     * Encode bytes as a printable string: each byte becomes two characters
     * in the range 'a'..'p', one per 4-bit nibble.
     */
    public static String encodeBytes(byte[] bytes) {
        StringBuffer strBuf = new StringBuffer();
        for (int i = 0; i < bytes.length; i++) {
            strBuf.append((char) (((bytes[i] >> 4) & 0xF) + ('a')));
            strBuf.append((char) (((bytes[i]) & 0xF) + ('a')));
        }
        return strBuf.toString();
    }

    /**
     * Decode a string produced by {@link #encodeBytes(byte[])} back into bytes.
     */
    public static byte[] decodeBytes(String str) {
        byte[] bytes = new byte[str.length() / 2];
        for (int i = 0; i < str.length(); i += 2) {
            char c = str.charAt(i);
            bytes[i / 2] = (byte) ((c - 'a') << 4);
            c = str.charAt(i + 1);
            bytes[i / 2] += (c - 'a');
        }
        return bytes;
    }
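
    // A byte-level sketch of the encoding above (illustrative only, not part of the
    // original file): the output is always twice the length of the input, and every
    // character stays within 'a'..'p', so the result is safe to store as plain text.
    //
    //     byte[] raw = new byte[] { 0x00, 0x1F, (byte) 0xAB };
    //     String encoded = HCatUtil.encodeBytes(raw);   // "aa" + "bp" + "kl" -> "aabpkl"
    //     byte[] back = HCatUtil.decodeBytes(encoded);  // byte-for-byte equal to raw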

    public static List<HCatFieldSchema> getHCatFieldSchemaList(FieldSchema... fields) throws HCatException {
        List<HCatFieldSchema> result = new ArrayList<HCatFieldSchema>(fields.length);
        for (FieldSchema f : fields) {
            result.add(HCatSchemaUtils.getHCatFieldSchema(f));
        }
        return result;
    }

    public static List<HCatFieldSchema> getHCatFieldSchemaList(List<FieldSchema> fields) throws HCatException {
        if (fields == null) {
            return null;
        } else {
            List<HCatFieldSchema> result = new ArrayList<HCatFieldSchema>();
            for (FieldSchema f : fields) {
                result.add(HCatSchemaUtils.getHCatFieldSchema(f));
            }
            return result;
        }
    }

    public static HCatSchema extractSchemaFromStorageDescriptor(StorageDescriptor sd) throws HCatException {
        if (sd == null) {
            throw new HCatException("Cannot construct partition info from an empty storage descriptor.");
        }
        HCatSchema schema = new HCatSchema(HCatUtil.getHCatFieldSchemaList(sd.getCols()));
        return schema;
    }

    public static List<FieldSchema> getFieldSchemaList(List<HCatFieldSchema> hcatFields) {
        if (hcatFields == null) {
            return null;
        } else {
            List<FieldSchema> result = new ArrayList<FieldSchema>();
            for (HCatFieldSchema f : hcatFields) {
                result.add(HCatSchemaUtils.getFieldSchema(f));
            }
            return result;
        }
    }

    public static Table getTable(HiveMetaStoreClient client, String dbName, String tableName) throws Exception {
        return client.getTable(dbName, tableName);
    }

    public static HCatSchema getTableSchemaWithPtnCols(Table table) throws IOException {
        HCatSchema tableSchema = extractSchemaFromStorageDescriptor(table.getSd());

        if (table.getPartitionKeys().size() != 0) {
            // add partition keys to table schema
            // NOTE : this assumes that we do not ever have ptn keys as columns inside the table schema as well!
            for (FieldSchema fs : table.getPartitionKeys()) {
                tableSchema.append(HCatSchemaUtils.getHCatFieldSchema(fs));
            }
        }
        return tableSchema;
    }

    /**
     * Validate the partition schema: checks that the column types match between the partition
     * and the existing table schema. Returns the list of columns present in the partition
     * but not in the table.
     *
     * @param table the table
     * @param partitionSchema the partition schema
     * @return the list of newly added fields
     * @throws IOException Signals that an I/O exception has occurred.
     */
    public static List<FieldSchema> validatePartitionSchema(Table table, HCatSchema partitionSchema) throws IOException {
        Map<String, FieldSchema> partitionKeyMap = new HashMap<String, FieldSchema>();

        for (FieldSchema field : table.getPartitionKeys()) {
            partitionKeyMap.put(field.getName().toLowerCase(), field);
        }

        List<FieldSchema> tableCols = table.getSd().getCols();
        List<FieldSchema> newFields = new ArrayList<FieldSchema>();

        for (int i = 0; i < partitionSchema.getFields().size(); i++) {
            FieldSchema field = HCatSchemaUtils.getFieldSchema(partitionSchema.getFields().get(i));
            FieldSchema tableField;

            if (i < tableCols.size()) {
                tableField = tableCols.get(i);
                if (!tableField.getName().equalsIgnoreCase(field.getName())) {
                    throw new HCatException(ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, "Expected column <" + tableField.getName() +
                        "> at position " + (i + 1) + ", found column <" + field.getName() + ">");
                }
            } else {
                tableField = partitionKeyMap.get(field.getName().toLowerCase());
                if (tableField != null) {
                    // a partition key may not also appear as a data column in the partition schema
                    throw new HCatException(ErrorType.ERROR_SCHEMA_PARTITION_KEY, "Key <" + field.getName() + ">");
                }
            }

            if (tableField == null) {
                // field present in partition but not in table
                newFields.add(field);
            } else {
                // field present in both, validate that the type has not changed
                TypeInfo partitionType = TypeInfoUtils.getTypeInfoFromTypeString(field.getType());
                TypeInfo tableType = TypeInfoUtils.getTypeInfoFromTypeString(tableField.getType());

                if (!partitionType.equals(tableType)) {
                    throw new HCatException(ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, "Column <" + field.getName() + ">, expected <" +
                        tableType.getTypeName() + ">, got <" + partitionType.getTypeName() + ">");
                }
            }
        }
        return newFields;
    }
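
    // Sketch of how the validation above behaves (illustrative; the table and column
    // names are hypothetical). For a table with columns (a string, b int) and partition
    // key (dt string):
    //   - partition schema (a string, b int)            -> returns an empty list
    //   - partition schema (a string, b int, c string)  -> returns [c] as a newly added column
    //   - partition schema (a string, b string)         -> throws ERROR_SCHEMA_TYPE_MISMATCH for b
    //   - partition schema (a string, b int, dt string) -> throws ERROR_SCHEMA_PARTITION_KEY for dt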

    /**
     * Test if the first FsAction is more permissive than the second. This is useful in cases where
     * we want to ensure that a file owner has more permissions than the group they belong to, for example.
     * More completely (but potentially more cryptically):
     *   owner-r >= group-r >= world-r : bitwise and-masked with 0444 => 444 >= 440 >= 400 >= 000
     *   owner-w >= group-w >= world-w : bitwise and-masked with 0222 => 222 >= 220 >= 200 >= 000
     *   owner-x >= group-x >= world-x : bitwise and-masked with 0111 => 111 >= 110 >= 100 >= 000
     *
     * @return true if the first FsAction is more permissive than the second, false if not.
     */
    public static boolean validateMorePermissive(FsAction first, FsAction second) {
        if ((first == FsAction.ALL) ||
            (second == FsAction.NONE) ||
            (first == second)) {
            return true;
        }
        switch (first) {
            case READ_EXECUTE: return ((second == FsAction.READ) || (second == FsAction.EXECUTE));
            case READ_WRITE: return ((second == FsAction.READ) || (second == FsAction.WRITE));
            case WRITE_EXECUTE: return ((second == FsAction.WRITE) || (second == FsAction.EXECUTE));
        }
        return false;
    }
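
    // Illustrative calls (not part of the original file):
    //   validateMorePermissive(FsAction.READ_WRITE, FsAction.READ)    -> true
    //   validateMorePermissive(FsAction.READ, FsAction.READ_WRITE)    -> false
    //   validateMorePermissive(FsAction.ALL, FsAction.NONE)           -> true
    //   validateMorePermissive(FsAction.READ_EXECUTE, FsAction.WRITE) -> false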

    /**
     * Ensure that read or write permissions are not granted without also granting execute permissions.
     * Essentially, r--, rw- and -w- are invalid;
     * r-x, -wx, rwx, --- and --x are valid.
     *
     * @param perms The FsAction to verify
     * @return true if the presence of read or write permission is accompanied by execute permission
     */
    public static boolean validateExecuteBitPresentIfReadOrWrite(FsAction perms) {
        if ((perms == FsAction.READ) || (perms == FsAction.WRITE) || (perms == FsAction.READ_WRITE)) {
            return false;
        }
        return true;
    }
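
    // Illustrative calls (not part of the original file):
    //   validateExecuteBitPresentIfReadOrWrite(FsAction.READ)         -> false  (r-- lacks execute)
    //   validateExecuteBitPresentIfReadOrWrite(FsAction.READ_EXECUTE) -> true   (r-x)
    //   validateExecuteBitPresentIfReadOrWrite(FsAction.NONE)         -> true   (--- is allowed)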

    public static Token<org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier> getJobTrackerDelegationToken(
            Configuration conf, String userName) throws Exception {
        // LOG.info("getJobTrackerDelegationToken("+conf+","+userName+")");
        JobClient jcl = new JobClient(new JobConf(conf, HCatOutputFormat.class));
        Token<org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier> t =
            jcl.getDelegationToken(new Text(userName));
        // LOG.info("got "+t);
        return t;
        // return null;
    }

    public static void cancelJobTrackerDelegationToken(String tokenStrForm, String tokenSignature) throws Exception {
        // LOG.info("cancelJobTrackerDelegationToken("+tokenStrForm+","+tokenSignature+")");
        JobClient jcl = new JobClient(new JobConf(new Configuration(), HCatOutputFormat.class));
        Token<org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier> t =
            extractJobTrackerToken(tokenStrForm, tokenSignature);
        // LOG.info("canceling "+t);
        try {
            jcl.cancelDelegationToken(t);
        } catch (Exception e) {
            // HCatUtil.logToken(LOG, "jcl token to cancel", t);
            // ignore if token has already been invalidated.
        }
    }

    public static Token<? extends AbstractDelegationTokenIdentifier> extractThriftToken(
            String tokenStrForm, String tokenSignature) throws MetaException, TException, IOException {
        // LOG.info("extractThriftToken("+tokenStrForm+","+tokenSignature+")");
        Token<? extends AbstractDelegationTokenIdentifier> t = new Token<DelegationTokenIdentifier>();
        t.decodeFromUrlString(tokenStrForm);
        t.setService(new Text(tokenSignature));
        // LOG.info("returning "+t);
        return t;
    }

    public static Token<org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier> extractJobTrackerToken(
            String tokenStrForm, String tokenSignature) throws MetaException, TException, IOException {
        // LOG.info("extractJobTrackerToken("+tokenStrForm+","+tokenSignature+")");
        Token<org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier> t =
            new Token<org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier>();
        t.decodeFromUrlString(tokenStrForm);
        t.setService(new Text(tokenSignature));
        // LOG.info("returning "+t);
        return t;
    }

    /**
     * Logging stack trace
     * @param logger
     */
    public static void logStackTrace(Log logger) {
        StackTraceElement[] stackTrace = new Exception().getStackTrace();
        for (int i = 1; i < stackTrace.length; i++) {
            logger.info("\t" + stackTrace[i].toString());
        }
    }

    /**
     * debug log the hive conf
     * @param logger
     * @param hc
     */
    public static void logHiveConf(Log logger, HiveConf hc) {
        logEntrySet(logger, "logging hiveconf:", hc.getAllProperties().entrySet());
    }

    public static void logList(Log logger, String itemName, List<? extends Object> list) {
        logger.info(itemName + ":");
        for (Object item : list) {
            logger.info("\t[" + item + "]");
        }
    }

    public static void logMap(Log logger, String itemName, Map<? extends Object, ? extends Object> map) {
        logEntrySet(logger, itemName, map.entrySet());
    }

    public static void logEntrySet(Log logger, String itemName, Set<? extends Entry> entrySet) {
        logger.info(itemName + ":");
        for (Entry e : entrySet) {
            logger.info("\t[" + e.getKey() + "]=>[" + e.getValue() + "]");
        }
    }

    public static void logAllTokens(Log logger, JobContext context) throws IOException {
        for (Token<? extends TokenIdentifier> t : context.getCredentials().getAllTokens()) {
            logToken(logger, "token", t);
        }
    }

    public static void logToken(Log logger, String itemName, Token<? extends TokenIdentifier> t) throws IOException {
        logger.info(itemName + ":");
        logger.info("\tencodeToUrlString : " + t.encodeToUrlString());
        logger.info("\ttoString : " + t.toString());
        logger.info("\tkind : " + t.getKind());
        logger.info("\tservice : " + t.getService());
    }
}
  327. }