
/tags/release-0.0.0-rc0/hive/external/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.index.compact;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Set;
import java.util.Map.Entry;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Index;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.index.AbstractIndexHandler;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.HiveUtils;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;

/**
 * Index handler for the compact index. The compact index table stores, for
 * each distinct value of the indexed columns, the name of the file that
 * contains the value and the block offsets at which it occurs in the base
 * table.
 */
public class CompactIndexHandler extends AbstractIndexHandler {

  private Configuration configuration;

  @Override
  public void analyzeIndexDefinition(Table baseTable, Index index,
      Table indexTable) throws HiveException {
    StorageDescriptor storageDesc = index.getSd();
    if (this.usesIndexTable() && indexTable != null) {
      StorageDescriptor indexTableSd = storageDesc.deepCopy();
      List<FieldSchema> indexTblCols = indexTableSd.getCols();
      // The index table carries the indexed columns plus two extra columns:
      // the bucket (file) name and the list of block offsets within that file.
      FieldSchema bucketFileName = new FieldSchema("_bucketname", "string", "");
      indexTblCols.add(bucketFileName);
      FieldSchema offSets = new FieldSchema("_offsets", "array<bigint>", "");
      indexTblCols.add(offSets);
      indexTable.setSd(indexTableSd);
    }
  }
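
  // Illustrative sketch only (not part of the original source): for an index
  // defined on a single string column named `key`, the storage descriptor built
  // in analyzeIndexDefinition above would give the index table a schema roughly
  // like (key string, _bucketname string, _offsets array<bigint>). The column
  // name `key` is hypothetical; _bucketname and _offsets are the columns added
  // by this handler.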

  @Override
  public List<Task<?>> generateIndexBuildTaskList(
      org.apache.hadoop.hive.ql.metadata.Table baseTbl,
      org.apache.hadoop.hive.metastore.api.Index index,
      List<Partition> indexTblPartitions, List<Partition> baseTblPartitions,
      org.apache.hadoop.hive.ql.metadata.Table indexTbl,
      Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws HiveException {
    try {
      TableDesc desc = Utilities.getTableDesc(indexTbl);

      List<Partition> newBaseTblPartitions = new ArrayList<Partition>();
      List<Task<?>> indexBuilderTasks = new ArrayList<Task<?>>();

      if (!baseTbl.isPartitioned()) {
        // The base table has no partitions, so build the index over the whole
        // table with a single task.
        Task<?> indexBuilder = getIndexBuilderMapRedTask(inputs, outputs,
            index.getSd().getCols(), false,
            new PartitionDesc(desc, null), indexTbl.getTableName(),
            new PartitionDesc(Utilities.getTableDesc(baseTbl), null),
            baseTbl.getTableName(), indexTbl.getDbName());
        indexBuilderTasks.add(indexBuilder);
      } else {
        // Check that every index table partition still exists in the base table.
        for (int i = 0; i < indexTblPartitions.size(); i++) {
          Partition indexPart = indexTblPartitions.get(i);
          Partition basePart = null;
          for (int j = 0; j < baseTblPartitions.size(); j++) {
            if (baseTblPartitions.get(j).getName().equals(indexPart.getName())) {
              basePart = baseTblPartitions.get(j);
              newBaseTblPartitions.add(baseTblPartitions.get(j));
              break;
            }
          }
          if (basePart == null) {
            throw new RuntimeException(
                "Partitions of base table and index table are inconsistent.");
          }
          // For each matching partition, spawn a map-reduce task that rebuilds
          // the corresponding index table partition.
          Task<?> indexBuilder = getIndexBuilderMapRedTask(inputs, outputs,
              index.getSd().getCols(), true,
              new PartitionDesc(indexPart), indexTbl.getTableName(),
              new PartitionDesc(basePart), baseTbl.getTableName(), indexTbl.getDbName());
          indexBuilderTasks.add(indexBuilder);
        }
      }
      return indexBuilderTasks;
    } catch (Exception e) {
      throw new SemanticException(e);
    }
  }
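
  // Illustrative sketch only (not part of the original source): for a base
  // table `src` indexed on column `key`, with an index table named
  // `default__src_src_index__`, the query assembled by getIndexBuilderMapRedTask
  // below would look roughly like:
  //
  //   INSERT OVERWRITE TABLE `default__src_src_index__`
  //   SELECT `key`, INPUT__FILE__NAME, collect_set(BLOCK__OFFSET__INSIDE__FILE)
  //   FROM `src`
  //   GROUP BY `key`, INPUT__FILE__NAME
  //
  // The table and column names above are hypothetical; the virtual column names
  // are whatever VirtualColumn.FILENAME and VirtualColumn.BLOCKOFFSET resolve to.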

  private Task<?> getIndexBuilderMapRedTask(Set<ReadEntity> inputs, Set<WriteEntity> outputs,
      List<FieldSchema> indexField, boolean partitioned,
      PartitionDesc indexTblPartDesc, String indexTableName,
      PartitionDesc baseTablePartDesc, String baseTableName, String dbName) {

    String indexCols = HiveUtils.getUnparsedColumnNamesFromFieldSchema(indexField);

    // Form a new INSERT OVERWRITE query that populates the index table.
    StringBuilder command = new StringBuilder();
    LinkedHashMap<String, String> partSpec = indexTblPartDesc.getPartSpec();

    command.append("INSERT OVERWRITE TABLE " + HiveUtils.unparseIdentifier(indexTableName));
    if (partitioned && indexTblPartDesc != null) {
      command.append(" PARTITION ( ");
      List<String> ret = getPartKVPairStringArray(partSpec);
      for (int i = 0; i < ret.size(); i++) {
        String partKV = ret.get(i);
        command.append(partKV);
        if (i < ret.size() - 1) {
          command.append(",");
        }
      }
      command.append(" ) ");
    }

    // Select the indexed columns, the file name virtual column, and the set of
    // block offsets at which each value appears in that file.
    command.append(" SELECT ");
    command.append(indexCols);
    command.append(",");
    command.append(VirtualColumn.FILENAME.getName());
    command.append(",");
    command.append(" collect_set (");
    command.append(VirtualColumn.BLOCKOFFSET.getName());
    command.append(") ");
    command.append(" FROM " + HiveUtils.unparseIdentifier(baseTableName));

    LinkedHashMap<String, String> basePartSpec = baseTablePartDesc.getPartSpec();
    if (basePartSpec != null) {
      // Restrict the scan to the base table partition being indexed.
      command.append(" WHERE ");
      List<String> pkv = getPartKVPairStringArray(basePartSpec);
      for (int i = 0; i < pkv.size(); i++) {
        String partKV = pkv.get(i);
        command.append(partKV);
        if (i < pkv.size() - 1) {
          command.append(" AND ");
        }
      }
    }
    command.append(" GROUP BY ");
    command.append(indexCols + ", " + VirtualColumn.FILENAME.getName());

    // Compile the generated query and reuse its root task as the index build task.
    Driver driver = new Driver(new HiveConf(getConf(), CompactIndexHandler.class));
    driver.compile(command.toString());

    Task<?> rootTask = driver.getPlan().getRootTasks().get(0);
    inputs.addAll(driver.getPlan().getInputs());
    outputs.addAll(driver.getPlan().getOutputs());

    // Schedule a follow-up task that updates the index table metadata once the
    // index data has been rebuilt.
    IndexMetadataChangeWork indexMetaChange = new IndexMetadataChangeWork(partSpec, indexTableName, dbName);
    IndexMetadataChangeTask indexMetaChangeTsk = new IndexMetadataChangeTask();
    indexMetaChangeTsk.setWork(indexMetaChange);
    rootTask.addDependentTask(indexMetaChangeTsk);

    return rootTask;
  }

  /**
   * Converts a partition spec into a list of "key = 'value'" strings, with each
   * key quoted as an identifier; for example {ds=2010-01-01} becomes something
   * like [ds = '2010-01-01'].
   */
  private List<String> getPartKVPairStringArray(
      LinkedHashMap<String, String> partSpec) {
    List<String> ret = new ArrayList<String>(partSpec.size());
    Iterator<Entry<String, String>> iter = partSpec.entrySet().iterator();
    while (iter.hasNext()) {
      StringBuilder sb = new StringBuilder();
      Entry<String, String> p = iter.next();
      sb.append(HiveUtils.unparseIdentifier(p.getKey()));
      sb.append(" = ");
      sb.append("'");
      sb.append(p.getValue());
      sb.append("'");
      ret.add(sb.toString());
    }
    return ret;
  }

  @Override
  public boolean usesIndexTable() {
    return true;
  }

  @Override
  public Configuration getConf() {
    return configuration;
  }

  @Override
  public void setConf(Configuration conf) {
    this.configuration = conf;
  }
}