
/tags/release-0.0.0-rc0/hive/external/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.index.compact;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Index;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.index.AbstractIndexHandler;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.HiveUtils;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;

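/**
 * Index handler for Hive's "compact" index. A compact index materializes the
 * index as a regular Hive table whose rows map each distinct combination of
 * indexed key values to the file it appears in ({@code _bucketname}) and the
 * block offsets at which it occurs ({@code _offsets}).
 */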
public class CompactIndexHandler extends AbstractIndexHandler {

  private Configuration configuration;

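  /**
   * Augments the index table's storage descriptor with the two implicit
   * compact-index columns, {@code _bucketname} (string) and {@code _offsets}
   * (array&lt;bigint&gt;). The descriptor is deep-copied from the index
   * definition so the original is left untouched.
   */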
  @Override
  public void analyzeIndexDefinition(Table baseTable, Index index,
      Table indexTable) throws HiveException {
    StorageDescriptor storageDesc = index.getSd();
    if (this.usesIndexTable() && indexTable != null) {
      StorageDescriptor indexTableSd = storageDesc.deepCopy();
      List<FieldSchema> indexTblCols = indexTableSd.getCols();
      FieldSchema bucketFileName = new FieldSchema("_bucketname", "string", "");
      indexTblCols.add(bucketFileName);
      FieldSchema offSets = new FieldSchema("_offsets", "array<bigint>", "");
      indexTblCols.add(offSets);
      indexTable.setSd(indexTableSd);
    }
  }

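  /**
   * Builds one index-rebuild task per unit of work: a single task when the
   * base table is unpartitioned, otherwise one task per index table
   * partition. Every index table partition must have a matching base table
   * partition, or a RuntimeException is thrown.
   */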
  @Override
  public List<Task<?>> generateIndexBuildTaskList(
      org.apache.hadoop.hive.ql.metadata.Table baseTbl,
      org.apache.hadoop.hive.metastore.api.Index index,
      List<Partition> indexTblPartitions, List<Partition> baseTblPartitions,
      org.apache.hadoop.hive.ql.metadata.Table indexTbl,
      Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws HiveException {
    try {

      TableDesc desc = Utilities.getTableDesc(indexTbl);

      List<Partition> newBaseTblPartitions = new ArrayList<Partition>();

      List<Task<?>> indexBuilderTasks = new ArrayList<Task<?>>();

      if (!baseTbl.isPartitioned()) {
        // the base table is not partitioned; build the index over the whole table
        Task<?> indexBuilder = getIndexBuilderMapRedTask(inputs, outputs, index.getSd().getCols(), false,
            new PartitionDesc(desc, null), indexTbl.getTableName(),
            new PartitionDesc(Utilities.getTableDesc(baseTbl), null),
            baseTbl.getTableName(), indexTbl.getDbName());
        indexBuilderTasks.add(indexBuilder);
      } else {

        // check that each index table partition still exists in the base table
        for (int i = 0; i < indexTblPartitions.size(); i++) {
          Partition indexPart = indexTblPartitions.get(i);
          Partition basePart = null;
          for (int j = 0; j < baseTblPartitions.size(); j++) {
            if (baseTblPartitions.get(j).getName().equals(indexPart.getName())) {
              basePart = baseTblPartitions.get(j);
              newBaseTblPartitions.add(baseTblPartitions.get(j));
              break;
            }
          }
          if (basePart == null) {
            throw new RuntimeException(
                "Partitions of base table and index table are inconsistent.");
          }
          // for each matching partition, spawn a map-reduce task
          Task<?> indexBuilder = getIndexBuilderMapRedTask(inputs, outputs, index.getSd().getCols(), true,
              new PartitionDesc(indexPart), indexTbl.getTableName(),
              new PartitionDesc(basePart), baseTbl.getTableName(), indexTbl.getDbName());
          indexBuilderTasks.add(indexBuilder);
        }
      }
      return indexBuilderTasks;
    } catch (Exception e) {
      throw new SemanticException(e);
    }
  }

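  /**
   * Compiles an INSERT OVERWRITE query that (re)populates the index table by
   * grouping the base table on the indexed columns plus the file-name virtual
   * column and collecting the block-offset virtual column values. For
   * illustration only (the table and index names here are hypothetical), an
   * index on column {@code key} of base table {@code src} would yield roughly:
   *
   *   INSERT OVERWRITE TABLE `src_key_idx`
   *   SELECT `key`, INPUT__FILE__NAME, collect_set(BLOCK__OFFSET__INSIDE__FILE)
   *   FROM `src`
   *   GROUP BY `key`, INPUT__FILE__NAME
   *
   * The query is compiled with a throwaway Driver; its root task is returned,
   * with an IndexMetadataChangeTask chained after it.
   */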
  private Task<?> getIndexBuilderMapRedTask(Set<ReadEntity> inputs, Set<WriteEntity> outputs,
      List<FieldSchema> indexField, boolean partitioned,
      PartitionDesc indexTblPartDesc, String indexTableName,
      PartitionDesc baseTablePartDesc, String baseTableName, String dbName) {

    String indexCols = HiveUtils.getUnparsedColumnNamesFromFieldSchema(indexField);

    // form a new INSERT OVERWRITE query that populates the index table
    StringBuilder command = new StringBuilder();
    LinkedHashMap<String, String> partSpec = indexTblPartDesc.getPartSpec();

    command.append("INSERT OVERWRITE TABLE " + HiveUtils.unparseIdentifier(indexTableName));
    if (partitioned && indexTblPartDesc != null) {
      command.append(" PARTITION ( ");
      List<String> ret = getPartKVPairStringArray(partSpec);
      for (int i = 0; i < ret.size(); i++) {
        String partKV = ret.get(i);
        command.append(partKV);
        if (i < ret.size() - 1) {
          command.append(",");
        }
      }
      command.append(" ) ");
    }

    command.append(" SELECT ");
    command.append(indexCols);
    command.append(",");
    command.append(VirtualColumn.FILENAME.getName());
    command.append(",");
    command.append(" collect_set (");
    command.append(VirtualColumn.BLOCKOFFSET.getName());
    command.append(") ");
    command.append(" FROM " + HiveUtils.unparseIdentifier(baseTableName));
    LinkedHashMap<String, String> basePartSpec = baseTablePartDesc.getPartSpec();
    if (basePartSpec != null) {
      // restrict the rebuild to the base table partition being indexed
      command.append(" WHERE ");
      List<String> pkv = getPartKVPairStringArray(basePartSpec);
      for (int i = 0; i < pkv.size(); i++) {
        String partKV = pkv.get(i);
        command.append(partKV);
        if (i < pkv.size() - 1) {
          command.append(" AND ");
        }
      }
    }
    command.append(" GROUP BY ");
    command.append(indexCols + ", " + VirtualColumn.FILENAME.getName());

    // compile the query with a private Driver instance and reuse its root task
    Driver driver = new Driver(new HiveConf(getConf(), CompactIndexHandler.class));
    driver.compile(command.toString());

    Task<?> rootTask = driver.getPlan().getRootTasks().get(0);
    inputs.addAll(driver.getPlan().getInputs());
    outputs.addAll(driver.getPlan().getOutputs());

    // after the rebuild finishes, update the index table's metadata
    IndexMetadataChangeWork indexMetaChange = new IndexMetadataChangeWork(partSpec, indexTableName, dbName);
    IndexMetadataChangeTask indexMetaChangeTsk = new IndexMetadataChangeTask();
    indexMetaChangeTsk.setWork(indexMetaChange);
    rootTask.addDependentTask(indexMetaChangeTsk);

    return rootTask;
  }

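  /**
   * Renders a partition spec as a list of {@code `key` = 'value'} strings,
   * suitable for joining into a PARTITION clause or a WHERE predicate.
   */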
  private List<String> getPartKVPairStringArray(
      LinkedHashMap<String, String> partSpec) {
    List<String> ret = new ArrayList<String>(partSpec.size());
    Iterator<Entry<String, String>> iter = partSpec.entrySet().iterator();
    while (iter.hasNext()) {
      StringBuilder sb = new StringBuilder();
      Entry<String, String> p = iter.next();
      sb.append(HiveUtils.unparseIdentifier(p.getKey()));
      sb.append(" = ");
      sb.append("'");
      sb.append(p.getValue());
      sb.append("'");
      ret.add(sb.toString());
    }
    return ret;
  }

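  /**
   * Compact indexes are stored in a separate index table.
   */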
  @Override
  public boolean usesIndexTable() {
    return true;
  }

  @Override
  public Configuration getConf() {
    return configuration;
  }

  @Override
  public void setConf(Configuration conf) {
    this.configuration = conf;
  }

}