
/tags/release-0.0.0-rc0/hive/external/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.hbase;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hive.metastore.HiveMetaHook;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.Constants;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer;
import org.apache.hadoop.hive.ql.index.IndexSearchCondition;
import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.util.StringUtils;

/**
 * HBaseStorageHandler provides a HiveStorageHandler implementation for
 * HBase.
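 *
 * <p>Illustrative usage, following the standard Hive HBase integration
 * DDL (the table, column, and family names below are examples only, not
 * defined by this class):
 *
 * <pre>
 * CREATE TABLE hbase_table(key int, value string)
 * STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
 * WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf1:val")
 * TBLPROPERTIES ("hbase.table.name" = "hbase_table");
 * </pre>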
 */
public class HBaseStorageHandler extends DefaultStorageHandler
  implements HiveMetaHook, HiveStoragePredicateHandler {

  final static public String DEFAULT_PREFIX = "default.";

  private HBaseConfiguration hbaseConf;
  private HBaseAdmin admin;

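  /**
   * Lazily creates and caches an HBaseAdmin handle for the configured
   * cluster, rewrapping connection failures as MetaException so the
   * metastore can surface them.
   */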
  private HBaseAdmin getHBaseAdmin() throws MetaException {
    try {
      if (admin == null) {
        admin = new HBaseAdmin(hbaseConf);
      }
      return admin;
    } catch (MasterNotRunningException mnre) {
      throw new MetaException(StringUtils.stringifyException(mnre));
    } catch (ZooKeeperConnectionException zkce) {
      throw new MetaException(StringUtils.stringifyException(zkce));
    }
  }

  private String getHBaseTableName(Table tbl) {
    // Give preference to TBLPROPERTIES over SERDEPROPERTIES
    // (really we should only use TBLPROPERTIES, so this is just
    // for backwards compatibility with the original specs).
    String tableName = tbl.getParameters().get(HBaseSerDe.HBASE_TABLE_NAME);
    if (tableName == null) {
      tableName = tbl.getSd().getSerdeInfo().getParameters().get(
        HBaseSerDe.HBASE_TABLE_NAME);
    }
    if (tableName == null) {
      tableName = tbl.getDbName() + "." + tbl.getTableName();
      if (tableName.startsWith(DEFAULT_PREFIX)) {
        tableName = tableName.substring(DEFAULT_PREFIX.length());
      }
    }
    return tableName;
  }

  @Override
  public void preDropTable(Table table) throws MetaException {
    // nothing to do
  }

  @Override
  public void rollbackDropTable(Table table) throws MetaException {
    // nothing to do
  }

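  /**
   * For managed (non-external) tables, disables and deletes the
   * underlying HBase table when the Hive table is dropped with
   * deleteData set; external tables are left untouched.
   */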
  @Override
  public void commitDropTable(
    Table tbl, boolean deleteData) throws MetaException {

    try {
      String tableName = getHBaseTableName(tbl);
      boolean isExternal = MetaStoreUtils.isExternalTable(tbl);
      if (deleteData && !isExternal) {
        if (getHBaseAdmin().isTableEnabled(tableName)) {
          getHBaseAdmin().disableTable(tableName);
        }
        getHBaseAdmin().deleteTable(tableName);
      }
    } catch (IOException ie) {
      throw new MetaException(StringUtils.stringifyException(ie));
    }
  }

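  /**
   * Validates the declared column mapping, then either creates the
   * underlying HBase table (managed tables) or verifies that an existing
   * HBase table's column families match the mapping (external tables).
   */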
  @Override
  public void preCreateTable(Table tbl) throws MetaException {
    boolean isExternal = MetaStoreUtils.isExternalTable(tbl);

    // We'd like to move this to HiveMetaStore for any non-native table, but
    // first we need to support storing NULL for location on a table
    if (tbl.getSd().getLocation() != null) {
      throw new MetaException("LOCATION may not be specified for HBase.");
    }

    try {
      String tableName = getHBaseTableName(tbl);
      Map<String, String> serdeParam = tbl.getSd().getSerdeInfo().getParameters();
      String hbaseColumnsMapping = serdeParam.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);

      if (hbaseColumnsMapping == null) {
        throw new MetaException("No hbase.columns.mapping defined in Serde.");
      }

      List<String> hbaseColumnFamilies = new ArrayList<String>();
      List<String> hbaseColumnQualifiers = new ArrayList<String>();
      List<byte []> hbaseColumnFamiliesBytes = new ArrayList<byte []>();
      List<byte []> hbaseColumnQualifiersBytes = new ArrayList<byte []>();
      int iKey = HBaseSerDe.parseColumnMapping(hbaseColumnsMapping, hbaseColumnFamilies,
          hbaseColumnFamiliesBytes, hbaseColumnQualifiers, hbaseColumnQualifiersBytes);

      HTableDescriptor tableDesc;

      if (!getHBaseAdmin().tableExists(tableName)) {
        // if it is not an external table then create one
        if (!isExternal) {
          // Create the column descriptors
          tableDesc = new HTableDescriptor(tableName);
          Set<String> uniqueColumnFamilies = new HashSet<String>(hbaseColumnFamilies);
          uniqueColumnFamilies.remove(hbaseColumnFamilies.get(iKey));

          for (String columnFamily : uniqueColumnFamilies) {
            tableDesc.addFamily(new HColumnDescriptor(Bytes.toBytes(columnFamily)));
          }

          getHBaseAdmin().createTable(tableDesc);
        } else {
          // an external table
          throw new MetaException("HBase table " + tableName +
              " doesn't exist while the table is declared as an external table.");
        }

      } else {
        if (!isExternal) {
          throw new MetaException("Table " + tableName + " already exists"
            + " within HBase; use CREATE EXTERNAL TABLE instead to"
            + " register it in Hive.");
        }
        // make sure the schema mapping is right
        tableDesc = getHBaseAdmin().getTableDescriptor(Bytes.toBytes(tableName));

        for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
          if (i == iKey) {
            continue;
          }

          if (!tableDesc.hasFamily(hbaseColumnFamiliesBytes.get(i))) {
            throw new MetaException("Column Family " + hbaseColumnFamilies.get(i)
                + " is not defined in hbase table " + tableName);
          }
        }
      }

      // ensure the table is online
      new HTable(hbaseConf, tableDesc.getName());
    } catch (MasterNotRunningException mnre) {
      throw new MetaException(StringUtils.stringifyException(mnre));
    } catch (IOException ie) {
      throw new MetaException(StringUtils.stringifyException(ie));
    } catch (SerDeException se) {
      throw new MetaException(StringUtils.stringifyException(se));
    }
  }

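  /**
   * Undoes preCreateTable for managed tables: if the HBase table was
   * created, disable and delete it so a failed CREATE TABLE leaves no
   * orphan behind.
   */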
  @Override
  public void rollbackCreateTable(Table table) throws MetaException {
    boolean isExternal = MetaStoreUtils.isExternalTable(table);
    String tableName = getHBaseTableName(table);
    try {
      if (!isExternal && getHBaseAdmin().tableExists(tableName)) {
        // we have created an HBase table, so we delete it to roll back;
        if (getHBaseAdmin().isTableEnabled(tableName)) {
          getHBaseAdmin().disableTable(tableName);
        }
        getHBaseAdmin().deleteTable(tableName);
      }
    } catch (IOException ie) {
      throw new MetaException(StringUtils.stringifyException(ie));
    }
  }

  @Override
  public void commitCreateTable(Table table) throws MetaException {
    // nothing to do
  }

  @Override
  public Configuration getConf() {
    return hbaseConf;
  }

  @Override
  public void setConf(Configuration conf) {
    hbaseConf = new HBaseConfiguration(conf);
  }

  @Override
  public Class<? extends InputFormat> getInputFormatClass() {
    return HiveHBaseTableInputFormat.class;
  }

  @Override
  public Class<? extends OutputFormat> getOutputFormatClass() {
    return HiveHBaseTableOutputFormat.class;
  }

  @Override
  public Class<? extends SerDe> getSerDeClass() {
    return HBaseSerDe.class;
  }

  @Override
  public HiveMetaHook getMetaHook() {
    return this;
  }

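  /**
   * Copies the column mapping and HBase table name from the table
   * properties into the job properties so that the input/output formats
   * and SerDe can see them at job execution time.
   */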
  @Override
  public void configureTableJobProperties(
    TableDesc tableDesc,
    Map<String, String> jobProperties) {

    Properties tableProperties = tableDesc.getProperties();

    jobProperties.put(
      HBaseSerDe.HBASE_COLUMNS_MAPPING,
      tableProperties.getProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING));

    String tableName =
      tableProperties.getProperty(HBaseSerDe.HBASE_TABLE_NAME);
    if (tableName == null) {
      tableName =
        tableProperties.getProperty(Constants.META_TABLE_NAME);
      if (tableName.startsWith(DEFAULT_PREFIX)) {
        tableName = tableName.substring(DEFAULT_PREFIX.length());
      }
    }
    jobProperties.put(HBaseSerDe.HBASE_TABLE_NAME, tableName);
  }

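  /**
   * Attempts to push a predicate down to HBase: only a single comparison
   * against the key column is pushable, so this returns null (push
   * nothing) unless the analyzer finds exactly one such condition.
   */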
  @Override
  public DecomposedPredicate decomposePredicate(
    JobConf jobConf,
    Deserializer deserializer,
    ExprNodeDesc predicate)
  {
    String columnNameProperty = jobConf.get(
      org.apache.hadoop.hive.serde.Constants.LIST_COLUMNS);
    List<String> columnNames =
      Arrays.asList(columnNameProperty.split(","));
    HBaseSerDe hbaseSerde = (HBaseSerDe) deserializer;
    IndexPredicateAnalyzer analyzer =
      HiveHBaseTableInputFormat.newIndexPredicateAnalyzer(
        columnNames.get(hbaseSerde.getKeyColumnOffset()));
    List<IndexSearchCondition> searchConditions =
      new ArrayList<IndexSearchCondition>();
    ExprNodeDesc residualPredicate =
      analyzer.analyzePredicate(predicate, searchConditions);
    if (searchConditions.size() != 1) {
      // Either there was nothing which could be pushed down (size = 0),
      // or more than one predicate (size > 1); in the latter case,
      // we bail out for now since multiple lookups on the key are
      // either contradictory or redundant.  We'll need to handle
      // this better later when we support more interesting predicates.
      return null;
    }

    DecomposedPredicate decomposedPredicate = new DecomposedPredicate();
    decomposedPredicate.pushedPredicate = analyzer.translateSearchConditions(
      searchConditions);
    decomposedPredicate.residualPredicate = residualPredicate;
    return decomposedPredicate;
  }
}