PageRenderTime 44ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/tags/release-0.2.0-rc0/src/java/org/apache/hcatalog/pig/HCatBaseLoader.java

#
Java | 117 lines | 70 code | 19 blank | 28 comment | 0 complexity | 7501871d5894c53a69e77cfa5175c538 MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause, JSON, CPL-1.0
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hcatalog.pig;
  19. import java.io.IOException;
  20. import java.util.Arrays;
  21. import java.util.List;
  22. import java.util.Properties;
  23. import org.apache.hadoop.mapreduce.Job;
  24. import org.apache.hadoop.mapreduce.RecordReader;
  25. import org.apache.hcatalog.data.HCatRecord;
  26. import org.apache.hcatalog.data.schema.HCatSchema;
  27. import org.apache.pig.LoadFunc;
  28. import org.apache.pig.LoadMetadata;
  29. import org.apache.pig.LoadPushDown;
  30. import org.apache.pig.PigException;
  31. import org.apache.pig.ResourceStatistics;
  32. import org.apache.pig.backend.executionengine.ExecException;
  33. import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
  34. import org.apache.pig.data.Tuple;
  35. import org.apache.pig.impl.logicalLayer.FrontendException;
  36. import org.apache.pig.impl.util.UDFContext;
  37. /**
  38. * Base class for HCatLoader and HCatEximLoader
  39. */
  40. public abstract class HCatBaseLoader extends LoadFunc implements LoadMetadata, LoadPushDown {
  41. protected static final String PRUNE_PROJECTION_INFO = "prune.projection.info";
  42. private RecordReader<?, ?> reader;
  43. protected String signature;
  44. HCatSchema outputSchema = null;
  45. @Override
  46. public Tuple getNext() throws IOException {
  47. try {
  48. HCatRecord hr = (HCatRecord) (reader.nextKeyValue() ? reader.getCurrentValue() : null);
  49. Tuple t = PigHCatUtil.transformToTuple(hr,outputSchema);
  50. // TODO : we were discussing an iter interface, and also a LazyTuple
  51. // change this when plans for that solidifies.
  52. return t;
  53. } catch (ExecException e) {
  54. int errCode = 6018;
  55. String errMsg = "Error while reading input";
  56. throw new ExecException(errMsg, errCode,
  57. PigException.REMOTE_ENVIRONMENT, e);
  58. } catch (Exception eOther){
  59. int errCode = 6018;
  60. String errMsg = "Error converting read value to tuple";
  61. throw new ExecException(errMsg, errCode,
  62. PigException.REMOTE_ENVIRONMENT, eOther);
  63. }
  64. }
  65. @Override
  66. public void prepareToRead(RecordReader reader, PigSplit arg1) throws IOException {
  67. this.reader = reader;
  68. }
  69. @Override
  70. public ResourceStatistics getStatistics(String location, Job job) throws IOException {
  71. // statistics not implemented currently
  72. return null;
  73. }
  74. @Override
  75. public List<OperatorSet> getFeatures() {
  76. return Arrays.asList(LoadPushDown.OperatorSet.PROJECTION);
  77. }
  78. @Override
  79. public RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldsInfo) throws FrontendException {
  80. // Store the required fields information in the UDFContext so that we
  81. // can retrieve it later.
  82. storeInUDFContext(signature, PRUNE_PROJECTION_INFO, requiredFieldsInfo);
  83. // HCat will always prune columns based on what we ask of it - so the
  84. // response is true
  85. return new RequiredFieldResponse(true);
  86. }
  87. @Override
  88. public void setUDFContextSignature(String signature) {
  89. this.signature = signature;
  90. }
  91. // helper methods
  92. protected void storeInUDFContext(String signature, String key, Object value) {
  93. UDFContext udfContext = UDFContext.getUDFContext();
  94. Properties props = udfContext.getUDFProperties(
  95. this.getClass(), new String[] {signature});
  96. props.put(key, value);
  97. }
  98. }