/streaming/src/main/java/com/mongodb/hadoop/mapred/MongoInputFormat.java

http://github.com/mongodb/mongo-hadoop · Java · 72 lines · 42 code · 12 blank · 18 comment · 2 complexity · cfa117a9fb359003b110a09d01821b8a MD5 · raw file

  1. // MongoInputFormat.java
  2. /*
  3. * Copyright 2010 10gen Inc.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. package com.mongodb.hadoop.mapred;
  18. import java.util.*;
  19. import com.mongodb.hadoop.input.*;
  20. import com.mongodb.hadoop.mapred.input.MongoInputSplit;
  21. import com.mongodb.hadoop.mapred.input.MongoRecordReader;
  22. import com.mongodb.hadoop.util.*;
  23. import org.apache.commons.logging.*;
  24. import org.apache.hadoop.conf.*;
  25. import org.apache.hadoop.mapred.*;
  26. import org.apache.hadoop.io.*;
  27. import org.apache.hadoop.mapred.InputFormat;
  28. import org.apache.hadoop.mapred.InputSplit;
  29. import org.apache.hadoop.mapred.RecordReader;
  30. import org.apache.hadoop.mapreduce.*;
  31. import org.bson.*;
  32. import com.mongodb.hadoop.MongoConfig;
  33. import com.mongodb.hadoop.mapred.input.*;
  34. import com.mongodb.hadoop.io.*;
  35. @SuppressWarnings("deprecation")
  36. public class MongoInputFormat implements InputFormat<BSONWritable, BSONWritable> {
  37. public RecordReader<BSONWritable, BSONWritable> getRecordReader(InputSplit split,
  38. JobConf job,
  39. Reporter reporter) {
  40. if (!(split instanceof MongoInputSplit))
  41. throw new IllegalStateException("Creation of a new RecordReader requires a MongoInputSplit instance.");
  42. final MongoInputSplit mis = (MongoInputSplit) split;
  43. return new MongoRecordReader(mis);
  44. }
  45. public InputSplit[] getSplits(JobConf job, int numSplits) {
  46. final MongoConfig conf = new MongoConfig(job);
  47. // TODO - Support allowing specification of numSplits to affect our ops?
  48. final List<org.apache.hadoop.mapreduce.InputSplit> splits = MongoSplitter.calculateSplits( conf );
  49. // TODO - Make me less egregiously inefficient.
  50. InputSplit[] classicSplits = new InputSplit[splits.size()];
  51. for ( int i = 0; i < splits.size(); i++ ) {
  52. classicSplits[i] = new MongoInputSplit( (com.mongodb.hadoop.input.MongoInputSplit) splits.get( i ) );
  53. }
  54. return classicSplits;
  55. }
  56. public boolean verifyConfiguration(Configuration conf) {
  57. return true;
  58. }
  59. private static final Log log = LogFactory.getLog(MongoInputFormat.class);
  60. }