/src/main/com/mongodb/hadoop/mapred/MongoInputFormat.java

https://github.com/charlesa101/mongo-hadoop · Java

// MongoInputFormat.java
/*
 * Copyright 2010 10gen Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.mongodb.hadoop.mapred;

import java.util.*;

import org.apache.commons.logging.*;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;

import org.bson.*;

import com.mongodb.hadoop.MongoConfig;
import com.mongodb.hadoop.io.*;
import com.mongodb.hadoop.mapred.input.*;
@SuppressWarnings("deprecation")
public class MongoInputFormat implements InputFormat<ObjectWritable, BSONWritable> {

    public RecordReader<ObjectWritable, BSONWritable> getRecordReader(InputSplit split,
                                                                      JobConf job,
                                                                      Reporter reporter) {
        if (!(split instanceof MongoInputSplit))
            throw new IllegalStateException("Creation of a new RecordReader requires a MongoInputSplit instance.");

        final MongoInputSplit mis = (MongoInputSplit) split;
        return (RecordReader<ObjectWritable, BSONWritable>) new MongoRecordReader(mis);
    }
    public InputSplit[] getSplits(JobConf job, int numSplits) {
        final MongoConfig conf = new MongoConfig(job);

        if (conf.getLimit() > 0 || conf.getSkip() > 0) {
            // TODO - If skip or limit is specified, create only one input split.
            throw new IllegalArgumentException("skip() and limit() are not currently supported due to input split "
                    + "issues.");
        }
        else {
            /*
             * On the job client side we want *ONLY* the min and max ids for each
             * split; the actual querying is done by the individual mappers.
             */
            /* final int splitSize = conf.getSplitSize(); */
            // For the first release: no splits, no sharding.
            InputSplit[] splits =
                {(InputSplit) new MongoInputSplit(conf.getInputURI(), conf.getQuery(), conf.getFields(),
                                                  conf.getSort(), conf.getLimit(), conf.getSkip())};
            log.info("Calculated " + splits.length + " split objects.");
            return splits;
        }
    }
    /** Currently accepts any configuration as valid; no checks are performed yet. */
    public boolean verifyConfiguration(Configuration conf) {
        return true;
    }

    private static final Log log = LogFactory.getLog(MongoInputFormat.class);
}
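
For reference, a minimal driver sketch (not part of this repo) showing how a job might wire in this InputFormat through the old org.apache.hadoop.mapred API. The ExampleMongoJob class, the output path, and the "mongo.input.uri" property key are illustrative assumptions; check MongoConfig for the exact configuration keys it actually reads.

// ExampleMongoJob.java -- hypothetical driver, not part of this repo.
package com.mongodb.hadoop.examples;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.ObjectWritable;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityMapper;

import com.mongodb.hadoop.io.BSONWritable;
import com.mongodb.hadoop.mapred.MongoInputFormat;

public class ExampleMongoJob {
    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf(ExampleMongoJob.class);
        job.setJobName("mongo-input-example");

        // Read straight from MongoDB via the InputFormat above.
        job.setInputFormat(MongoInputFormat.class);
        // Assumed config key; verify against MongoConfig before relying on it.
        job.set("mongo.input.uri", "mongodb://localhost/test.example");

        // Map-only pass-through: dump each (id, document) pair as text.
        job.setMapperClass(IdentityMapper.class);
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(ObjectWritable.class);
        job.setOutputValueClass(BSONWritable.class);
        FileOutputFormat.setOutputPath(job, new Path("/tmp/mongo-input-example"));

        JobClient.runJob(job);
    }
}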