/src/main/com/mongodb/hadoop/mapred/input/MongoInputSplit.java

https://github.com/nibin/mongo-hadoop · Java · 132 lines · 89 code · 20 blank · 23 comment · 0 complexity · 7ae787b5526a109f61acdcf35b2d1c37 MD5 · raw file

  1. // MongoInputSplit.java
  2. /*
  3. * Copyright 2010 10gen Inc.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. package com.mongodb.hadoop.mapred.input;
  18. import java.io.*;
  19. import java.util.*;
  20. import org.apache.commons.logging.*;
  21. import org.apache.hadoop.io.*;
  22. import org.apache.hadoop.mapred.*;
  23. import com.mongodb.*;
  24. import com.mongodb.hadoop.util.*;
  25. import com.mongodb.util.*;
  26. @SuppressWarnings("deprecation")
  27. public class MongoInputSplit implements Writable, InputSplit {
  28. public MongoInputSplit(MongoURI inputURI, DBObject query, DBObject fields, DBObject sort, int limit, int skip) {
  29. log.info("Creating a new MongoInputSplit for MongoURI '"
  30. + inputURI
  31. + "', query: '"
  32. + query
  33. + "', fieldSpec: '"
  34. + fields
  35. + "', sort: '"
  36. + sort
  37. + "', limit: "
  38. + limit
  39. + ", skip: "
  40. + skip
  41. + " .");
  42. _mongoURI = inputURI;
  43. _querySpec = query;
  44. _fieldSpec = fields;
  45. _sortSpec = sort;
  46. _limit = limit;
  47. _skip = skip;
  48. }
  49. public MongoInputSplit() {
  50. }
  51. public long getLength() {
  52. return getCursor().size();
  53. }
  54. public String[] getLocations() {
  55. final List<String> hosts = _mongoURI.getHosts();
  56. return hosts.toArray(new String[hosts.size()]);
  57. }
  58. /**
  59. * Serialize the Split instance
  60. */
  61. public void write(DataOutput out) throws IOException {
  62. final ObjectOutputStream objOut = new ObjectOutputStream((OutputStream) out);
  63. // TODO - Use object outputstream instead of going to <-> from string?
  64. out.writeUTF(_mongoURI.toString());
  65. out.writeUTF(JSON.serialize(_querySpec));
  66. out.writeUTF(JSON.serialize(_fieldSpec));
  67. out.writeUTF(JSON.serialize(_sortSpec));
  68. out.writeInt(_limit);
  69. out.writeInt(_skip);
  70. objOut.close();
  71. }
  72. public void readFields(DataInput in) throws IOException {
  73. final ObjectInputStream objIn = new ObjectInputStream((InputStream) in);
  74. _mongoURI = new MongoURI(in.readUTF());
  75. _querySpec = (DBObject) JSON.parse(in.readUTF());
  76. _fieldSpec = (DBObject) JSON.parse(in.readUTF());
  77. _sortSpec = (DBObject) JSON.parse(in.readUTF());
  78. _limit = in.readInt();
  79. _skip = in.readInt();
  80. log.info("Deserialized MongoInputSplit ... { length = "
  81. + getLength()
  82. + ", locations = "
  83. + getLocations()
  84. + ", query = "
  85. + _querySpec
  86. + ", fields = "
  87. + _fieldSpec
  88. + ", sort = "
  89. + _sortSpec
  90. + ", limit = "
  91. + _limit
  92. + ", skip = "
  93. + _skip
  94. + "}");
  95. objIn.close();
  96. }
  97. DBCursor getCursor() {
  98. // Return the cursor with the split's query, etc. already slotted in for
  99. // them.
  100. // todo - support limit/skip
  101. final DBCursor cursor = MongoConfigUtil.getCollection(_mongoURI).find(_querySpec, _fieldSpec).sort(_sortSpec);
  102. cursor.slaveOk();
  103. return cursor;
  104. }
  105. private MongoURI _mongoURI;
  106. private DBObject _querySpec;
  107. private DBObject _fieldSpec;
  108. private DBObject _sortSpec;
  109. private int _limit = 0;
  110. private int _skip = 0;
  111. private static final Log log = LogFactory.getLog(MongoInputSplit.class);
  112. }