/streaming/src/main/java/com/mongodb/hadoop/mapred/input/MongoInputSplit.java

http://github.com/mongodb/mongo-hadoop · Java · 129 lines · 80 code · 21 blank · 28 comment · 3 complexity · 53071b0060e235c43752e60036626960 MD5 · raw file

  1. // MongoInputSplit.java
  2. /*
  3. * Copyright 2010 10gen Inc.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. package com.mongodb.hadoop.mapred.input;
  18. import com.mongodb.*;
  19. import com.mongodb.hadoop.util.*;
  20. import com.mongodb.util.*;
  21. import org.apache.commons.logging.*;
  22. import org.apache.hadoop.io.*;
  23. import org.apache.hadoop.mapred.*;
  24. import java.io.*;
  25. import java.util.*;
  26. @SuppressWarnings( "deprecation" )
  27. public class MongoInputSplit implements Writable, InputSplit {
  28. public MongoInputSplit( MongoURI inputURI, DBObject query, DBObject fields, DBObject sort, int limit, int skip ){
  29. log.info( "Creating a new MongoInputSplit for MongoURI '" + inputURI + "', query: '" + query + "', fieldSpec: '"
  30. + fields + "', sort: '" + sort + "', limit: " + limit + ", skip: " + skip + " ." );
  31. _mongoURI = inputURI;
  32. _querySpec = query;
  33. _fieldSpec = fields;
  34. _sortSpec = sort;
  35. _limit = limit;
  36. _skip = skip;
  37. getCursor();
  38. }
  39. public MongoInputSplit( com.mongodb.hadoop.input.MongoInputSplit split ){
  40. this(split.getMongoURI(), split.getQuerySpec(), split.getFieldSpec(), split.getSortSpec(), split.getLimit(),
  41. split.getSkip());
  42. }
  43. /**
  44. * This is supposed to return the size of the split in bytes, but for now, for sanity sake we return the # of docs
  45. * in the split instead.
  46. *
  47. * @return
  48. */
  49. @Override
  50. public long getLength(){
  51. return Long.MAX_VALUE;
  52. }
  53. @Override
  54. public String[] getLocations(){
  55. return _mongoURI.getHosts().toArray( new String[_mongoURI.getHosts().size()] );
  56. }
  57. /**
  58. * Serialize the Split instance
  59. */
  60. public void write( DataOutput out ) throws IOException{
  61. out.writeUTF( _mongoURI.toString() );
  62. out.writeUTF( JSON.serialize( _querySpec ) );
  63. out.writeUTF( JSON.serialize( _fieldSpec ) );
  64. out.writeUTF( JSON.serialize( _sortSpec ) );
  65. out.writeInt( _limit );
  66. out.writeInt( _skip );
  67. }
  68. @Override
  69. public void readFields( DataInput in ) throws IOException{
  70. _mongoURI = new MongoURI( in.readUTF() );
  71. _querySpec = (DBObject) JSON.parse( in.readUTF() );
  72. _fieldSpec = (DBObject) JSON.parse( in.readUTF() );
  73. _sortSpec = (DBObject) JSON.parse( in.readUTF() );
  74. _limit = in.readInt();
  75. _skip = in.readInt();
  76. getCursor();
  77. if ( log.isDebugEnabled() ){
  78. log.debug( "Deserialized MongoInputSplit ... { length = " + getLength() + ", locations = "
  79. + Arrays.toString( getLocations() ) + ", query = " + _querySpec
  80. + ", fields = " + _fieldSpec + ", sort = " + _sortSpec + ", limit = " + _limit + ", skip = "
  81. + _skip + "}" );
  82. }
  83. }
  84. DBCursor getCursor(){
  85. // Return the cursor with the split's query, etc. already slotted in for
  86. // them.
  87. // todo - support limit/skip
  88. if ( _cursor == null ){
  89. _cursor = MongoConfigUtil.getCollection( _mongoURI ).find( _querySpec, _fieldSpec ).sort( _sortSpec );
  90. _cursor.slaveOk();
  91. }
  92. return _cursor;
  93. }
  94. @Override
  95. public String toString(){
  96. return "MongoInputSplit{URI=" + _mongoURI + ", query=" + _querySpec + '}';
  97. }
  98. public MongoInputSplit(){ }
  99. private MongoURI _mongoURI;
  100. private DBObject _querySpec;
  101. private DBObject _fieldSpec;
  102. private DBObject _sortSpec;
  103. private int _limit = 0;
  104. private int _skip = 0;
  105. private long _length = -1;
  106. private transient DBCursor _cursor;
  107. private static final Log log = LogFactory.getLog( MongoInputSplit.class );
  108. }