/src/main/com/mongodb/hadoop/mapred/input/MongoInputSplit.java
https://github.com/nibin/mongo-hadoop · Java · 132 lines · 89 code · 20 blank · 23 comment · 0 complexity · 7ae787b5526a109f61acdcf35b2d1c37 MD5 · raw file
- // MongoInputSplit.java
- /*
- * Copyright 2010 10gen Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package com.mongodb.hadoop.mapred.input;
- import java.io.*;
- import java.util.*;
- import org.apache.commons.logging.*;
- import org.apache.hadoop.io.*;
- import org.apache.hadoop.mapred.*;
- import com.mongodb.*;
- import com.mongodb.hadoop.util.*;
- import com.mongodb.util.*;
- @SuppressWarnings("deprecation")
- public class MongoInputSplit implements Writable, InputSplit {
- public MongoInputSplit(MongoURI inputURI, DBObject query, DBObject fields, DBObject sort, int limit, int skip) {
- log.info("Creating a new MongoInputSplit for MongoURI '"
- + inputURI
- + "', query: '"
- + query
- + "', fieldSpec: '"
- + fields
- + "', sort: '"
- + sort
- + "', limit: "
- + limit
- + ", skip: "
- + skip
- + " .");
- _mongoURI = inputURI;
- _querySpec = query;
- _fieldSpec = fields;
- _sortSpec = sort;
- _limit = limit;
- _skip = skip;
- }
- public MongoInputSplit() {
- }
- public long getLength() {
- return getCursor().size();
- }
- public String[] getLocations() {
- final List<String> hosts = _mongoURI.getHosts();
- return hosts.toArray(new String[hosts.size()]);
- }
- /**
- * Serialize the Split instance
- */
- public void write(DataOutput out) throws IOException {
- final ObjectOutputStream objOut = new ObjectOutputStream((OutputStream) out);
- // TODO - Use object outputstream instead of going to <-> from string?
- out.writeUTF(_mongoURI.toString());
- out.writeUTF(JSON.serialize(_querySpec));
- out.writeUTF(JSON.serialize(_fieldSpec));
- out.writeUTF(JSON.serialize(_sortSpec));
- out.writeInt(_limit);
- out.writeInt(_skip);
- objOut.close();
- }
- public void readFields(DataInput in) throws IOException {
- final ObjectInputStream objIn = new ObjectInputStream((InputStream) in);
- _mongoURI = new MongoURI(in.readUTF());
- _querySpec = (DBObject) JSON.parse(in.readUTF());
- _fieldSpec = (DBObject) JSON.parse(in.readUTF());
- _sortSpec = (DBObject) JSON.parse(in.readUTF());
- _limit = in.readInt();
- _skip = in.readInt();
- log.info("Deserialized MongoInputSplit ... { length = "
- + getLength()
- + ", locations = "
- + getLocations()
- + ", query = "
- + _querySpec
- + ", fields = "
- + _fieldSpec
- + ", sort = "
- + _sortSpec
- + ", limit = "
- + _limit
- + ", skip = "
- + _skip
- + "}");
- objIn.close();
- }
- DBCursor getCursor() {
- // Return the cursor with the split's query, etc. already slotted in for
- // them.
- // todo - support limit/skip
- final DBCursor cursor = MongoConfigUtil.getCollection(_mongoURI).find(_querySpec, _fieldSpec).sort(_sortSpec);
- cursor.slaveOk();
- return cursor;
- }
- private MongoURI _mongoURI;
- private DBObject _querySpec;
- private DBObject _fieldSpec;
- private DBObject _sortSpec;
- private int _limit = 0;
- private int _skip = 0;
- private static final Log log = LogFactory.getLog(MongoInputSplit.class);
- }