/core/src/main/java/com/mongodb/hadoop/splitter/MongoSplitterFactory.java

http://github.com/mongodb/mongo-hadoop · Java · 131 lines · 85 code · 14 blank · 32 comment · 25 complexity · feb5e866b73b914c30f581c486b58a04 MD5 · raw file

  1. /*
  2. * Copyright 2010-2013 10gen Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. package com.mongodb.hadoop.splitter;
  17. import com.mongodb.CommandResult;
  18. import com.mongodb.DBCollection;
  19. import com.mongodb.MongoClientURI;
  20. import com.mongodb.hadoop.util.MongoConfigUtil;
  21. import org.apache.commons.logging.Log;
  22. import org.apache.commons.logging.LogFactory;
  23. import org.apache.hadoop.conf.Configuration;
  24. import org.apache.hadoop.util.ReflectionUtils;
  25. import java.util.List;
  26. /**
  27. * Examines a collection and dynamically chooses which implementation of MongoSplitter to use, based on our configuration and the
  28. * collection's properties.
  29. */
  30. public final class MongoSplitterFactory {
  31. private static final Log LOG = LogFactory.getLog(MongoSplitterFactory.class);
  32. private MongoSplitterFactory() {
  33. }
  34. public static MongoSplitter getSplitterByClass(final Configuration conf, final String className) {
  35. /* If user has specified a class to use for the splitter, use it */
  36. Class<? extends MongoSplitter> splitterClass =
  37. MongoConfigUtil.getClassByName(conf, className, MongoSplitter.class);
  38. if (splitterClass != null) {
  39. MongoSplitter splitter = ReflectionUtils.newInstance(splitterClass, conf);
  40. splitter.setConfiguration(conf);
  41. return splitter;
  42. } else {
  43. return null;
  44. }
  45. }
  46. public static MongoCollectionSplitter getSplitterByStats(final MongoClientURI uri, final Configuration config) {
  47. /* Looks at the collection in mongo.input.uri
  48. * and choose an implementation based on what's in there. */
  49. MongoCollectionSplitter returnVal;
  50. // If the split calculation is totally disabled, just make one
  51. // big split for the whole collection.
  52. if (!MongoConfigUtil.createInputSplits(config)) {
  53. returnVal = new SingleMongoSplitter(config);
  54. } else {
  55. MongoClientURI authURI = MongoConfigUtil.getAuthURI(config);
  56. CommandResult stats;
  57. DBCollection coll = null;
  58. CommandResult buildInfo;
  59. try {
  60. if (authURI != null) {
  61. coll = MongoConfigUtil.getCollectionWithAuth(uri, authURI);
  62. stats = coll.getStats();
  63. LOG.info("Retrieved Collection stats:" + stats);
  64. } else {
  65. coll = MongoConfigUtil.getCollection(uri);
  66. stats = coll.getStats();
  67. }
  68. buildInfo = coll.getDB().command("buildinfo");
  69. } finally {
  70. if (coll != null) {
  71. MongoConfigUtil.close(coll.getDB().getMongo());
  72. }
  73. }
  74. if (!stats.getBoolean("ok", false)) {
  75. throw new RuntimeException("Unable to calculate input splits from collection stats: " + stats.getString("errmsg"));
  76. }
  77. if (!stats.getBoolean("sharded", false)) {
  78. // Prefer SampleSplitter.
  79. List versionArray = (List) buildInfo.get("versionArray");
  80. boolean sampleOperatorSupported = (
  81. (Integer) versionArray.get(0) > 3
  82. || ((Integer) versionArray.get(0) == 3
  83. && (Integer) versionArray.get(1) >= 2));
  84. if (sampleOperatorSupported) {
  85. returnVal = new SampleSplitter(config);
  86. } else {
  87. returnVal = new StandaloneMongoSplitter(config);
  88. }
  89. } else {
  90. // Collection is sharded
  91. if (MongoConfigUtil.isShardChunkedSplittingEnabled(config)) {
  92. // Creates one split per chunk.
  93. returnVal = new ShardChunkMongoSplitter(config);
  94. } else if (MongoConfigUtil.canReadSplitsFromShards(config)) {
  95. // Creates one split per shard, but ignores chunk bounds.
  96. // Reads from shards directly (bypassing mongos).
  97. // Not usually recommended.
  98. returnVal = new ShardMongoSplitter(config);
  99. } else {
  100. //Not configured to use chunks or shards -
  101. //so treat this the same as if it was an unsharded collection
  102. returnVal = new StandaloneMongoSplitter(config);
  103. }
  104. }
  105. }
  106. return returnVal;
  107. }
  108. public static MongoSplitter getSplitter(final Configuration config) {
  109. String splitterClassName = config.get(MongoConfigUtil.MONGO_SPLITTER_CLASS);
  110. MongoSplitter customSplitter = getSplitterByClass(config, splitterClassName);
  111. if (customSplitter != null) {
  112. return customSplitter;
  113. } else {
  114. return getSplitterByStats(MongoConfigUtil.getInputURI(config), config);
  115. }
  116. }
  117. }