PageRenderTime 1357ms CodeModel.GetById 35ms RepoModel.GetById 2ms app.codeStats 0ms

/src/java/org/apache/cassandra/hadoop/ColumnFamilyInputFormat.java

https://github.com/stephenc/cassandra
Java | 327 lines | 241 code | 34 blank | 52 comment | 34 complexity | 09bc6341f11643887bcb4dd7a7d99d3d MD5 | raw file
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.cassandra.hadoop;
  19. import java.io.IOException;
  20. import java.net.InetAddress;
  21. import java.nio.ByteBuffer;
  22. import java.util.ArrayList;
  23. import java.util.Collections;
  24. import java.util.List;
  25. import java.util.Random;
  26. import java.util.SortedMap;
  27. import java.util.concurrent.Callable;
  28. import java.util.concurrent.ExecutorService;
  29. import java.util.concurrent.Executors;
  30. import java.util.concurrent.Future;
  31. import com.google.common.collect.ImmutableList;
  32. import org.apache.cassandra.db.IColumn;
  33. import org.apache.cassandra.dht.IPartitioner;
  34. import org.apache.cassandra.dht.Range;
  35. import org.apache.cassandra.dht.Token;
  36. import org.apache.cassandra.thrift.Cassandra;
  37. import org.apache.cassandra.thrift.InvalidRequestException;
  38. import org.apache.cassandra.thrift.KeyRange;
  39. import org.apache.cassandra.thrift.TokenRange;
  40. import org.apache.commons.lang.StringUtils;
  41. import org.apache.hadoop.conf.Configuration;
  42. import org.apache.hadoop.mapred.*;
  43. import org.apache.hadoop.mapreduce.InputFormat;
  44. import org.apache.hadoop.mapreduce.InputSplit;
  45. import org.apache.hadoop.mapreduce.JobContext;
  46. import org.apache.hadoop.mapreduce.RecordReader;
  47. import org.apache.hadoop.mapreduce.TaskAttemptContext;
  48. import org.apache.hadoop.mapreduce.TaskAttemptID;
  49. import org.apache.thrift.TException;
  50. import org.slf4j.Logger;
  51. import org.slf4j.LoggerFactory;
  52. /**
  53. * Hadoop InputFormat allowing map/reduce against Cassandra rows within one ColumnFamily.
  54. *
  55. * At minimum, you need to set the CF and predicate (description of columns to extract from each row)
  56. * in your Hadoop job Configuration. The ConfigHelper class is provided to make this
  57. * simple:
  58. * ConfigHelper.setColumnFamily
  59. * ConfigHelper.setSlicePredicate
  60. *
  61. * You can also configure the number of rows per InputSplit with
  62. * ConfigHelper.setInputSplitSize
  63. * This should be "as big as possible, but no bigger." Each InputSplit is read from Cassandra
  64. * with multiple get_slice_range queries, and the per-call overhead of get_slice_range is high,
  65. * so larger split sizes are better -- but if it is too large, you will run out of memory.
  66. *
  67. * The default split size is 64k rows.
  68. */
  69. public class ColumnFamilyInputFormat extends InputFormat<ByteBuffer, SortedMap<ByteBuffer, IColumn>>
  70. implements org.apache.hadoop.mapred.InputFormat<ByteBuffer, SortedMap<ByteBuffer, IColumn>>
  71. {
  72. private static final Logger logger = LoggerFactory.getLogger(ColumnFamilyInputFormat.class);
  73. public static final String MAPRED_TASK_ID = "mapred.task.id";
  74. // The simple fact that we need this is because the old Hadoop API wants us to "write"
  75. // to the key and value whereas the new asks for it.
  76. // I choose 8kb as the default max key size (instanciated only once), but you can
  77. // override it in your jobConf with this setting.
  78. public static final String CASSANDRA_HADOOP_MAX_KEY_SIZE = "cassandra.hadoop.max_key_size";
  79. public static final int CASSANDRA_HADOOP_MAX_KEY_SIZE_DEFAULT = 8192;
  80. private String keyspace;
  81. private String cfName;
  82. private IPartitioner partitioner;
  83. private static void validateConfiguration(Configuration conf)
  84. {
  85. if (ConfigHelper.getInputKeyspace(conf) == null || ConfigHelper.getInputColumnFamily(conf) == null)
  86. {
  87. throw new UnsupportedOperationException("you must set the keyspace and columnfamily with setColumnFamily()");
  88. }
  89. if (ConfigHelper.getInputSlicePredicate(conf) == null)
  90. {
  91. throw new UnsupportedOperationException("you must set the predicate with setPredicate");
  92. }
  93. if (ConfigHelper.getInputInitialAddress(conf) == null)
  94. throw new UnsupportedOperationException("You must set the initial output address to a Cassandra node");
  95. if (ConfigHelper.getInputPartitioner(conf) == null)
  96. throw new UnsupportedOperationException("You must set the Cassandra partitioner class");
  97. }
  98. public List<InputSplit> getSplits(JobContext context) throws IOException
  99. {
  100. Configuration conf = context.getConfiguration();
  101. validateConfiguration(conf);
  102. // cannonical ranges and nodes holding replicas
  103. List<TokenRange> masterRangeNodes = getRangeMap(conf);
  104. keyspace = ConfigHelper.getInputKeyspace(context.getConfiguration());
  105. cfName = ConfigHelper.getInputColumnFamily(context.getConfiguration());
  106. partitioner = ConfigHelper.getInputPartitioner(context.getConfiguration());
  107. logger.debug("partitioner is " + partitioner);
  108. // cannonical ranges, split into pieces, fetching the splits in parallel
  109. ExecutorService executor = Executors.newCachedThreadPool();
  110. List<InputSplit> splits = new ArrayList<InputSplit>();
  111. try
  112. {
  113. List<Future<List<InputSplit>>> splitfutures = new ArrayList<Future<List<InputSplit>>>();
  114. KeyRange jobKeyRange = ConfigHelper.getInputKeyRange(conf);
  115. Range<Token> jobRange = null;
  116. if (jobKeyRange != null && jobKeyRange.start_token != null)
  117. {
  118. assert partitioner.preservesOrder() : "ConfigHelper.setInputKeyRange(..) can only be used with a order preserving paritioner";
  119. assert jobKeyRange.start_key == null : "only start_token supported";
  120. assert jobKeyRange.end_key == null : "only end_token supported";
  121. jobRange = new Range<Token>(partitioner.getTokenFactory().fromString(jobKeyRange.start_token),
  122. partitioner.getTokenFactory().fromString(jobKeyRange.end_token),
  123. partitioner);
  124. }
  125. for (TokenRange range : masterRangeNodes)
  126. {
  127. if (jobRange == null)
  128. {
  129. // for each range, pick a live owner and ask it to compute bite-sized splits
  130. splitfutures.add(executor.submit(new SplitCallable(range, conf)));
  131. }
  132. else
  133. {
  134. Range<Token> dhtRange = new Range<Token>(partitioner.getTokenFactory().fromString(range.start_token),
  135. partitioner.getTokenFactory().fromString(range.end_token),
  136. partitioner);
  137. if (dhtRange.intersects(jobRange))
  138. {
  139. for (Range<Token> intersection: dhtRange.intersectionWith(jobRange))
  140. {
  141. range.start_token = partitioner.getTokenFactory().toString(intersection.left);
  142. range.end_token = partitioner.getTokenFactory().toString(intersection.right);
  143. // for each range, pick a live owner and ask it to compute bite-sized splits
  144. splitfutures.add(executor.submit(new SplitCallable(range, conf)));
  145. }
  146. }
  147. }
  148. }
  149. // wait until we have all the results back
  150. for (Future<List<InputSplit>> futureInputSplits : splitfutures)
  151. {
  152. try
  153. {
  154. splits.addAll(futureInputSplits.get());
  155. }
  156. catch (Exception e)
  157. {
  158. throw new IOException("Could not get input splits", e);
  159. }
  160. }
  161. }
  162. finally
  163. {
  164. executor.shutdownNow();
  165. }
  166. assert splits.size() > 0;
  167. Collections.shuffle(splits, new Random(System.nanoTime()));
  168. return splits;
  169. }
  170. /**
  171. * Gets a token range and splits it up according to the suggested
  172. * size into input splits that Hadoop can use.
  173. */
  174. class SplitCallable implements Callable<List<InputSplit>>
  175. {
  176. private final TokenRange range;
  177. private final Configuration conf;
  178. public SplitCallable(TokenRange tr, Configuration conf)
  179. {
  180. this.range = tr;
  181. this.conf = conf;
  182. }
  183. public List<InputSplit> call() throws Exception
  184. {
  185. ArrayList<InputSplit> splits = new ArrayList<InputSplit>();
  186. List<String> tokens = getSubSplits(keyspace, cfName, range, conf);
  187. assert range.rpc_endpoints.size() == range.endpoints.size() : "rpc_endpoints size must match endpoints size";
  188. // turn the sub-ranges into InputSplits
  189. String[] endpoints = range.endpoints.toArray(new String[range.endpoints.size()]);
  190. // hadoop needs hostname, not ip
  191. int endpointIndex = 0;
  192. for (String endpoint: range.rpc_endpoints)
  193. {
  194. String endpoint_address = endpoint;
  195. if (endpoint_address == null || endpoint_address.equals("0.0.0.0"))
  196. endpoint_address = range.endpoints.get(endpointIndex);
  197. endpoints[endpointIndex++] = InetAddress.getByName(endpoint_address).getHostName();
  198. }
  199. Token.TokenFactory factory = partitioner.getTokenFactory();
  200. for (int i = 1; i < tokens.size(); i++)
  201. {
  202. Token left = factory.fromString(tokens.get(i - 1));
  203. Token right = factory.fromString(tokens.get(i));
  204. Range<Token> range = new Range<Token>(left, right, partitioner);
  205. List<Range<Token>> ranges = range.isWrapAround() ? range.unwrap() : ImmutableList.of(range);
  206. for (Range<Token> subrange : ranges)
  207. {
  208. ColumnFamilySplit split = new ColumnFamilySplit(factory.toString(subrange.left), factory.toString(subrange.right), endpoints);
  209. logger.debug("adding " + split);
  210. splits.add(split);
  211. }
  212. }
  213. return splits;
  214. }
  215. }
  216. private List<String> getSubSplits(String keyspace, String cfName, TokenRange range, Configuration conf) throws IOException
  217. {
  218. int splitsize = ConfigHelper.getInputSplitSize(conf);
  219. for (int i = 0; i < range.rpc_endpoints.size(); i++)
  220. {
  221. String host = range.rpc_endpoints.get(i);
  222. if (host == null || host.equals("0.0.0.0"))
  223. host = range.endpoints.get(i);
  224. try
  225. {
  226. Cassandra.Client client = ConfigHelper.createConnection(host, ConfigHelper.getInputRpcPort(conf), true);
  227. client.set_keyspace(keyspace);
  228. return client.describe_splits(cfName, range.start_token, range.end_token, splitsize);
  229. }
  230. catch (IOException e)
  231. {
  232. logger.debug("failed connect to endpoint " + host, e);
  233. }
  234. catch (TException e)
  235. {
  236. throw new RuntimeException(e);
  237. }
  238. catch (InvalidRequestException e)
  239. {
  240. throw new RuntimeException(e);
  241. }
  242. }
  243. throw new IOException("failed connecting to all endpoints " + StringUtils.join(range.endpoints, ","));
  244. }
  245. private List<TokenRange> getRangeMap(Configuration conf) throws IOException
  246. {
  247. Cassandra.Client client = ConfigHelper.getClientFromInputAddressList(conf);
  248. List<TokenRange> map;
  249. try
  250. {
  251. map = client.describe_ring(ConfigHelper.getInputKeyspace(conf));
  252. }
  253. catch (TException e)
  254. {
  255. throw new RuntimeException(e);
  256. }
  257. catch (InvalidRequestException e)
  258. {
  259. throw new RuntimeException(e);
  260. }
  261. return map;
  262. }
  263. public RecordReader<ByteBuffer, SortedMap<ByteBuffer, IColumn>> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException
  264. {
  265. return new ColumnFamilyRecordReader();
  266. }
  267. //
  268. // Old Hadoop API
  269. //
  270. public org.apache.hadoop.mapred.InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException
  271. {
  272. TaskAttemptContext tac = new TaskAttemptContext(jobConf, new TaskAttemptID());
  273. List<org.apache.hadoop.mapreduce.InputSplit> newInputSplits = this.getSplits(tac);
  274. org.apache.hadoop.mapred.InputSplit[] oldInputSplits = new org.apache.hadoop.mapred.InputSplit[newInputSplits.size()];
  275. for (int i = 0; i < newInputSplits.size(); i++)
  276. oldInputSplits[i] = (ColumnFamilySplit)newInputSplits.get(i);
  277. return oldInputSplits;
  278. }
  279. public org.apache.hadoop.mapred.RecordReader<ByteBuffer, SortedMap<ByteBuffer, IColumn>> getRecordReader(org.apache.hadoop.mapred.InputSplit split, JobConf jobConf, final Reporter reporter) throws IOException
  280. {
  281. TaskAttemptContext tac = new TaskAttemptContext(jobConf, TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID)))
  282. {
  283. @Override
  284. public void progress()
  285. {
  286. reporter.progress();
  287. }
  288. };
  289. ColumnFamilyRecordReader recordReader = new ColumnFamilyRecordReader(jobConf.getInt(CASSANDRA_HADOOP_MAX_KEY_SIZE, CASSANDRA_HADOOP_MAX_KEY_SIZE_DEFAULT));
  290. recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit)split, tac);
  291. return recordReader;
  292. }
  293. }