
/src/java/org/apache/cassandra/hadoop/ColumnFamilyRecordWriter.java

http://github.com/apache/cassandra

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.hadoop;

import java.io.IOException;
import java.net.InetAddress;
import java.nio.ByteBuffer;
import java.util.*;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;

import org.apache.cassandra.client.RingCache;
import org.apache.cassandra.dht.Range;
import org.apache.cassandra.dht.Token;
import org.apache.cassandra.thrift.*;
import org.apache.cassandra.utils.Pair;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.thrift.TException;
import org.apache.thrift.transport.TSocket;

/**
 * The <code>ColumnFamilyRecordWriter</code> maps the output &lt;key, value&gt;
 * pairs to a Cassandra column family. In particular, it applies all mutations
 * in the value, which it associates with the key, and in turn the responsible
 * endpoint.
 *
 * <p>
 * Furthermore, this writer groups the mutations by the endpoint responsible for
 * the rows being affected. This allows the mutations to be executed in parallel,
 * directly to a responsible endpoint.
 * </p>
 *
 * @see ColumnFamilyOutputFormat
 * @see OutputFormat
 */
final class ColumnFamilyRecordWriter extends RecordWriter<ByteBuffer,List<Mutation>>
        implements org.apache.hadoop.mapred.RecordWriter<ByteBuffer,List<Mutation>>
{
    // The configuration this writer is associated with.
    private final Configuration conf;

    // The ring cache that describes the token ranges each node in the ring is
    // responsible for. This is what allows us to group the mutations by the
    // endpoints they should be targeted at. The targeted endpoint essentially
    // acts as the primary replica for the rows being affected by the mutations.
    private final RingCache ringCache;

    // The number of mutations to buffer per endpoint
    private final int queueSize;

    // handles for clients for each range running in the threadpool
    private final Map<Range,RangeClient> clients;

    private final long batchThreshold;
    private final ConsistencyLevel consistencyLevel;
    private Progressable progressable;

    /**
     * Upon construction, obtain the map that this writer will use to collect
     * mutations, and the ring cache for the given keyspace.
     *
     * @param context the task attempt context
     * @throws IOException
     */
    ColumnFamilyRecordWriter(TaskAttemptContext context) throws IOException
    {
        this(context.getConfiguration());
        this.progressable = new Progressable(context);
    }

    ColumnFamilyRecordWriter(Configuration conf, Progressable progressable) throws IOException
    {
        this(conf);
        this.progressable = progressable;
    }

    ColumnFamilyRecordWriter(Configuration conf) throws IOException
    {
        this.conf = conf;
        this.ringCache = new RingCache(conf);
        this.queueSize = conf.getInt(ColumnFamilyOutputFormat.QUEUE_SIZE, 32 * Runtime.getRuntime().availableProcessors());
        this.clients = new HashMap<Range,RangeClient>();
        batchThreshold = conf.getLong(ColumnFamilyOutputFormat.BATCH_THRESHOLD, 32);
        consistencyLevel = ConsistencyLevel.valueOf(ConfigHelper.getWriteConsistencyLevel(conf));
    }
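
    // Example (sketch only): the knobs read in the constructor above can be tuned on
    // the job Configuration before this writer is constructed. The constant names come
    // from the references above; whether they are visible to your job code depends on
    // their declared visibility in ColumnFamilyOutputFormat, so treat this as a hedged
    // illustration rather than a guaranteed API.
    //
    //     Configuration conf = job.getConfiguration();
    //     conf.setInt(ColumnFamilyOutputFormat.QUEUE_SIZE, 64);         // mutations buffered per endpoint
    //     conf.setLong(ColumnFamilyOutputFormat.BATCH_THRESHOLD, 64);   // max keys per batch_mutate call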

    /**
     * If the key is to be associated with a valid value, a mutation is created
     * for it with the given column family and columns. In the event the value
     * in the column is missing (i.e., null), then it is marked for
     * {@link Deletion}. Similarly, if the entire value for a key is missing
     * (i.e., null), then the entire key is marked for {@link Deletion}.
     *
     * @param keybuff
     *            the key to write.
     * @param value
     *            the value to write.
     * @throws IOException
     */
    @Override
    public void write(ByteBuffer keybuff, List<Mutation> value) throws IOException
    {
        Range<Token> range = ringCache.getRange(keybuff);

        // get the client for the given range, or create a new one
        RangeClient client = clients.get(range);
        if (client == null)
        {
            // haven't seen keys for this range: create new client
            client = new RangeClient(ringCache.getEndpoint(range));
            client.start();
            clients.put(range, client);
        }

        for (Mutation amut : value)
            client.put(Pair.create(keybuff, amut));
        progressable.progress();
    }
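
    // Example (sketch only): a reducer typically builds the List<Mutation> passed to
    // write() from the Thrift-generated types, roughly as below. Setter-style
    // construction is shown; older Thrift bindings take the Column fields in its
    // constructor instead, so check the bindings shipped with your Cassandra version.
    //
    //     Column c = new Column();
    //     c.setName(ByteBufferUtil.bytes("col"));
    //     c.setValue(ByteBufferUtil.bytes("val"));
    //     c.setTimestamp(System.currentTimeMillis());
    //     ColumnOrSuperColumn cosc = new ColumnOrSuperColumn();
    //     cosc.setColumn(c);
    //     Mutation m = new Mutation();
    //     m.setColumn_or_supercolumn(cosc);
    //     context.write(key, Collections.singletonList(m));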

    /**
     * Close this <code>RecordWriter</code> to future operations, but not before
     * flushing out the batched mutations.
     *
     * @param context the context of the task
     * @throws IOException
     */
    @Override
    public void close(TaskAttemptContext context) throws IOException, InterruptedException
    {
        close();
    }

    /** Fills the deprecated RecordWriter interface for streaming. */
    @Deprecated
    public void close(org.apache.hadoop.mapred.Reporter reporter) throws IOException
    {
        close();
    }

    private void close() throws IOException
    {
        // close all the clients before throwing anything
        IOException clientException = null;
        for (RangeClient client : clients.values())
        {
            try
            {
                client.close();
            }
            catch (IOException e)
            {
                clientException = e;
            }
        }
        if (clientException != null)
            throw clientException;
    }

    /**
     * A client that runs in a threadpool and connects to the list of endpoints for a particular
     * range. Mutations for keys in that range are sent to this client via a queue.
     */
    public class RangeClient extends Thread
    {
        // The list of endpoints for this range
        private final List<InetAddress> endpoints;
        private final String columnFamily = ConfigHelper.getOutputColumnFamily(conf);

        // A bounded queue of incoming mutations for this range
        private final BlockingQueue<Pair<ByteBuffer, Mutation>> queue = new ArrayBlockingQueue<Pair<ByteBuffer,Mutation>>(queueSize);

        private volatile boolean run = true;
        // we want the caller to know if something went wrong, so we record any unrecoverable exception while writing
        // so we can throw it on the caller's stack when put() is next called, or if there are no more put calls,
        // when the client is closed.
        private volatile IOException lastException;

        private Cassandra.Client thriftClient;
        private TSocket thriftSocket;

        /**
         * Constructs a {@link RangeClient} for the given endpoints.
         * @param endpoints the possible endpoints to execute the mutations on
         */
        public RangeClient(List<InetAddress> endpoints)
        {
            super("client-" + endpoints);
            this.endpoints = endpoints;
        }

        /**
         * Enqueues the given value to be written to Cassandra.
         */
        public void put(Pair<ByteBuffer,Mutation> value) throws IOException
        {
            while (true)
            {
                if (lastException != null)
                    throw lastException;
                try
                {
                    // offer with a timeout rather than blocking indefinitely, so a
                    // failure in the writer thread surfaces as lastException promptly
                    if (queue.offer(value, 100, TimeUnit.MILLISECONDS))
                        break;
                }
                catch (InterruptedException e)
                {
                    throw new AssertionError(e);
                }
            }
        }

        public void close() throws IOException
        {
            // stop the run loop. this will result in closeInternal being called by the time join() finishes.
            run = false;
            interrupt();
            try
            {
                this.join();
            }
            catch (InterruptedException e)
            {
                throw new AssertionError(e);
            }

            if (lastException != null)
                throw lastException;
        }

        private void closeInternal()
        {
            if (thriftSocket != null)
            {
                thriftSocket.close();
                thriftSocket = null;
                thriftClient = null;
            }
        }

        /**
         * Loops collecting mutations from the queue and sending to Cassandra
         */
        public void run()
        {
            outer:
            while (run || !queue.isEmpty())
            {
                Pair<ByteBuffer, Mutation> mutation;
                try
                {
                    mutation = queue.take();
                }
                catch (InterruptedException e)
                {
                    // re-check loop condition after interrupt
                    continue;
                }

                // drain the queue into a batch keyed by row key; note that
                // batchThreshold bounds the number of distinct keys per batch,
                // not the total number of mutations
                Map<ByteBuffer, Map<String, List<Mutation>>> batch = new HashMap<ByteBuffer, Map<String, List<Mutation>>>();
                while (mutation != null)
                {
                    Map<String, List<Mutation>> subBatch = batch.get(mutation.left);
                    if (subBatch == null)
                    {
                        subBatch = Collections.singletonMap(columnFamily, (List<Mutation>) new ArrayList<Mutation>());
                        batch.put(mutation.left, subBatch);
                    }

                    subBatch.get(columnFamily).add(mutation.right);
                    if (batch.size() >= batchThreshold)
                        break;
                    mutation = queue.poll();
                }

                Iterator<InetAddress> iter = endpoints.iterator();
                while (true)
                {
                    // send the mutation to the last-used endpoint. first time through, this will NPE harmlessly.
                    try
                    {
                        thriftClient.batch_mutate(batch, consistencyLevel);
                        break;
                    }
                    catch (Exception e)
                    {
                        closeInternal();
                        if (!iter.hasNext())
                        {
                            lastException = new IOException(e);
                            break outer;
                        }
                    }

                    // attempt to connect to a different endpoint
                    try
                    {
                        InetAddress address = iter.next();
                        thriftSocket = new TSocket(address.getHostName(), ConfigHelper.getOutputRpcPort(conf));
                        thriftClient = ColumnFamilyOutputFormat.createAuthenticatedClient(thriftSocket, conf);
                    }
                    catch (Exception e)
                    {
                        closeInternal();
                        // TException means something unexpected went wrong to that endpoint, so
                        // we should try again to another. Other exceptions (auth or invalid request) are fatal.
                        if ((!(e instanceof TException)) || !iter.hasNext())
                        {
                            lastException = new IOException(e);
                            break outer;
                        }
                    }
                }
            }
        }

        @Override
        public String toString()
        {
            return "#<Client for " + endpoints.toString() + ">";
        }
    }
}
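
A minimal job-setup sketch for this writer, following the wiring used in Cassandra's bundled Hadoop word-count example. The ConfigHelper setter names below vary across Cassandra versions (only the getters are referenced in the file above), so treat them as assumptions to be checked against your version:

    Job job = new Job(conf, "cassandra-output-example");
    job.setOutputKeyClass(ByteBuffer.class);
    job.setOutputValueClass(List.class);
    job.setOutputFormatClass(ColumnFamilyOutputFormat.class);
    ConfigHelper.setOutputColumnFamily(job.getConfiguration(), "Keyspace1", "Standard1");
    ConfigHelper.setOutputInitialAddress(job.getConfiguration(), "localhost");
    ConfigHelper.setOutputRpcPort(job.getConfiguration(), "9160");
    ConfigHelper.setOutputPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner");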