PageRenderTime 29ms CodeModel.GetById 24ms RepoModel.GetById 1ms app.codeStats 0ms

/jstorm-client/src/main/java/backtype/storm/coordination/CoordinatedBolt.java

https://gitlab.com/hackbuteer59/jstorm
Java | 379 lines | 334 code | 39 blank | 6 comment | 70 complexity | 4679d015afb5528a032c00a11f75a4c0 MD5 | raw file
  1. package backtype.storm.coordination;
  2. import backtype.storm.topology.FailedException;
  3. import java.util.Map.Entry;
  4. import backtype.storm.tuple.Values;
  5. import backtype.storm.generated.GlobalStreamId;
  6. import java.util.Collection;
  7. import backtype.storm.Constants;
  8. import backtype.storm.generated.Grouping;
  9. import backtype.storm.task.IOutputCollector;
  10. import backtype.storm.task.OutputCollector;
  11. import backtype.storm.task.TopologyContext;
  12. import backtype.storm.topology.IRichBolt;
  13. import backtype.storm.topology.OutputFieldsDeclarer;
  14. import backtype.storm.tuple.Fields;
  15. import backtype.storm.tuple.Tuple;
  16. import backtype.storm.utils.TimeCacheMap;
  17. import backtype.storm.utils.Utils;
  18. import java.io.Serializable;
  19. import java.util.ArrayList;
  20. import java.util.Arrays;
  21. import java.util.HashMap;
  22. import java.util.Iterator;
  23. import java.util.List;
  24. import java.util.Map;
  25. import org.slf4j.Logger;
  26. import org.slf4j.LoggerFactory;
  27. import static backtype.storm.utils.Utils.get;
  28. /**
 * Coordination requires the request ids to be globally unique for a while.
 * This is so that coordination does not get confused in the case of retries.
  31. */
  32. public class CoordinatedBolt implements IRichBolt {
/** Class-wide logger; used to report batches that fail to finish. */
public static Logger LOG = LoggerFactory.getLogger(CoordinatedBolt.class);
  34. public static interface FinishedCallback {
  35. void finishedId(Object id);
  36. }
  37. public static interface TimeoutCallback {
  38. void timeoutId(Object id);
  39. }
  40. public static class SourceArgs implements Serializable {
  41. public boolean singleCount;
  42. protected SourceArgs(boolean singleCount) {
  43. this.singleCount = singleCount;
  44. }
  45. public static SourceArgs single() {
  46. return new SourceArgs(true);
  47. }
  48. public static SourceArgs all() {
  49. return new SourceArgs(false);
  50. }
  51. @Override
  52. public String toString() {
  53. return "<Single: " + singleCount + ">";
  54. }
  55. }
  56. public class CoordinatedOutputCollector implements IOutputCollector {
  57. IOutputCollector _delegate;
  58. public CoordinatedOutputCollector(IOutputCollector delegate) {
  59. _delegate = delegate;
  60. }
  61. public List<Integer> emit(String stream, Collection<Tuple> anchors,
  62. List<Object> tuple) {
  63. List<Integer> tasks = _delegate.emit(stream, anchors, tuple);
  64. updateTaskCounts(tuple.get(0), tasks);
  65. return tasks;
  66. }
  67. public void emitDirect(int task, String stream,
  68. Collection<Tuple> anchors, List<Object> tuple) {
  69. updateTaskCounts(tuple.get(0), Arrays.asList(task));
  70. _delegate.emitDirect(task, stream, anchors, tuple);
  71. }
  72. public void ack(Tuple tuple) {
  73. Object id = tuple.getValue(0);
  74. synchronized (_tracked) {
  75. TrackingInfo track = _tracked.get(id);
  76. if (track != null)
  77. track.receivedTuples++;
  78. }
  79. boolean failed = checkFinishId(tuple, TupleType.REGULAR);
  80. if (failed) {
  81. _delegate.fail(tuple);
  82. } else {
  83. _delegate.ack(tuple);
  84. }
  85. }
  86. public void fail(Tuple tuple) {
  87. Object id = tuple.getValue(0);
  88. synchronized (_tracked) {
  89. TrackingInfo track = _tracked.get(id);
  90. if (track != null)
  91. track.failed = true;
  92. }
  93. checkFinishId(tuple, TupleType.REGULAR);
  94. _delegate.fail(tuple);
  95. }
  96. public void reportError(Throwable error) {
  97. _delegate.reportError(error);
  98. }
  99. private void updateTaskCounts(Object id, List<Integer> tasks) {
  100. synchronized (_tracked) {
  101. TrackingInfo track = _tracked.get(id);
  102. if (track != null) {
  103. Map<Integer, Integer> taskEmittedTuples = track.taskEmittedTuples;
  104. for (Integer task : tasks) {
  105. int newCount = get(taskEmittedTuples, task, 0) + 1;
  106. taskEmittedTuples.put(task, newCount);
  107. }
  108. }
  109. }
  110. }
  111. }
// Coordination settings per source, keyed by source component id; never null after construction.
private Map<String, SourceArgs> _sourceArgs;
// Identifies the stream whose tuples are classified as ID tuples; null when no id stream is configured.
private IdStreamSpec _idStreamSpec;
// The wrapped bolt that receives the regular tuples.
private IRichBolt _delegate;
// Number of coordination reports expected per request id; computed in prepare() and left null when there are no sources.
private Integer _numSourceReports;
// Tasks of the components that consume this bolt's coordinated stream; filled in prepare().
private List<Integer> _countOutTasks = new ArrayList<Integer>();;
// The collector passed to prepare(); used for acking/failing coordination tuples and emitting counts.
private OutputCollector _collector;
// Tracking state per request id; created in prepare() with context.maxTopologyMessageTimeout(). Also used as the lock for that state.
private TimeCacheMap<Object, TrackingInfo> _tracked;
  119. public static class TrackingInfo {
  120. int reportCount = 0;
  121. int expectedTupleCount = 0;
  122. int receivedTuples = 0;
  123. boolean failed = false;
  124. Map<Integer, Integer> taskEmittedTuples = new HashMap<Integer, Integer>();
  125. boolean receivedId = false;
  126. boolean finished = false;
  127. List<Tuple> ackTuples = new ArrayList<Tuple>();
  128. @Override
  129. public String toString() {
  130. return "reportCount: " + reportCount + "\n"
  131. + "expectedTupleCount: " + expectedTupleCount + "\n"
  132. + "receivedTuples: " + receivedTuples + "\n" + "failed: "
  133. + failed + "\n" + taskEmittedTuples.toString();
  134. }
  135. }
  136. public static class IdStreamSpec implements Serializable {
  137. GlobalStreamId _id;
  138. public GlobalStreamId getGlobalStreamId() {
  139. return _id;
  140. }
  141. public static IdStreamSpec makeDetectSpec(String component,
  142. String stream) {
  143. return new IdStreamSpec(component, stream);
  144. }
  145. protected IdStreamSpec(String component, String stream) {
  146. _id = new GlobalStreamId(component, stream);
  147. }
  148. }
  149. public CoordinatedBolt(IRichBolt delegate) {
  150. this(delegate, null, null);
  151. }
  152. public CoordinatedBolt(IRichBolt delegate, String sourceComponent,
  153. SourceArgs sourceArgs, IdStreamSpec idStreamSpec) {
  154. this(delegate, singleSourceArgs(sourceComponent, sourceArgs),
  155. idStreamSpec);
  156. }
  157. public CoordinatedBolt(IRichBolt delegate,
  158. Map<String, SourceArgs> sourceArgs, IdStreamSpec idStreamSpec) {
  159. _sourceArgs = sourceArgs;
  160. if (_sourceArgs == null)
  161. _sourceArgs = new HashMap<String, SourceArgs>();
  162. _delegate = delegate;
  163. _idStreamSpec = idStreamSpec;
  164. }
  165. public void prepare(Map config, TopologyContext context,
  166. OutputCollector collector) {
  167. TimeCacheMap.ExpiredCallback<Object, TrackingInfo> callback = null;
  168. if (_delegate instanceof TimeoutCallback) {
  169. callback = new TimeoutItems();
  170. }
  171. _tracked = new TimeCacheMap<Object, TrackingInfo>(
  172. context.maxTopologyMessageTimeout(), callback);
  173. _collector = collector;
  174. _delegate.prepare(config, context, new OutputCollector(
  175. new CoordinatedOutputCollector(collector)));
  176. for (String component : Utils.get(context.getThisTargets(),
  177. Constants.COORDINATED_STREAM_ID,
  178. new HashMap<String, Grouping>()).keySet()) {
  179. for (Integer task : context.getComponentTasks(component)) {
  180. _countOutTasks.add(task);
  181. }
  182. }
  183. if (!_sourceArgs.isEmpty()) {
  184. _numSourceReports = 0;
  185. for (Entry<String, SourceArgs> entry : _sourceArgs.entrySet()) {
  186. if (entry.getValue().singleCount) {
  187. _numSourceReports += 1;
  188. } else {
  189. _numSourceReports += context.getComponentTasks(
  190. entry.getKey()).size();
  191. }
  192. }
  193. }
  194. }
/**
 * Re-evaluates the coordination state of the request id carried in field 0
 * of {@code tup} and, if the request has failed or is complete, finalizes it
 * and removes it from {@code _tracked}.
 *
 * <p>Tuples that take part in deciding completion (COORD tuples when no id
 * stream is configured, ID tuples otherwise) are parked in
 * {@code track.ackTuples} and acked or failed together once the outcome is
 * known. Other ID/COORD tuples are acked or failed right away. The REGULAR
 * tuple passed in is never acked or failed by this method.
 *
 * @param tup the tuple that triggered the check
 * @param type classification of {@code tup}
 * @return {@code true} if the request is marked failed or a
 *         {@link FailedException} was caught while finishing it;
 *         {@code false} otherwise (including when the id is not tracked)
 * @throws IllegalStateException if sources are configured and the completion
 *         condition is met while handling a REGULAR tuple
 */
private boolean checkFinishId(Tuple tup, TupleType type) {
    Object id = tup.getValue(0);
    boolean failed = false;
    synchronized (_tracked) {
        TrackingInfo track = _tracked.get(id);
        try {
            if (track != null) {
                boolean delayed = false;
                // Park the tuple so that its ack/fail reflects the final outcome of the request.
                if (_idStreamSpec == null && type == TupleType.COORD
                        || _idStreamSpec != null && type == TupleType.ID) {
                    track.ackTuples.add(tup);
                    delayed = true;
                }
                if (track.failed) {
                    // Request already failed: fail everything that was parked and stop tracking.
                    failed = true;
                    for (Tuple t : track.ackTuples) {
                        _collector.fail(t);
                    }
                    _tracked.remove(id);
                } else if (track.receivedId
                        && (_sourceArgs.isEmpty() || track.reportCount == _numSourceReports
                                && track.expectedTupleCount == track.receivedTuples)) {
                    // Complete: the id is known and either there are no sources, or all
                    // reports arrived and the received count matches the announced count.
                    if (_delegate instanceof FinishedCallback) {
                        ((FinishedCallback) _delegate).finishedId(id);
                    }
                    // Note: this sanity check runs after the finished callback has been invoked.
                    if (!(_sourceArgs.isEmpty() || type != TupleType.REGULAR)) {
                        throw new IllegalStateException(
                                "Coordination condition met on a non-coordinating tuple. Should be impossible");
                    }
                    // Tell each consumer task of the coordinated stream how many tuples
                    // were emitted to it for this id (0 if none), anchored on tup.
                    Iterator<Integer> outTasks = _countOutTasks.iterator();
                    while (outTasks.hasNext()) {
                        int task = outTasks.next();
                        int numTuples = get(track.taskEmittedTuples, task,
                                0);
                        _collector.emitDirect(task,
                                Constants.COORDINATED_STREAM_ID, tup,
                                new Values(id, numTuples));
                    }
                    for (Tuple t : track.ackTuples) {
                        _collector.ack(t);
                    }
                    // The finished flag is read by the expiry callback to avoid reporting a timeout.
                    track.finished = true;
                    _tracked.remove(id);
                }
                // ID/COORD tuples that were not parked are settled immediately.
                if (!delayed && type != TupleType.REGULAR) {
                    if (track.failed) {
                        _collector.fail(tup);
                    } else {
                        _collector.ack(tup);
                    }
                }
            } else {
                // No tracking entry for this id (presumably expired or already removed):
                // fail ID/COORD tuples; REGULAR tuples are left to the caller.
                if (type != TupleType.REGULAR)
                    _collector.fail(tup);
            }
        } catch (FailedException e) {
            LOG.error("Failed to finish batch", e);
            // NOTE(review): track is dereferenced without a null check here; this is
            // only safe if no FailedException can be raised on the track == null path.
            for (Tuple t : track.ackTuples) {
                _collector.fail(t);
            }
            _tracked.remove(id);
            failed = true;
        }
    }
    return failed;
}
  261. public void execute(Tuple tuple) {
  262. Object id = tuple.getValue(0);
  263. TrackingInfo track;
  264. TupleType type = getTupleType(tuple);
  265. synchronized (_tracked) {
  266. track = _tracked.get(id);
  267. if (track == null) {
  268. track = new TrackingInfo();
  269. if (_idStreamSpec == null)
  270. track.receivedId = true;
  271. _tracked.put(id, track);
  272. }
  273. }
  274. if (type == TupleType.ID) {
  275. synchronized (_tracked) {
  276. track.receivedId = true;
  277. }
  278. checkFinishId(tuple, type);
  279. } else if (type == TupleType.COORD) {
  280. int count = (Integer) tuple.getValue(1);
  281. synchronized (_tracked) {
  282. track.reportCount++;
  283. track.expectedTupleCount += count;
  284. }
  285. checkFinishId(tuple, type);
  286. } else {
  287. synchronized (_tracked) {
  288. _delegate.execute(tuple);
  289. }
  290. }
  291. }
  292. public void cleanup() {
  293. _delegate.cleanup();
  294. _tracked.cleanup();
  295. }
  296. public void declareOutputFields(OutputFieldsDeclarer declarer) {
  297. _delegate.declareOutputFields(declarer);
  298. declarer.declareStream(Constants.COORDINATED_STREAM_ID, true,
  299. new Fields("id", "count"));
  300. }
  301. @Override
  302. public Map<String, Object> getComponentConfiguration() {
  303. return _delegate.getComponentConfiguration();
  304. }
  305. private static Map<String, SourceArgs> singleSourceArgs(
  306. String sourceComponent, SourceArgs sourceArgs) {
  307. Map<String, SourceArgs> ret = new HashMap<String, SourceArgs>();
  308. ret.put(sourceComponent, sourceArgs);
  309. return ret;
  310. }
  311. private class TimeoutItems implements
  312. TimeCacheMap.ExpiredCallback<Object, TrackingInfo> {
  313. @Override
  314. public void expire(Object id, TrackingInfo val) {
  315. synchronized (_tracked) {
  316. // the combination of the lock and the finished flag ensure that
  317. // an id is never timed out if it has been finished
  318. val.failed = true;
  319. if (!val.finished) {
  320. ((TimeoutCallback) _delegate).timeoutId(id);
  321. }
  322. }
  323. }
  324. }
  325. private TupleType getTupleType(Tuple tuple) {
  326. if (_idStreamSpec != null
  327. && tuple.getSourceGlobalStreamid().equals(_idStreamSpec._id)) {
  328. return TupleType.ID;
  329. } else if (!_sourceArgs.isEmpty()
  330. && tuple.getSourceStreamId().equals(
  331. Constants.COORDINATED_STREAM_ID)) {
  332. return TupleType.COORD;
  333. } else {
  334. return TupleType.REGULAR;
  335. }
  336. }
  337. static enum TupleType {
  338. REGULAR, ID, COORD
  339. }
  340. }