PageRenderTime 32ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/java/src/com/google/appengine/tools/mapreduce/impl/IntermediateOutput.java

http://appengine-mapreduce.googlecode.com/
Java | 144 lines | 102 code | 20 blank | 22 comment | 12 complexity | 60ca5dd4dbd06313ae34516c066ba899 MD5 | raw file
Possible License(s): Apache-2.0
  1. // Copyright 2012 Google Inc. All Rights Reserved.
  2. package com.google.appengine.tools.mapreduce.impl;
  3. import static com.google.common.base.Preconditions.checkNotNull;
  4. import com.google.appengine.api.files.AppEngineFile;
  5. import com.google.appengine.api.files.FileService;
  6. import com.google.appengine.api.files.FileServiceFactory;
  7. import com.google.appengine.api.files.FileServicePb;
  8. import com.google.appengine.api.files.RecordWriteChannel;
  9. import com.google.appengine.repackaged.com.google.protobuf.ByteString;
  10. import com.google.appengine.tools.mapreduce.KeyValue;
  11. import com.google.appengine.tools.mapreduce.Marshaller;
  12. import com.google.appengine.tools.mapreduce.Output;
  13. import com.google.appengine.tools.mapreduce.OutputWriter;
  14. import com.google.appengine.tools.mapreduce.impl.util.FileUtil;
  15. import com.google.common.collect.ImmutableList;
  16. import java.io.IOException;
  17. import java.nio.ByteBuffer;
  18. import java.util.List;
  19. import java.util.logging.Logger;
  20. /**
  21. * @author ohler@google.com (Christian Ohler)
  22. *
  23. * @param <K> type of intermediate keys
  24. * @param <V> type of intermediate values
  25. */
  26. // NOTE(ohler): Given the set of RPCs that the file service currently offers,
  27. // the file format used by the shuffle service is unreliable -- the file service
  28. // doesn't give us a way to determine whether an append RPC succeeded, and the
  29. // shuffle file format cannot detect duplicate data (so unconditionally retrying
  30. // is not an option). (We can't tell whether an append RPC succeeded because
  31. // (a) an append RPC that failed with a timeout could still have succeeded on
  32. // the backend, and (b) the file service buffers data in memory, so even data
  33. // from a successful append RPC might get lost if the file service process
  34. // crashes or its machine loses its network connection.)
  35. public class IntermediateOutput<K, V> extends Output<KeyValue<K, V>, List<AppEngineFile>> {
  36. private static final long serialVersionUID = 207899202516112458L;
  37. @SuppressWarnings("unused")
  38. private static final Logger log = Logger.getLogger(IntermediateOutput.class.getName());
  39. private static final FileService FILE_SERVICE = FileServiceFactory.getFileService();
  40. private static class Writer<K, V> extends OutputWriter<KeyValue<K, V>> {
  41. private static final long serialVersionUID = 592636863384442324L;
  42. private final String mrJobId;
  43. private final int mapShardNumber;
  44. private final Marshaller<K> keyMarshaller;
  45. private final Marshaller<V> valueMarshaller;
  46. // We create the file lazily so that we don't make empty files if the mapper
  47. // never produces any output (could be a common case).
  48. private AppEngineFile file = null;
  49. // Populated in close() unless no file was created.
  50. private AppEngineFile fileReadHandle = null;
  51. private transient RecordWriteChannel channel;
  52. public Writer(String mrJobId,
  53. int mapShardNumber,
  54. Marshaller<K> keyMarshaller,
  55. Marshaller<V> valueMarshaller) {
  56. this.mrJobId = checkNotNull(mrJobId, "Null mrJobId");
  57. this.mapShardNumber = mapShardNumber;
  58. this.keyMarshaller = checkNotNull(keyMarshaller, "Null keyMarshaller");
  59. this.valueMarshaller = checkNotNull(valueMarshaller, "Null valueMarshaller");
  60. }
  61. private void ensureOpen() throws IOException {
  62. if (channel != null) {
  63. // This only works if slices are <30 seconds. TODO(ohler): close and
  64. // reopen every 29 seconds. Better yet, change fileproxy to not require
  65. // the file to be open.
  66. return;
  67. }
  68. if (file == null) {
  69. file = FILE_SERVICE.createNewBlobFile(MapReduceConstants.MAP_OUTPUT_MIME_TYPE,
  70. mrJobId + ": map output, shard " + mapShardNumber);
  71. }
  72. channel = FILE_SERVICE.openRecordWriteChannel(file, false);
  73. }
  74. @Override public void write(KeyValue<K, V> pair) throws IOException {
  75. ensureOpen();
  76. FileServicePb.KeyValue.Builder b = FileServicePb.KeyValue.newBuilder();
  77. b.setKey(ByteString.copyFrom(keyMarshaller.toBytes(pair.getKey())));
  78. b.setValue(ByteString.copyFrom(valueMarshaller.toBytes(pair.getValue())));
  79. channel.write(ByteBuffer.wrap(b.build().toByteArray()), null);
  80. }
  81. @Override public void endSlice() throws IOException {
  82. if (channel != null) {
  83. channel.close();
  84. }
  85. }
  86. @Override public void close() throws IOException {
  87. if (file != null) {
  88. fileReadHandle = FileUtil.ensureFinalized(file);
  89. }
  90. }
  91. }
  92. private final String mrJobId;
  93. private final int shardCount;
  94. private final Marshaller<K> keyMarshaller;
  95. private final Marshaller<V> valueMarshaller;
  96. public IntermediateOutput(String mrJobId,
  97. int shardCount,
  98. Marshaller<K> keyMarshaller,
  99. Marshaller<V> valueMarshaller) {
  100. this.mrJobId = checkNotNull(mrJobId, "Null mrJobId");
  101. this.shardCount = shardCount;
  102. this.keyMarshaller = checkNotNull(keyMarshaller, "Null keyMarshaller");
  103. this.valueMarshaller = checkNotNull(valueMarshaller, "Null valueMarshaller");
  104. }
  105. @Override public List<? extends OutputWriter<KeyValue<K, V>>> createWriters() {
  106. ImmutableList.Builder<Writer<K, V>> out = ImmutableList.builder();
  107. for (int i = 0; i < shardCount; i++) {
  108. out.add(new Writer<K, V>(mrJobId, i, keyMarshaller, valueMarshaller));
  109. }
  110. return out.build();
  111. }
  112. @Override public List<AppEngineFile> finish(
  113. List<? extends OutputWriter<KeyValue<K, V>>> writers) {
  114. ImmutableList.Builder<AppEngineFile> out = ImmutableList.builder();
  115. for (OutputWriter<KeyValue<K, V>> w : writers) {
  116. @SuppressWarnings("unchecked")
  117. Writer<K, V> writer = (Writer) w;
  118. if (writer.fileReadHandle != null) {
  119. out.add(writer.fileReadHandle);
  120. }
  121. }
  122. return out.build();
  123. }
  124. }