
/hadoop-0.18.3/src/test/org/apache/hadoop/mapred/TestMiniMRWithDFS.java

https://github.com/rjurney/Cloud-Stenography
Java
Possible License(s): Apache-2.0
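
Note: in the Hadoop 0.18 source tree this test is normally run through the standard Ant test target, e.g. ant test -Dtestcase=TestMiniMRWithDFS (assuming the stock build.xml; adjust to your checkout's build setup).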
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred;

import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import junit.framework.TestCase;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.dfs.MiniDFSCluster;
import org.apache.hadoop.dfs.NameNode;
import org.apache.hadoop.examples.WordCount;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

/**
 * A JUnit test to test Mini Map-Reduce Cluster with Mini-DFS.
 */
public class TestMiniMRWithDFS extends TestCase {
  private static final Log LOG =
    LogFactory.getLog(TestMiniMRWithDFS.class.getName());

  static final int NUM_MAPS = 10;
  static final int NUM_SAMPLES = 100000;
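
  /**
   * Holds a finished job together with its concatenated textual output,
   * so tests can assert on both job counters and the result contents.
   */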
  public static class TestResult {
    public String output;
    public RunningJob job;

    TestResult(RunningJob job, String output) {
      this.job = job;
      this.output = output;
    }
  }
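
  /**
   * Writes the given input into a single file under inDir, configures the
   * WordCount example (with the WordCount reducer doubling as a combiner),
   * runs it with the requested numbers of maps and reduces, and returns the
   * job handle plus the concatenated job output.
   */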
  public static TestResult launchWordCount(JobConf conf,
                                           Path inDir,
                                           Path outDir,
                                           String input,
                                           int numMaps,
                                           int numReduces) throws IOException {
    FileSystem inFs = inDir.getFileSystem(conf);
    FileSystem outFs = outDir.getFileSystem(conf);
    outFs.delete(outDir, true);
    if (!inFs.mkdirs(inDir)) {
      throw new IOException("Mkdirs failed to create " + inDir.toString());
    }
    {
      DataOutputStream file = inFs.create(new Path(inDir, "part-0"));
      file.writeBytes(input);
      file.close();
    }
    conf.setJobName("wordcount");
    conf.setInputFormat(TextInputFormat.class);
    // the keys are words (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    conf.setOutputValueClass(IntWritable.class);
    conf.setMapperClass(WordCount.MapClass.class);
    conf.setCombinerClass(WordCount.Reduce.class);
    conf.setReducerClass(WordCount.Reduce.class);
    FileInputFormat.setInputPaths(conf, inDir);
    FileOutputFormat.setOutputPath(conf, outDir);
    conf.setNumMapTasks(numMaps);
    conf.setNumReduceTasks(numReduces);
    RunningJob job = JobClient.runJob(conf);
    return new TestResult(job, readOutput(outDir, conf));
  }
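
  /**
   * Reads every output file in outDir that passes the OutputLogFilter
   * (which skips the _logs directory) and returns all lines concatenated,
   * one per "\n".
   */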
  public static String readOutput(Path outDir,
                                  JobConf conf) throws IOException {
    FileSystem fs = outDir.getFileSystem(conf);
    StringBuffer result = new StringBuffer();
    {
      Path[] fileList = FileUtil.stat2Paths(fs.listStatus(outDir,
                                                          new OutputLogFilter()));
      for (int i = 0; i < fileList.length; ++i) {
        LOG.info("File list[" + i + "]" + ": " + fileList[i]);
        BufferedReader file =
          new BufferedReader(new InputStreamReader(fs.open(fileList[i])));
        String line = file.readLine();
        while (line != null) {
          result.append(line);
          result.append("\n");
          line = file.readLine();
        }
        file.close();
      }
    }
    return result.toString();
  }
  /**
   * Make sure that there are exactly the directories that we expect to find.
   * @param mr the map-reduce cluster
   * @param jobIds the job ids that own the expected task directories
   * @param taskDirs the task directories that should be present
   */
  static void checkTaskDirectories(MiniMRCluster mr,
                                   String[] jobIds,
                                   String[] taskDirs) {
    mr.waitUntilIdle();
    int trackers = mr.getNumTaskTrackers();
    List<String> neededDirs = new ArrayList<String>(Arrays.asList(taskDirs));
    boolean[] found = new boolean[taskDirs.length];
    for (int i = 0; i < trackers; ++i) {
      int numNotDel = 0;
      File localDir = new File(mr.getTaskTrackerLocalDir(i));
      LOG.debug("Tracker directory: " + localDir);
      File trackerDir = new File(localDir, "taskTracker");
      assertTrue("local dir " + localDir + " does not exist.",
                 localDir.isDirectory());
      assertTrue("task tracker dir " + trackerDir + " does not exist.",
                 trackerDir.isDirectory());
      String contents[] = localDir.list();
      String trackerContents[] = trackerDir.list();
      for (int j = 0; j < contents.length; ++j) {
        System.out.println("Local " + localDir + ": " + contents[j]);
      }
      for (int j = 0; j < trackerContents.length; ++j) {
        System.out.println("Local jobcache " + trackerDir + ": " +
                           trackerContents[j]);
      }
      for (int fileIdx = 0; fileIdx < contents.length; ++fileIdx) {
        String name = contents[fileIdx];
        if (!("taskTracker".equals(contents[fileIdx]))) {
          LOG.debug("Looking at " + name);
          assertTrue("Spurious directory " + name + " found in " +
                     localDir, false);
        }
      }
      for (int idx = 0; idx < neededDirs.size(); ++idx) {
        String name = neededDirs.get(idx);
        if (new File(new File(new File(trackerDir, "jobcache"),
                              jobIds[idx]), name).isDirectory()) {
          found[idx] = true;
          numNotDel++;
        }
      }
    }
    for (int i = 0; i < found.length; i++) {
      assertTrue("Directory " + taskDirs[i] + " not found", found[i]);
    }
  }
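
  /**
   * Runs the PiEstimator example with NUM_MAPS maps and NUM_SAMPLES samples
   * per map, and asserts that the estimate is within 0.01 of pi. No task
   * files are kept, so no task directories should remain afterwards.
   */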
  static void runPI(MiniMRCluster mr, JobConf jobconf) throws IOException {
    LOG.info("runPI");
    double estimate = PiEstimator.launch(NUM_MAPS, NUM_SAMPLES, jobconf);
    double error = Math.abs(Math.PI - estimate);
    assertTrue("Error in PI estimation " + error + " exceeds 0.01",
               (error < 0.01));
    checkTaskDirectories(mr, new String[]{}, new String[]{});
  }
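
  /**
   * Exercises word count in three configurations: a normal run whose first
   * reduce task's files are kept and then verified on disk; a run with zero
   * maps whose HDFS read/write counters are checked against the input and
   * output sizes; and a run reading and writing localfs paths while the
   * default filesystem is still HDFS.
   */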
  static void runWordCount(MiniMRCluster mr, JobConf jobConf)
      throws IOException {
    LOG.info("runWordCount");
    // Run a word count example
    // Keeping tasks that match this pattern
    jobConf.setKeepTaskFilesPattern(
      TaskAttemptID.getTaskAttemptIDsPattern(null, null, true, 1, null));
    TestResult result;
    final Path inDir = new Path("./wc/input");
    final Path outDir = new Path("./wc/output");
    String input = "The quick brown fox\nhas many silly\nred fox sox\n";
    result = launchWordCount(jobConf, inDir, outDir, input, 3, 1);
    assertEquals("The\t1\nbrown\t1\nfox\t2\nhas\t1\nmany\t1\n" +
                 "quick\t1\nred\t1\nsilly\t1\nsox\t1\n", result.output);
    JobID jobid = result.job.getID();
    TaskAttemptID taskid = new TaskAttemptID(new TaskID(jobid, true, 1), 0);
    checkTaskDirectories(mr, new String[]{jobid.toString()},
                         new String[]{taskid.toString()});
    // test with maps=0
    jobConf = mr.createJobConf();
    input = "owen is oom";
    result = launchWordCount(jobConf, inDir, outDir, input, 0, 1);
    assertEquals("is\t1\noom\t1\nowen\t1\n", result.output);
    Counters counters = result.job.getCounters();
    long hdfsRead =
      counters.findCounter(Task.FileSystemCounter.HDFS_READ).getCounter();
    long hdfsWrite =
      counters.findCounter(Task.FileSystemCounter.HDFS_WRITE).getCounter();
    assertEquals(result.output.length(), hdfsWrite);
    assertEquals(input.length(), hdfsRead);
    // Run a job with input and output going to localfs even though the
    // default fs is hdfs.
    {
      FileSystem localfs = FileSystem.getLocal(jobConf);
      String TEST_ROOT_DIR =
        new File(System.getProperty("test.build.data", "/tmp"))
          .toString().replace(' ', '+');
      Path localIn = localfs.makeQualified(new Path(TEST_ROOT_DIR + "/local/in"));
      Path localOut = localfs.makeQualified(new Path(TEST_ROOT_DIR + "/local/out"));
      result = launchWordCount(jobConf, localIn, localOut,
                               "all your base belong to us", 1, 1);
      assertEquals("all\t1\nbase\t1\nbelong\t1\nto\t1\nus\t1\nyour\t1\n",
                   result.output);
      assertTrue("outputs on localfs", localfs.exists(localOut));
    }
  }
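
  /**
   * Brings up a 4-datanode MiniDFSCluster and a MiniMRCluster with four
   * task trackers on top of it, then runs both the PI and word-count jobs.
   */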
  public void testWithDFS() throws IOException {
    MiniDFSCluster dfs = null;
    MiniMRCluster mr = null;
    FileSystem fileSys = null;
    try {
      final int taskTrackers = 4;
      Configuration conf = new Configuration();
      dfs = new MiniDFSCluster(conf, 4, true, null);
      fileSys = dfs.getFileSystem();
      mr = new MiniMRCluster(taskTrackers, fileSys.getUri().toString(), 1);
      runPI(mr, mr.createJobConf());
      runWordCount(mr, mr.createJobConf());
    } finally {
      if (dfs != null) { dfs.shutdown(); }
      if (mr != null) { mr.shutdown(); }
    }
  }
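
  /**
   * Same as testWithDFS, but the namenode listens on its default port and
   * the job output path names that port explicitly. If the port is already
   * in use, the BindException is caught and the test is skipped.
   */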
  public void testWithDFSWithDefaultPort() throws IOException {
    MiniDFSCluster dfs = null;
    MiniMRCluster mr = null;
    FileSystem fileSys = null;
    try {
      final int taskTrackers = 4;
      Configuration conf = new Configuration();
      // start a dfs with the default port number
      dfs = new MiniDFSCluster(
          NameNode.DEFAULT_PORT, conf, 4, true, true, null, null);
      fileSys = dfs.getFileSystem();
      mr = new MiniMRCluster(taskTrackers, fileSys.getUri().toString(), 1);
      JobConf jobConf = mr.createJobConf();
      TestResult result;
      final Path inDir = new Path("./wc/input");
      final Path outDir = new Path("hdfs://" +
          dfs.getNameNode().getNameNodeAddress().getHostName() +
          ":" + NameNode.DEFAULT_PORT + "/./wc/output");
      String input = "The quick brown fox\nhas many silly\nred fox sox\n";
      result = launchWordCount(jobConf, inDir, outDir, input, 3, 1);
      assertEquals("The\t1\nbrown\t1\nfox\t2\nhas\t1\nmany\t1\n" +
                   "quick\t1\nred\t1\nsilly\t1\nsox\t1\n", result.output);
    } catch (java.net.BindException be) {
      LOG.info("Skipping the test this time because the namenode could not "
               + "be started on port " + NameNode.DEFAULT_PORT, be);
    } finally {
      if (dfs != null) { dfs.shutdown(); }
      if (mr != null) { mr.shutdown(); }
    }
  }
}