PageRenderTime 45ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/hadoop-0.18.3/src/test/org/apache/hadoop/mapred/MRBench.java

https://github.com/rjurney/Cloud-Stenography
Java | 308 lines | 210 code | 31 blank | 67 comment | 38 complexity | 952a5f1b2b8d4724207f1f5765bf787f MD5 | raw file
Possible License(s): Apache-2.0
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.mapred;
  19. import java.io.IOException;
  20. import java.io.PrintStream;
  21. import java.util.ArrayList;
  22. import java.util.Iterator;
  23. import java.util.Random;
  24. import org.apache.commons.logging.Log;
  25. import org.apache.commons.logging.LogFactory;
  26. import org.apache.hadoop.fs.FileSystem;
  27. import org.apache.hadoop.fs.Path;
  28. import org.apache.hadoop.io.UTF8;
  29. import org.apache.hadoop.io.WritableComparable;
  30. import org.apache.hadoop.io.Text;
/**
 * Runs a job multiple times and takes average of all runs.
 */
public class MRBench {
  
  private static final Log LOG = LogFactory.getLog(MRBench.class);
  // Root DFS directory for benchmark data; default comes from the
  // "test.build.data" system property and may be overridden via -baseDir.
  private static Path BASE_DIR =
    new Path(System.getProperty("test.build.data","/benchmarks/MRBench"));
  // Input/output directories derived from BASE_DIR at class-load time.
  // NOTE(review): any code that reassigns BASE_DIR later must also
  // recompute these two, or they keep pointing at the default location.
  private static Path INPUT_DIR = new Path(BASE_DIR, "mr_input");
  private static Path OUTPUT_DIR = new Path(BASE_DIR, "mr_output");
  // Orderings available for the generated input data.
  public static enum Order {RANDOM, ASCENDING, DESCENDING};
  41. /**
  42. * Takes input format as text lines, runs some processing on it and
  43. * writes out data as text again.
  44. */
  45. public static class Map extends MapReduceBase
  46. implements Mapper<WritableComparable, Text, UTF8, UTF8> {
  47. public void map(WritableComparable key, Text value,
  48. OutputCollector<UTF8, UTF8> output,
  49. Reporter reporter) throws IOException
  50. {
  51. String line = value.toString();
  52. output.collect(new UTF8(process(line)), new UTF8(""));
  53. }
  54. public String process(String line) {
  55. return line;
  56. }
  57. }
  58. /**
  59. * Ignores the key and writes values to the output.
  60. */
  61. public static class Reduce extends MapReduceBase
  62. implements Reducer<UTF8, UTF8, UTF8, UTF8> {
  63. public void reduce(UTF8 key, Iterator<UTF8> values,
  64. OutputCollector<UTF8, UTF8> output, Reporter reporter) throws IOException
  65. {
  66. while(values.hasNext()) {
  67. output.collect(key, new UTF8(values.next().toString()));
  68. }
  69. }
  70. }
  71. /**
  72. * Generate a text file on the given filesystem with the given path name.
  73. * The text file will contain the given number of lines of generated data.
  74. * The generated data are string representations of numbers. Each line
  75. * is the same length, which is achieved by padding each number with
  76. * an appropriate number of leading '0' (zero) characters. The order of
  77. * generated data is one of ascending, descending, or random.
  78. */
  79. public static void generateTextFile(FileSystem fs, Path inputFile,
  80. long numLines, Order sortOrder) throws IOException
  81. {
  82. LOG.info("creating control file: "+numLines+" numLines, "+sortOrder+" sortOrder");
  83. PrintStream output = null;
  84. try {
  85. output = new PrintStream(fs.create(inputFile));
  86. int padding = String.valueOf(numLines).length();
  87. switch(sortOrder) {
  88. case RANDOM:
  89. for (long l = 0; l < numLines; l++) {
  90. output.println(pad((new Random()).nextLong(), padding));
  91. }
  92. break;
  93. case ASCENDING:
  94. for (long l = 0; l < numLines; l++) {
  95. output.println(pad(l, padding));
  96. }
  97. break;
  98. case DESCENDING:
  99. for (long l = numLines; l > 0; l--) {
  100. output.println(pad(l, padding));
  101. }
  102. break;
  103. }
  104. } finally {
  105. if (output != null)
  106. output.close();
  107. }
  108. LOG.info("created control file: " + inputFile);
  109. }
  110. /**
  111. * Convert the given number to a string and pad the number with
  112. * leading '0' (zero) characters so that the string is exactly
  113. * the given length.
  114. */
  115. private static String pad(long number, int length) {
  116. String str = String.valueOf(number);
  117. StringBuffer value = new StringBuffer();
  118. for (int i = str.length(); i < length; i++) {
  119. value.append("0");
  120. }
  121. value.append(str);
  122. return value.toString();
  123. }
  124. /**
  125. * Create the job configuration.
  126. */
  127. private static JobConf setupJob(int numMaps, int numReduces, String jarFile) {
  128. JobConf jobConf = new JobConf(MRBench.class);
  129. FileInputFormat.addInputPath(jobConf, INPUT_DIR);
  130. jobConf.setInputFormat(TextInputFormat.class);
  131. jobConf.setOutputFormat(TextOutputFormat.class);
  132. jobConf.setOutputValueClass(UTF8.class);
  133. jobConf.setMapOutputKeyClass(UTF8.class);
  134. jobConf.setMapOutputValueClass(UTF8.class);
  135. if (null != jarFile) {
  136. jobConf.setJar(jarFile);
  137. }
  138. jobConf.setMapperClass(Map.class);
  139. jobConf.setReducerClass(Reduce.class);
  140. jobConf.setNumMapTasks(numMaps);
  141. jobConf.setNumReduceTasks(numReduces);
  142. return jobConf;
  143. }
  144. /**
  145. * Runs a MapReduce task, given number of times. The input to each run
  146. * is the same file.
  147. */
  148. private static ArrayList<Long> runJobInSequence(JobConf masterJobConf, int numRuns) throws IOException {
  149. Path intrimData = null;
  150. Random rand = new Random();
  151. ArrayList<Long> execTimes = new ArrayList<Long>();
  152. for (int i = 0; i < numRuns; i++) {
  153. // create a new job conf every time, reusing same object does not work
  154. JobConf jobConf = new JobConf(masterJobConf);
  155. // reset the job jar because the copy constructor doesn't
  156. jobConf.setJar(masterJobConf.getJar());
  157. // give a new random name to output of the mapred tasks
  158. FileOutputFormat.setOutputPath(jobConf,
  159. new Path(OUTPUT_DIR, "output_" + rand.nextInt()));
  160. LOG.info("Running job " + i + ":" +
  161. " input=" + FileInputFormat.getInputPaths(jobConf)[0] +
  162. " output=" + FileOutputFormat.getOutputPath(jobConf));
  163. // run the mapred task now
  164. long curTime = System.currentTimeMillis();
  165. JobClient.runJob(jobConf);
  166. execTimes.add(new Long(System.currentTimeMillis() - curTime));
  167. }
  168. return execTimes;
  169. }
  170. /**
  171. * <pre>
  172. * Usage: mrbench
  173. * [-baseDir <base DFS path for output/input, default is /benchmarks/MRBench>]
  174. * [-jar <local path to job jar file containing Mapper and Reducer implementations, default is current jar file>]
  175. * [-numRuns <number of times to run the job, default is 1>]
  176. * [-maps <number of maps for each run, default is 2>]
  177. * [-reduces <number of reduces for each run, default is 1>]
  178. * [-inputLines <number of input lines to generate, default is 1>]
  179. * [-inputType <type of input to generate, one of ascending (default), descending, random>]
  180. * [-verbose]
  181. * </pre>
  182. */
  183. public static void main (String[] args) throws IOException {
  184. String version = "MRBenchmark.0.0.2";
  185. System.out.println(version);
  186. String usage =
  187. "Usage: mrbench " +
  188. "[-baseDir <base DFS path for output/input, default is /benchmarks/MRBench>] " +
  189. "[-jar <local path to job jar file containing Mapper and Reducer implementations, default is current jar file>] " +
  190. "[-numRuns <number of times to run the job, default is 1>] " +
  191. "[-maps <number of maps for each run, default is 2>] " +
  192. "[-reduces <number of reduces for each run, default is 1>] " +
  193. "[-inputLines <number of input lines to generate, default is 1>] " +
  194. "[-inputType <type of input to generate, one of ascending (default), descending, random>] " +
  195. "[-verbose]";
  196. String jarFile = null;
  197. int inputLines = 1;
  198. int numRuns = 1;
  199. int numMaps = 2;
  200. int numReduces = 1;
  201. boolean verbose = false;
  202. Order inputSortOrder = Order.ASCENDING;
  203. for (int i = 0; i < args.length; i++) { // parse command line
  204. if (args[i].equals("-jar")) {
  205. jarFile = args[++i];
  206. } else if (args[i].equals("-numRuns")) {
  207. numRuns = Integer.parseInt(args[++i]);
  208. } else if (args[i].equals("-baseDir")) {
  209. BASE_DIR = new Path(args[++i]);
  210. } else if (args[i].equals("-maps")) {
  211. numMaps = Integer.parseInt(args[++i]);
  212. } else if (args[i].equals("-reduces")) {
  213. numReduces = Integer.parseInt(args[++i]);
  214. } else if (args[i].equals("-inputLines")) {
  215. inputLines = Integer.parseInt(args[++i]);
  216. } else if (args[i].equals("-inputType")) {
  217. String s = args[++i];
  218. if (s.equalsIgnoreCase("ascending")) {
  219. inputSortOrder = Order.ASCENDING;
  220. } else if (s.equalsIgnoreCase("descending")) {
  221. inputSortOrder = Order.DESCENDING;
  222. } else if (s.equalsIgnoreCase("random")) {
  223. inputSortOrder = Order.RANDOM;
  224. } else {
  225. inputSortOrder = null;
  226. }
  227. } else if (args[i].equals("-verbose")) {
  228. verbose = true;
  229. } else {
  230. System.err.println(usage);
  231. System.exit(-1);
  232. }
  233. }
  234. if (numRuns < 1 || // verify args
  235. numMaps < 1 ||
  236. numReduces < 1 ||
  237. inputLines < 0 ||
  238. inputSortOrder == null)
  239. {
  240. System.err.println(usage);
  241. System.exit(-1);
  242. }
  243. JobConf jobConf = setupJob(numMaps, numReduces, jarFile);
  244. FileSystem fs = FileSystem.get(jobConf);
  245. Path inputFile = new Path(INPUT_DIR, "input_" + (new Random()).nextInt() + ".txt");
  246. generateTextFile(fs, inputFile, inputLines, inputSortOrder);
  247. // setup test output directory
  248. fs.mkdirs(BASE_DIR);
  249. ArrayList<Long> execTimes = new ArrayList<Long>();
  250. try {
  251. execTimes = runJobInSequence(jobConf, numRuns);
  252. } finally {
  253. // delete output -- should we really do this?
  254. fs.delete(BASE_DIR, true);
  255. }
  256. if (verbose) {
  257. // Print out a report
  258. System.out.println("Total MapReduce jobs executed: " + numRuns);
  259. System.out.println("Total lines of data per job: " + inputLines);
  260. System.out.println("Maps per job: " + numMaps);
  261. System.out.println("Reduces per job: " + numReduces);
  262. }
  263. int i = 0;
  264. long totalTime = 0;
  265. for (Long time : execTimes) {
  266. totalTime += time.longValue();
  267. if (verbose) {
  268. System.out.println("Total milliseconds for task: " + (++i) +
  269. " = " + time);
  270. }
  271. }
  272. long avgTime = totalTime / numRuns;
  273. System.out.println("DataLines\tMaps\tReduces\tAvgTime (milliseconds)");
  274. System.out.println(inputLines + "\t\t" + numMaps + "\t" +
  275. numReduces + "\t" + avgTime);
  276. }
  277. }