PageRenderTime 48ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/tags/release-0.0.0-rc0/hive/external/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java

#
Java | 243 lines | 148 code | 43 blank | 52 comment | 5 complexity | 0c163075cde1489a0fad1b322a73d3eb MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause, JSON, CPL-1.0
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.hive.ql.io;
  19. import java.io.IOException;
  20. import java.io.OutputStreamWriter;
  21. import java.util.ArrayList;
  22. import java.util.List;
  23. import junit.framework.TestCase;
  24. import org.apache.commons.logging.Log;
  25. import org.apache.commons.logging.LogFactory;
  26. import org.apache.hadoop.conf.Configuration;
  27. import org.apache.hadoop.fs.ContentSummary;
  28. import org.apache.hadoop.fs.FileSystem;
  29. import org.apache.hadoop.fs.Path;
  30. import org.apache.hadoop.io.LongWritable;
  31. import org.apache.hadoop.io.Text;
  32. import org.apache.hadoop.mapred.FileInputFormat;
  33. import org.apache.hadoop.mapred.InputSplit;
  34. import org.apache.hadoop.mapred.JobConf;
  35. import org.apache.hadoop.mapred.RecordReader;
  36. import org.apache.hadoop.mapred.Reporter;
  37. /**
  38. * Unittest for SymlinkTextInputFormat.
  39. */
  40. @SuppressWarnings("deprecation")
  41. public class TestSymlinkTextInputFormat extends TestCase {
  42. private static Log log =
  43. LogFactory.getLog(TestSymlinkTextInputFormat.class);
  44. private Configuration conf;
  45. private JobConf job;
  46. private FileSystem fileSystem;
  47. private Path testDir;
  48. Reporter reporter;
  49. private Path dataDir1;
  50. private Path dataDir2;
  51. private Path symlinkDir;
  52. @Override
  53. protected void setUp() throws IOException {
  54. conf = new Configuration();
  55. job = new JobConf(conf);
  56. fileSystem = FileSystem.getLocal(conf);
  57. testDir = new Path(System.getProperty("test.data.dir", ".") +
  58. "/TestSymlinkTextInputFormat");
  59. reporter = Reporter.NULL;
  60. fileSystem.delete(testDir, true);
  61. dataDir1 = new Path(testDir, "datadir1");
  62. dataDir2 = new Path(testDir, "datadir2");
  63. symlinkDir = new Path(testDir, "symlinkdir");
  64. }
  65. @Override
  66. protected void tearDown() throws IOException {
  67. fileSystem.delete(testDir, true);
  68. }
  69. /**
  70. * Test scenario: Two data directories, one symlink file that contains two
  71. * paths each point to a file in one of data directories.
  72. */
  73. public void testAccuracy1() throws IOException {
  74. // First data dir, contains 2 files.
  75. FileSystem fs = dataDir1.getFileSystem(job);
  76. int symbolLinkedFileSize = 0;
  77. Path dir1_file1 = new Path(dataDir1, "file1");
  78. writeTextFile(dir1_file1,
  79. "dir1_file1_line1\n" +
  80. "dir1_file1_line2\n");
  81. symbolLinkedFileSize += fs.getFileStatus(dir1_file1).getLen();
  82. Path dir1_file2 = new Path(dataDir1, "file2");
  83. writeTextFile(dir1_file2,
  84. "dir1_file2_line1\n" +
  85. "dir1_file2_line2\n");
  86. // Second data dir, contains 2 files.
  87. Path dir2_file1 = new Path(dataDir2, "file1");
  88. writeTextFile(dir2_file1,
  89. "dir2_file1_line1\n" +
  90. "dir2_file1_line2\n");
  91. Path dir2_file2 = new Path(dataDir2, "file2");
  92. writeTextFile(dir2_file2,
  93. "dir2_file2_line1\n" +
  94. "dir2_file2_line2\n");
  95. symbolLinkedFileSize += fs.getFileStatus(dir2_file2).getLen();
  96. // A symlink file, contains first file from first dir and second file from
  97. // second dir.
  98. writeSymlinkFile(
  99. new Path(symlinkDir, "symlink_file"),
  100. new Path(dataDir1, "file1"),
  101. new Path(dataDir2, "file2"));
  102. SymlinkTextInputFormat inputFormat = new SymlinkTextInputFormat();
  103. //test content summary
  104. ContentSummary cs = inputFormat.getContentSummary(symlinkDir, job);
  105. assertEquals(symbolLinkedFileSize, cs.getLength());
  106. assertEquals(2, cs.getFileCount());
  107. assertEquals(0, cs.getDirectoryCount());
  108. FileInputFormat.setInputPaths(job, symlinkDir);
  109. InputSplit[] splits = inputFormat.getSplits(job, 2);
  110. log.info("Number of splits: " + splits.length);
  111. // Read all values.
  112. List<String> received = new ArrayList<String>();
  113. for (InputSplit split : splits) {
  114. RecordReader<LongWritable, Text> reader =
  115. inputFormat.getRecordReader(split, job, reporter);
  116. LongWritable key = reader.createKey();
  117. Text value = reader.createValue();
  118. while (reader.next(key, value)) {
  119. received.add(value.toString());
  120. }
  121. }
  122. List<String> expected = new ArrayList<String>();
  123. expected.add("dir1_file1_line1");
  124. expected.add("dir1_file1_line2");
  125. expected.add("dir2_file2_line1");
  126. expected.add("dir2_file2_line2");
  127. assertEquals(expected, received);
  128. }
  129. /**
  130. * Scenario: Empty input directory, i.e. no symlink file.
  131. *
  132. * Expected: Should return empty result set without any exception.
  133. */
  134. public void testAccuracy2() throws IOException {
  135. fileSystem.mkdirs(symlinkDir);
  136. FileInputFormat.setInputPaths(job, symlinkDir);
  137. SymlinkTextInputFormat inputFormat = new SymlinkTextInputFormat();
  138. ContentSummary cs = inputFormat.getContentSummary(symlinkDir, job);
  139. assertEquals(0, cs.getLength());
  140. assertEquals(0, cs.getFileCount());
  141. assertEquals(0, cs.getDirectoryCount());
  142. InputSplit[] splits = inputFormat.getSplits(job, 2);
  143. log.info("Number of splits: " + splits.length);
  144. // Read all values.
  145. List<String> received = new ArrayList<String>();
  146. for (InputSplit split : splits) {
  147. RecordReader<LongWritable, Text> reader =
  148. inputFormat.getRecordReader(split, job, reporter);
  149. LongWritable key = reader.createKey();
  150. Text value = reader.createValue();
  151. while (reader.next(key, value)) {
  152. received.add(value.toString());
  153. }
  154. }
  155. List<String> expected = new ArrayList<String>();
  156. assertEquals(expected, received);
  157. }
  158. /**
  159. * Scenario: No job input paths.
  160. * Expected: IOException with proper message.
  161. */
  162. public void testFailure() {
  163. SymlinkTextInputFormat inputFormat = new SymlinkTextInputFormat();
  164. try {
  165. inputFormat.getSplits(job, 2);
  166. fail("IOException expected if no job input paths specified.");
  167. } catch (IOException e) {
  168. assertEquals("Incorrect exception message for no job input paths error.",
  169. "No input paths specified in job.",
  170. e.getMessage());
  171. }
  172. }
  173. /**
  174. * Writes the given string to the given file.
  175. */
  176. private void writeTextFile(Path file, String content) throws IOException {
  177. OutputStreamWriter writer = new OutputStreamWriter(fileSystem.create(file));
  178. writer.write(content);
  179. writer.close();
  180. }
  181. /**
  182. * Writes a symlink file that contains given list of paths.
  183. *
  184. * @param symlinkFile
  185. * The symlink file to write.
  186. *
  187. * @param paths
  188. * The list of paths to write to the symlink file.
  189. */
  190. private void writeSymlinkFile(Path symlinkFile, Path...paths)
  191. throws IOException {
  192. OutputStreamWriter writer =
  193. new OutputStreamWriter(fileSystem.create(symlinkFile));
  194. for (Path path : paths) {
  195. writer.write(path.toString());
  196. writer.write("\n");
  197. }
  198. writer.close();
  199. }
  200. }