PageRenderTime 48ms CodeModel.GetById 34ms app.highlight 11ms RepoModel.GetById 1ms app.codeStats 0ms

/tags/release-0.0.0-rc0/hive/external/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java

#
Java | 243 lines | 148 code | 43 blank | 52 comment | 5 complexity | 0c163075cde1489a0fad1b322a73d3eb MD5 | raw file
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 18package org.apache.hadoop.hive.ql.io;
 19
 20import java.io.IOException;
 21import java.io.OutputStreamWriter;
 22import java.util.ArrayList;
 23import java.util.List;
 24
 25import junit.framework.TestCase;
 26
 27import org.apache.commons.logging.Log;
 28import org.apache.commons.logging.LogFactory;
 29import org.apache.hadoop.conf.Configuration;
 30import org.apache.hadoop.fs.ContentSummary;
 31import org.apache.hadoop.fs.FileSystem;
 32import org.apache.hadoop.fs.Path;
 33import org.apache.hadoop.io.LongWritable;
 34import org.apache.hadoop.io.Text;
 35import org.apache.hadoop.mapred.FileInputFormat;
 36import org.apache.hadoop.mapred.InputSplit;
 37import org.apache.hadoop.mapred.JobConf;
 38import org.apache.hadoop.mapred.RecordReader;
 39import org.apache.hadoop.mapred.Reporter;
 40
 41/**
 42 * Unittest for SymlinkTextInputFormat.
 43 */
 44@SuppressWarnings("deprecation")
 45public class TestSymlinkTextInputFormat extends TestCase {
 46  private static Log log =
 47      LogFactory.getLog(TestSymlinkTextInputFormat.class);
 48
 49  private Configuration conf;
 50  private JobConf job;
 51  private FileSystem fileSystem;
 52  private Path testDir;
 53  Reporter reporter;
 54
 55  private Path dataDir1;
 56  private Path dataDir2;
 57  private Path symlinkDir;
 58
 59  @Override
 60  protected void setUp() throws IOException {
 61    conf = new Configuration();
 62    job = new JobConf(conf);
 63    fileSystem = FileSystem.getLocal(conf);
 64    testDir = new Path(System.getProperty("test.data.dir", ".") +
 65                       "/TestSymlinkTextInputFormat");
 66    reporter = Reporter.NULL;
 67    fileSystem.delete(testDir, true);
 68
 69    dataDir1 = new Path(testDir, "datadir1");
 70    dataDir2 = new Path(testDir, "datadir2");
 71    symlinkDir = new Path(testDir, "symlinkdir");
 72  }
 73
 74  @Override
 75  protected void tearDown() throws IOException {
 76    fileSystem.delete(testDir, true);
 77  }
 78
 79  /**
 80   * Test scenario: Two data directories, one symlink file that contains two
 81   * paths each point to a file in one of data directories.
 82   */
 83  public void testAccuracy1() throws IOException {
 84    // First data dir, contains 2 files.
 85    
 86    FileSystem fs = dataDir1.getFileSystem(job);
 87    int symbolLinkedFileSize = 0;
 88    
 89    Path dir1_file1 = new Path(dataDir1, "file1");
 90    writeTextFile(dir1_file1,
 91                  "dir1_file1_line1\n" +
 92                  "dir1_file1_line2\n");
 93    
 94    symbolLinkedFileSize += fs.getFileStatus(dir1_file1).getLen();
 95    
 96    Path dir1_file2 = new Path(dataDir1, "file2");
 97    writeTextFile(dir1_file2,
 98                  "dir1_file2_line1\n" +
 99                  "dir1_file2_line2\n");
100    
101    // Second data dir, contains 2 files.
102    
103    Path dir2_file1 = new Path(dataDir2, "file1");
104    writeTextFile(dir2_file1,
105                  "dir2_file1_line1\n" +
106                  "dir2_file1_line2\n");
107    
108    Path dir2_file2 = new Path(dataDir2, "file2");
109    writeTextFile(dir2_file2,
110                  "dir2_file2_line1\n" +
111                  "dir2_file2_line2\n");
112
113    symbolLinkedFileSize += fs.getFileStatus(dir2_file2).getLen();
114    
115    // A symlink file, contains first file from first dir and second file from
116    // second dir.
117    writeSymlinkFile(
118        new Path(symlinkDir, "symlink_file"),
119        new Path(dataDir1, "file1"),
120        new Path(dataDir2, "file2"));
121    
122    SymlinkTextInputFormat inputFormat = new SymlinkTextInputFormat();
123    
124    //test content summary
125    ContentSummary cs = inputFormat.getContentSummary(symlinkDir, job);
126    
127    assertEquals(symbolLinkedFileSize, cs.getLength());
128    assertEquals(2, cs.getFileCount());
129    assertEquals(0, cs.getDirectoryCount());
130
131    FileInputFormat.setInputPaths(job, symlinkDir);
132    InputSplit[] splits = inputFormat.getSplits(job, 2);
133
134    log.info("Number of splits: " + splits.length);
135
136    // Read all values.
137    List<String> received = new ArrayList<String>();
138    for (InputSplit split : splits) {
139      RecordReader<LongWritable, Text> reader =
140          inputFormat.getRecordReader(split, job, reporter);
141
142      LongWritable key = reader.createKey();
143      Text value = reader.createValue();
144      while (reader.next(key, value)) {
145        received.add(value.toString());
146      }
147    }
148
149    List<String> expected = new ArrayList<String>();
150    expected.add("dir1_file1_line1");
151    expected.add("dir1_file1_line2");
152    expected.add("dir2_file2_line1");
153    expected.add("dir2_file2_line2");
154
155    assertEquals(expected, received);
156  }
157
158  /**
159   * Scenario: Empty input directory, i.e. no symlink file.
160   *
161   * Expected: Should return empty result set without any exception.
162   */
163  public void testAccuracy2() throws IOException {
164    fileSystem.mkdirs(symlinkDir);
165
166    FileInputFormat.setInputPaths(job, symlinkDir);
167
168    SymlinkTextInputFormat inputFormat = new SymlinkTextInputFormat();
169    
170    ContentSummary cs = inputFormat.getContentSummary(symlinkDir, job);
171    
172    assertEquals(0, cs.getLength());
173    assertEquals(0, cs.getFileCount());
174    assertEquals(0, cs.getDirectoryCount());
175    
176    InputSplit[] splits = inputFormat.getSplits(job, 2);
177
178    log.info("Number of splits: " + splits.length);
179
180    // Read all values.
181    List<String> received = new ArrayList<String>();
182    for (InputSplit split : splits) {
183      RecordReader<LongWritable, Text> reader =
184          inputFormat.getRecordReader(split, job, reporter);
185
186      LongWritable key = reader.createKey();
187      Text value = reader.createValue();
188      while (reader.next(key, value)) {
189        received.add(value.toString());
190      }
191    }
192
193    List<String> expected = new ArrayList<String>();
194
195    assertEquals(expected, received);
196  }
197
198  /**
199   * Scenario: No job input paths.
200   * Expected: IOException with proper message.
201   */
202  public void testFailure() {
203    SymlinkTextInputFormat inputFormat = new SymlinkTextInputFormat();
204
205    try {
206      inputFormat.getSplits(job, 2);
207      fail("IOException expected if no job input paths specified.");
208    } catch (IOException e) {
209      assertEquals("Incorrect exception message for no job input paths error.",
210                   "No input paths specified in job.",
211                   e.getMessage());
212    }
213  }
214
215  /**
216   * Writes the given string to the given file.
217   */
218  private void writeTextFile(Path file, String content) throws IOException {
219    OutputStreamWriter writer = new OutputStreamWriter(fileSystem.create(file));
220    writer.write(content);
221    writer.close();
222  }
223
224  /**
225   * Writes a symlink file that contains given list of paths.
226   *
227   * @param symlinkFile
228   * The symlink file to write.
229   *
230   * @param paths
231   * The list of paths to write to the symlink file.
232   */
233  private void writeSymlinkFile(Path symlinkFile, Path...paths)
234      throws IOException {
235    OutputStreamWriter writer =
236        new OutputStreamWriter(fileSystem.create(symlinkFile));
237    for (Path path : paths) {
238      writer.write(path.toString());
239      writer.write("\n");
240    }
241    writer.close();
242  }
243}