PageRenderTime 34ms CodeModel.GetById 11ms app.highlight 19ms RepoModel.GetById 2ms app.codeStats 0ms

/tags/release-0.2.0-rc0/hive/external/ql/src/test/org/apache/hadoop/hive/ql/io/PerformTestRCFileAndSeqFile.java

#
Java | 394 lines | 295 code | 61 blank | 38 comment | 46 complexity | 0df830a1c117dc61563b9e86262d5a34 MD5 | raw file
  1/**
  2 * Licensed to the Apache Software Foundation (ASF) under one
  3 * or more contributor license agreements.  See the NOTICE file
  4 * distributed with this work for additional information
  5 * regarding copyright ownership.  The ASF licenses this file
  6 * to you under the Apache License, Version 2.0 (the
  7 * "License"); you may not use this file except in compliance
  8 * with the License.  You may obtain a copy of the License at
  9 *
 10 *     http://www.apache.org/licenses/LICENSE-2.0
 11 *
 12 * Unless required by applicable law or agreed to in writing, software
 13 * distributed under the License is distributed on an "AS IS" BASIS,
 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 * See the License for the specific language governing permissions and
 16 * limitations under the License.
 17 */
 18package org.apache.hadoop.hive.ql.io;
 19
 20import java.io.IOException;
 21import java.util.Random;
 22
 23import junit.framework.TestCase;
 24
 25import org.apache.hadoop.conf.Configuration;
 26import org.apache.hadoop.fs.FileSystem;
 27import org.apache.hadoop.fs.Path;
 28import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 29import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
 30import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;
 31import org.apache.hadoop.hive.serde2.io.ByteWritable;
 32import org.apache.hadoop.io.LongWritable;
 33import org.apache.hadoop.io.SequenceFile;
 34import org.apache.hadoop.io.SequenceFile.CompressionType;
 35import org.apache.hadoop.io.compress.CompressionCodec;
 36import org.apache.hadoop.io.compress.DefaultCodec;
 37
 38/**
 39 * PerformTestRCFileAndSeqFile.
 40 *
 41 */
 42public class PerformTestRCFileAndSeqFile extends TestCase {
 43
  // Shared Hadoop configuration handed to the file systems, writers and
  // readers created by this class.
  private final Configuration conf = new Configuration();

  // Locations of the RCFile and the SequenceFile written and read by the
  // benchmark; both are assigned in the constructor.
  private Path testRCFile;
  private Path testSeqFile;

  // File system on which both files live; chosen in the constructor.
  private FileSystem fs;

  // Exclusive upper bound on the length of a generated column value.
  int columnMaxSize = 30;

  // Source of the random characters that fill each column value. Re-created
  // with the same seed by resetRandomGenerators() so data can be regenerated.
  Random randomCharGenerator = new Random(3);

  // Source of the random length of each column value. Re-created with the
  // same seed by resetRandomGenerators().
  Random randColLenGenerator = new Random(20);
 56
 57  public PerformTestRCFileAndSeqFile(boolean local, String file)
 58      throws IOException {
 59    if (local) {
 60      fs = FileSystem.getLocal(conf);
 61    } else {
 62      fs = FileSystem.get(conf);
 63    }
 64    conf.setInt(RCFile.Writer.COLUMNS_BUFFER_SIZE_CONF_STR, 1 * 1024 * 1024);
 65    if (file == null) {
 66      Path dir = new Path(System.getProperty("test.data.dir", ".") + "/mapred");
 67      testRCFile = new Path(dir, "test_rcfile");
 68      testSeqFile = new Path(dir, "test_seqfile");
 69    } else {
 70      testRCFile = new Path(file + "-rcfile");
 71      testSeqFile = new Path(file + "-seqfile");
 72    }
 73    fs.delete(testRCFile, true);
 74    fs.delete(testSeqFile, true);
 75    System.out.println("RCFile:" + testRCFile.toString());
 76    System.out.println("SequenceFile:" + testSeqFile.toString());
 77  }
 78
 79  private void writeSeqenceFileTest(FileSystem fs, int rowCount, Path file,
 80      int columnNum, CompressionCodec codec) throws IOException {
 81
 82    byte[][] columnRandom;
 83
 84    resetRandomGenerators();
 85
 86    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
 87    columnRandom = new byte[columnNum][];
 88    for (int i = 0; i < columnNum; i++) {
 89      BytesRefWritable cu = new BytesRefWritable();
 90      bytes.set(i, cu);
 91    }
 92
 93    // zero length key is not allowed by block compress writer, so we use a byte
 94    // writable
 95    ByteWritable key = new ByteWritable();
 96    SequenceFile.Writer seqWriter = SequenceFile.createWriter(fs, conf, file,
 97        ByteWritable.class, BytesRefArrayWritable.class, CompressionType.BLOCK,
 98        codec);
 99
100    for (int i = 0; i < rowCount; i++) {
101      nextRandomRow(columnRandom, bytes);
102      seqWriter.append(key, bytes);
103    }
104    seqWriter.close();
105  }
106
107  private void resetRandomGenerators() {
108    randomCharGenerator = new Random(3);
109    randColLenGenerator = new Random(20);
110  }
111
112  private void writeRCFileTest(FileSystem fs, int rowCount, Path file,
113      int columnNum, CompressionCodec codec) throws IOException {
114    fs.delete(file, true);
115
116    resetRandomGenerators();
117
118    RCFileOutputFormat.setColumnNumber(conf, columnNum);
119    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, codec);
120
121    byte[][] columnRandom;
122
123    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
124    columnRandom = new byte[columnNum][];
125    for (int i = 0; i < columnNum; i++) {
126      BytesRefWritable cu = new BytesRefWritable();
127      bytes.set(i, cu);
128    }
129
130    for (int i = 0; i < rowCount; i++) {
131      nextRandomRow(columnRandom, bytes);
132      writer.append(bytes);
133    }
134    writer.close();
135  }
136
137  private void nextRandomRow(byte[][] row, BytesRefArrayWritable bytes) {
138    bytes.resetValid(row.length);
139    for (int i = 0; i < row.length; i++) {
140      int len = Math.abs(randColLenGenerator.nextInt(columnMaxSize));
141      row[i] = new byte[len];
142      for (int j = 0; j < len; j++) {
143        row[i][j] = getRandomChar(randomCharGenerator);
144      }
145      bytes.get(i).set(row[i], 0, len);
146    }
147  }
148
149  private static int CHAR_END = 122 - 7;
150
151  private byte getRandomChar(Random random) {
152    byte b = 0;
153    do {
154      b = (byte) random.nextInt(CHAR_END);
155    } while ((b < 65));
156    if (b > 90) {
157      b += 7;
158    }
159    return b;
160  }
161
162  public static void main(String[] args) throws Exception {
163    int count = 1000;
164    String file = null;
165
166    try {
167      for (int i = 0; i < args.length; ++i) { // parse command line
168        if (args[i] == null) {
169          continue;
170        } else if (args[i].equals("-count")) {
171          count = Integer.parseInt(args[++i]);
172        } else {
173          // file is required parameter
174          file = args[i];
175        }
176      }
177
178      // change it to choose the appropriate file system
179      boolean isLocalFS = true;
180
181      PerformTestRCFileAndSeqFile testcase = new PerformTestRCFileAndSeqFile(
182          isLocalFS, file);
183
184      // change these parameters
185      boolean checkCorrect = true;
186      CompressionCodec codec = new DefaultCodec();
187      testcase.columnMaxSize = 30;
188
189      // testcase.testWithColumnNumber(count, 2, checkCorrect, codec);
190      // testcase.testWithColumnNumber(count, 10, checkCorrect, codec);
191      // testcase.testWithColumnNumber(count, 25, checkCorrect, codec);
192      testcase.testWithColumnNumber(count, 40, checkCorrect, codec);
193      // testcase.testWithColumnNumber(count, 50, checkCorrect, codec);
194      // testcase.testWithColumnNumber(count, 80, checkCorrect, codec);
195
196    } finally {
197    }
198  }
199
200  private void testWithColumnNumber(int rowCount, int columnNum,
201      boolean checkCorrect, CompressionCodec codec) throws IOException {
202    // rcfile
203
204    // rcfile write
205    long start = System.currentTimeMillis();
206    writeRCFileTest(fs, rowCount, testRCFile, columnNum, codec);
207    long cost = System.currentTimeMillis() - start;
208    long fileLen = fs.getFileStatus(testRCFile).getLen();
209    System.out.println("Write RCFile with " + columnNum
210        + " random string columns and " + rowCount + " rows cost " + cost
211        + " milliseconds. And the file's on disk size is " + fileLen);
212
213    // sequence file write
214    start = System.currentTimeMillis();
215    writeSeqenceFileTest(fs, rowCount, testSeqFile, columnNum, codec);
216    cost = System.currentTimeMillis() - start;
217    fileLen = fs.getFileStatus(testSeqFile).getLen();
218    System.out.println("Write SequenceFile with " + columnNum
219        + " random string columns and " + rowCount + " rows cost " + cost
220        + " milliseconds. And the file's on disk size is " + fileLen);
221
222    // rcfile read
223    start = System.currentTimeMillis();
224    int readRows = performRCFileReadFirstColumnTest(fs, testRCFile, columnNum,
225        checkCorrect);
226    cost = System.currentTimeMillis() - start;
227    System.out.println("Read only one column of a RCFile with " + columnNum
228        + " random string columns and " + rowCount + " rows cost " + cost
229        + " milliseconds.");
230    if (rowCount != readRows) {
231      throw new IllegalStateException("Compare read and write row count error.");
232    }
233    assertEquals("", rowCount, readRows);
234
235    if (isLocalFileSystem() && !checkCorrect) {
236      // make some noisy to avoid disk caches data.
237      performSequenceFileRead(fs, rowCount, testSeqFile);
238    }
239
240    start = System.currentTimeMillis();
241    readRows = performRCFileReadFirstAndLastColumnTest(fs, testRCFile,
242        columnNum, checkCorrect);
243    cost = System.currentTimeMillis() - start;
244    System.out.println("Read only first and last columns of a RCFile with "
245        + columnNum + " random string columns and " + rowCount + " rows cost "
246        + cost + " milliseconds.");
247    if (rowCount != readRows) {
248      throw new IllegalStateException("Compare read and write row count error.");
249    }
250    assertEquals("", rowCount, readRows);
251
252    if (isLocalFileSystem() && !checkCorrect) {
253      // make some noisy to avoid disk caches data.
254      performSequenceFileRead(fs, rowCount, testSeqFile);
255    }
256
257    start = System.currentTimeMillis();
258    performRCFileFullyReadColumnTest(fs, testRCFile, columnNum, checkCorrect);
259    cost = System.currentTimeMillis() - start;
260    System.out.println("Read all columns of a RCFile with " + columnNum
261        + " random string columns and " + rowCount + " rows cost " + cost
262        + " milliseconds.");
263    if (rowCount != readRows) {
264      throw new IllegalStateException("Compare read and write row count error.");
265    }
266    assertEquals("", rowCount, readRows);
267
268    // sequence file read
269    start = System.currentTimeMillis();
270    performSequenceFileRead(fs, rowCount, testSeqFile);
271    cost = System.currentTimeMillis() - start;
272    System.out.println("Read SequenceFile with " + columnNum
273        + "  random string columns and " + rowCount + " rows cost " + cost
274        + " milliseconds.");
275  }
276
277  public boolean isLocalFileSystem() {
278    return fs.getUri().toString().startsWith("file://");
279  }
280
281  public void performSequenceFileRead(FileSystem fs, int count, Path file) throws IOException {
282    SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
283    ByteWritable key = new ByteWritable();
284    BytesRefArrayWritable val = new BytesRefArrayWritable();
285    for (int i = 0; i < count; i++) {
286      reader.next(key, val);
287    }
288  }
289
290  public int performRCFileReadFirstColumnTest(FileSystem fs, Path file,
291      int allColumnsNumber, boolean chechCorrect) throws IOException {
292
293    byte[][] checkBytes = null;
294    BytesRefArrayWritable checkRow = new BytesRefArrayWritable(allColumnsNumber);
295    if (chechCorrect) {
296      resetRandomGenerators();
297      checkBytes = new byte[allColumnsNumber][];
298    }
299
300    int actualReadCount = 0;
301
302    java.util.ArrayList<Integer> readCols = new java.util.ArrayList<Integer>();
303    readCols.add(Integer.valueOf(0));
304    ColumnProjectionUtils.setReadColumnIDs(conf, readCols);
305    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
306
307    LongWritable rowID = new LongWritable();
308    BytesRefArrayWritable cols = new BytesRefArrayWritable();
309    while (reader.next(rowID)) {
310      reader.getCurrentRow(cols);
311      boolean ok = true;
312      if (chechCorrect) {
313        nextRandomRow(checkBytes, checkRow);
314        ok = ok && (checkRow.get(0).equals(cols.get(0)));
315      }
316      if (!ok) {
317        throw new IllegalStateException("Compare read and write error.");
318      }
319      actualReadCount++;
320    }
321    return actualReadCount;
322  }
323
324  public int performRCFileReadFirstAndLastColumnTest(FileSystem fs, Path file,
325      int allColumnsNumber, boolean chechCorrect) throws IOException {
326
327    byte[][] checkBytes = null;
328    BytesRefArrayWritable checkRow = new BytesRefArrayWritable(allColumnsNumber);
329    if (chechCorrect) {
330      resetRandomGenerators();
331      checkBytes = new byte[allColumnsNumber][];
332    }
333
334    int actualReadCount = 0;
335
336    java.util.ArrayList<Integer> readCols = new java.util.ArrayList<Integer>();
337    readCols.add(Integer.valueOf(0));
338    readCols.add(Integer.valueOf(allColumnsNumber - 1));
339    ColumnProjectionUtils.setReadColumnIDs(conf, readCols);
340    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
341
342    LongWritable rowID = new LongWritable();
343    BytesRefArrayWritable cols = new BytesRefArrayWritable();
344    while (reader.next(rowID)) {
345      reader.getCurrentRow(cols);
346      boolean ok = true;
347      if (chechCorrect) {
348        nextRandomRow(checkBytes, checkRow);
349        ok = ok && (checkRow.get(0).equals(cols.get(0)));
350        ok = ok
351            && checkRow.get(allColumnsNumber - 1).equals(
352            cols.get(allColumnsNumber - 1));
353      }
354      if (!ok) {
355        throw new IllegalStateException("Compare read and write error.");
356      }
357      actualReadCount++;
358    }
359    return actualReadCount;
360  }
361
362  public int performRCFileFullyReadColumnTest(FileSystem fs, Path file,
363      int allColumnsNumber, boolean chechCorrect) throws IOException {
364
365    byte[][] checkBytes = null;
366    BytesRefArrayWritable checkRow = new BytesRefArrayWritable(allColumnsNumber);
367    if (chechCorrect) {
368      resetRandomGenerators();
369      checkBytes = new byte[allColumnsNumber][];
370    }
371
372    int actualReadCount = 0;
373
374    ColumnProjectionUtils.setFullyReadColumns(conf);
375    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
376
377    LongWritable rowID = new LongWritable();
378    BytesRefArrayWritable cols = new BytesRefArrayWritable();
379    while (reader.next(rowID)) {
380      reader.getCurrentRow(cols);
381      boolean ok = true;
382      if (chechCorrect) {
383        nextRandomRow(checkBytes, checkRow);
384        ok = ok && checkRow.equals(cols);
385      }
386      if (!ok) {
387        throw new IllegalStateException("Compare read and write error.");
388      }
389      actualReadCount++;
390    }
391    return actualReadCount;
392  }
393
394}