/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java
Java | 2373 lines | 1922 code | 302 blank | 149 comment | 141 complexity | 0b89a080b86042fa3282259deeab8106 MD5 | raw file
Possible License(s): Apache-2.0
Large files are truncated, but you can click here to view the full file
- /*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.apache.hive.hcatalog.streaming;
- import java.io.ByteArrayOutputStream;
- import java.io.File;
- import java.io.FileFilter;
- import java.io.FileNotFoundException;
- import java.io.IOException;
- import java.io.PrintStream;
- import java.net.URI;
- import java.net.URISyntaxException;
- import java.nio.ByteBuffer;
- import java.util.ArrayList;
- import java.util.Arrays;
- import java.util.Collection;
- import java.util.Collections;
- import java.util.HashMap;
- import java.util.List;
- import java.util.Map;
- import java.util.concurrent.TimeUnit;
- import java.util.concurrent.atomic.AtomicBoolean;
- import org.apache.hadoop.conf.Configuration;
- import org.apache.hadoop.fs.FSDataInputStream;
- import org.apache.hadoop.fs.FSDataOutputStream;
- import org.apache.hadoop.fs.FileStatus;
- import org.apache.hadoop.fs.FileSystem;
- import org.apache.hadoop.fs.Path;
- import org.apache.hadoop.fs.RawLocalFileSystem;
- import org.apache.hadoop.fs.permission.FsPermission;
- import org.apache.hadoop.hive.cli.CliSessionState;
- import org.apache.hadoop.hive.common.JavaUtils;
- import org.apache.hadoop.hive.common.TableName;
- import org.apache.hadoop.hive.common.ValidTxnList;
- import org.apache.hadoop.hive.common.ValidWriteIdList;
- import org.apache.hadoop.hive.conf.HiveConf;
- import org.apache.hadoop.hive.conf.Validator;
- import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
- import org.apache.hadoop.hive.metastore.IMetaStoreClient;
- import org.apache.hadoop.hive.metastore.api.FieldSchema;
- import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse;
- import org.apache.hadoop.hive.metastore.api.LockState;
- import org.apache.hadoop.hive.metastore.api.LockType;
- import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
- import org.apache.hadoop.hive.metastore.api.Partition;
- import org.apache.hadoop.hive.metastore.api.ShowLocksRequest;
- import org.apache.hadoop.hive.metastore.api.ShowLocksResponse;
- import org.apache.hadoop.hive.metastore.api.ShowLocksResponseElement;
- import org.apache.hadoop.hive.metastore.api.TableValidWriteIds;
- import org.apache.hadoop.hive.metastore.api.TxnAbortedException;
- import org.apache.hadoop.hive.metastore.api.TxnInfo;
- import org.apache.hadoop.hive.metastore.api.TxnState;
- import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
- import org.apache.hadoop.hive.metastore.txn.AcidHouseKeeperService;
- import org.apache.hadoop.hive.metastore.txn.TxnCommonUtils;
- import org.apache.hadoop.hive.metastore.txn.TxnDbUtil;
- import org.apache.hadoop.hive.metastore.txn.TxnStore;
- import org.apache.hadoop.hive.metastore.txn.TxnUtils;
- import org.apache.hadoop.hive.ql.DriverFactory;
- import org.apache.hadoop.hive.ql.IDriver;
- import org.apache.hadoop.hive.ql.io.AcidUtils;
- import org.apache.hadoop.hive.ql.io.BucketCodec;
- import org.apache.hadoop.hive.ql.io.IOConstants;
- import org.apache.hadoop.hive.ql.io.orc.OrcFile;
- import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
- import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
- import org.apache.hadoop.hive.ql.io.orc.Reader;
- import org.apache.hadoop.hive.ql.io.orc.RecordReader;
- import org.apache.hadoop.hive.ql.processors.CommandProcessorException;
- import org.apache.hadoop.hive.ql.session.SessionState;
- import org.apache.hadoop.hive.ql.txn.compactor.Worker;
- import org.apache.hadoop.hive.serde.serdeConstants;
- import org.apache.hadoop.hive.serde2.objectinspector.StructField;
- import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
- import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
- import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
- import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
- import org.apache.hadoop.hive.shims.Utils;
- import org.apache.hadoop.io.NullWritable;
- import org.apache.hadoop.mapred.InputFormat;
- import org.apache.hadoop.mapred.InputSplit;
- import org.apache.hadoop.mapred.JobConf;
- import org.apache.hadoop.mapred.Reporter;
- import org.apache.hadoop.security.UserGroupInformation;
- import org.apache.orc.impl.OrcAcidUtils;
- import org.apache.orc.tools.FileDump;
- import org.apache.thrift.TException;
- import org.junit.After;
- import org.junit.Assert;
- import org.junit.Before;
- import org.junit.Ignore;
- import org.junit.Rule;
- import org.junit.Test;
- import org.junit.rules.TemporaryFolder;
- import org.slf4j.Logger;
- import org.slf4j.LoggerFactory;
- import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.BUCKET_COUNT;
- public class TestStreaming {
- private static final Logger LOG = LoggerFactory.getLogger(TestStreaming.class);
- public static class RawFileSystem extends RawLocalFileSystem {
- private static final URI NAME;
- static {
- try {
- NAME = new URI("raw:///");
- } catch (URISyntaxException se) {
- throw new IllegalArgumentException("bad uri", se);
- }
- }
- @Override
- public URI getUri() {
- return NAME;
- }
- @Override
- public String getScheme() {
- return "raw";
- }
- @Override
- public FileStatus getFileStatus(Path path) throws IOException {
- File file = pathToFile(path);
- if (!file.exists()) {
- throw new FileNotFoundException("Can't find " + path);
- }
- // get close enough
- short mod = 0;
- if (file.canRead()) {
- mod |= 0444;
- }
- if (file.canWrite()) {
- mod |= 0200;
- }
- if (file.canExecute()) {
- mod |= 0111;
- }
- return new FileStatus(file.length(), file.isDirectory(), 1, 1024,
- file.lastModified(), file.lastModified(),
- FsPermission.createImmutable(mod), "owen", "users", path);
- }
- }
- private static final String COL1 = "id";
- private static final String COL2 = "msg";
- private final HiveConf conf;
- private IDriver driver;
- private final IMetaStoreClient msClient;
- final String metaStoreURI = null;
- // partitioned table
- private final static String dbName = "testing";
- private final static String tblName = "alerts";
- private final static String[] fieldNames = new String[]{COL1,COL2};
- List<String> partitionVals;
- private static Path partLoc;
- private static Path partLoc2;
- // unpartitioned table
- private final static String dbName2 = "testing2";
- private final static String tblName2 = "alerts";
- private final static String[] fieldNames2 = new String[]{COL1,COL2};
- // for bucket join testing
- private final static String dbName3 = "testing3";
- private final static String tblName3 = "dimensionTable";
- private final static String dbName4 = "testing4";
- private final static String tblName4 = "factTable";
- List<String> partitionVals2;
- private final String PART1_CONTINENT = "Asia";
- private final String PART1_COUNTRY = "India";
- @Rule
- public TemporaryFolder dbFolder = new TemporaryFolder();
/**
 * Builds the Hive configuration for a test instance, resets the transaction database and
 * opens the metastore client.
 *
 * @throws Exception if the configuration, temp folder, transaction DB or metastore client
 *                   cannot be set up
 */
public TestStreaming() throws Exception {
  // partition values: (continent, country) for the two-level table, (country) alone for the other list
  partitionVals = new ArrayList<>(Arrays.asList(PART1_CONTINENT, PART1_COUNTRY));
  partitionVals2 = new ArrayList<>(Collections.singletonList(PART1_COUNTRY));

  conf = new HiveConf(getClass());
  // make the "raw" scheme resolve to the test file system declared in this class
  conf.set("fs.raw.impl", RawFileSystem.class.getName());
  conf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
      "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
  TxnDbUtil.setConfValues(conf);
  if (metaStoreURI != null) {
    conf.setVar(HiveConf.ConfVars.METASTOREURIS, metaStoreURI);
  }
  conf.setBoolVar(HiveConf.ConfVars.METASTORE_EXECUTE_SET_UGI, true);
  conf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, true);

  dbFolder.create();

  //1) Start from a clean slate (metastore)
  TxnDbUtil.cleanDb(conf);
  TxnDbUtil.prepDb(conf);

  //2) obtain metastore clients
  msClient = new HiveMetaStoreClient(conf);
}
- @Before
- public void setup() throws Exception {
- SessionState.start(new CliSessionState(conf));
- driver = DriverFactory.newDriver(conf);
- driver.setMaxRows(200002);//make sure Driver returns all results
- // drop and recreate the necessary databases and tables
- dropDB(msClient, dbName);
- String[] colNames = new String[] {COL1, COL2};
- String[] colTypes = new String[] {serdeConstants.INT_TYPE_NAME, serdeConstants.STRING_TYPE_NAME};
- String[] bucketCols = new String[] {COL1};
- String loc1 = dbFolder.newFolder(dbName + ".db").toString();
- String[] partNames = new String[]{"Continent", "Country"};
- partLoc = createDbAndTable(driver, dbName, tblName, partitionVals, colNames, colTypes, bucketCols, partNames, loc1, 1);
- dropDB(msClient, dbName2);
- String loc2 = dbFolder.newFolder(dbName2 + ".db").toString();
- partLoc2 = createDbAndTable(driver, dbName2, tblName2, null, colNames, colTypes, bucketCols, null, loc2, 2);
- String loc3 = dbFolder.newFolder("testing5.db").toString();
- createStoreSales("testing5", loc3);
- runDDL(driver, "drop table testBucketing3.streamedtable");
- runDDL(driver, "drop table testBucketing3.finaltable");
- runDDL(driver, "drop table testBucketing3.nobucket");
- }
- @After
- public void cleanup() throws Exception {
- msClient.close();
- driver.close();
- }
- private static List<FieldSchema> getPartitionKeys() {
- List<FieldSchema> fields = new ArrayList<FieldSchema>();
- // Defining partition names in unsorted order
- fields.add(new FieldSchema("continent", serdeConstants.STRING_TYPE_NAME, ""));
- fields.add(new FieldSchema("country", serdeConstants.STRING_TYPE_NAME, ""));
- return fields;
- }
- private void createStoreSales(String dbName, String loc) throws Exception {
- String dbUri = "raw://" + new Path(loc).toUri().toString();
- String tableLoc = dbUri + Path.SEPARATOR + "store_sales";
- boolean success = runDDL(driver, "create database IF NOT EXISTS " + dbName + " location '" + dbUri + "'");
- Assert.assertTrue(success);
- success = runDDL(driver, "use " + dbName);
- Assert.assertTrue(success);
- success = runDDL(driver, "drop table if exists store_sales");
- Assert.assertTrue(success);
- success = runDDL(driver, "create table store_sales\n" +
- "(\n" +
- " ss_sold_date_sk int,\n" +
- " ss_sold_time_sk int,\n" +
- " ss_item_sk int,\n" +
- " ss_customer_sk int,\n" +
- " ss_cdemo_sk int,\n" +
- " ss_hdemo_sk int,\n" +
- " ss_addr_sk int,\n" +
- " ss_store_sk int,\n" +
- " ss_promo_sk int,\n" +
- " ss_ticket_number int,\n" +
- " ss_quantity int,\n" +
- " ss_wholesale_cost decimal(7,2),\n" +
- " ss_list_price decimal(7,2),\n" +
- " ss_sales_price decimal(7,2),\n" +
- " ss_ext_discount_amt decimal(7,2),\n" +
- " ss_ext_sales_price decimal(7,2),\n" +
- " ss_ext_wholesale_cost decimal(7,2),\n" +
- " ss_ext_list_price decimal(7,2),\n" +
- " ss_ext_tax decimal(7,2),\n" +
- " ss_coupon_amt decimal(7,2),\n" +
- " ss_net_paid decimal(7,2),\n" +
- " ss_net_paid_inc_tax decimal(7,2),\n" +
- " ss_net_profit decimal(7,2)\n" +
- ")\n" +
- " partitioned by (dt string)\n" +
- "clustered by (ss_store_sk, ss_promo_sk)\n" +
- "INTO 4 BUCKETS stored as orc " + " location '" + tableLoc + "'" + " TBLPROPERTIES ('orc.compress'='NONE', 'transactional'='true')");
- Assert.assertTrue(success);
- success = runDDL(driver, "alter table store_sales add partition(dt='2015')");
- Assert.assertTrue(success);
- }
- /**
- * make sure it works with table where bucket col is not 1st col
- * @throws Exception
- */
- @Test
- public void testBucketingWhereBucketColIsNotFirstCol() throws Exception {
- List<String> partitionVals = new ArrayList<String>();
- partitionVals.add("2015");
- HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "testing5", "store_sales", partitionVals);
- StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
- DelimitedInputWriter writer = new DelimitedInputWriter(new String[] {"ss_sold_date_sk","ss_sold_time_sk", "ss_item_sk",
- "ss_customer_sk", "ss_cdemo_sk", "ss_hdemo_sk", "ss_addr_sk", "ss_store_sk", "ss_promo_sk", "ss_ticket_number", "ss_quantity",
- "ss_wholesale_cost", "ss_list_price", "ss_sales_price", "ss_ext_discount_amt", "ss_ext_sales_price", "ss_ext_wholesale_cost",
- "ss_ext_list_price", "ss_ext_tax", "ss_coupon_amt", "ss_net_paid", "ss_net_paid_inc_tax", "ss_net_profit"},",", endPt, connection);
- TransactionBatch txnBatch = connection.fetchTransactionBatch(2, writer);
- txnBatch.beginNextTransaction();
- StringBuilder row = new StringBuilder();
- for(int i = 0; i < 10; i++) {
- for(int ints = 0; ints < 11; ints++) {
- row.append(ints).append(',');
- }
- for(int decs = 0; decs < 12; decs++) {
- row.append(i + 0.1).append(',');
- }
- row.setLength(row.length() - 1);
- txnBatch.write(row.toString().getBytes());
- }
- txnBatch.commit();
- txnBatch.close();
- connection.close();
- ArrayList<String> res = queryTable(driver, "select row__id.bucketid, * from testing5.store_sales");
- for (String re : res) {
- System.out.println(re);
- }
- }
- /**
- * Test that streaming can write to unbucketed table.
- */
- @Test
- public void testNoBuckets() throws Exception {
- queryTable(driver, "drop table if exists default.streamingnobuckets");
- //todo: why does it need transactional_properties?
- queryTable(driver, "create table default.streamingnobuckets (a string, b string) stored as orc TBLPROPERTIES('transactional'='true', 'transactional_properties'='default')");
- queryTable(driver, "insert into default.streamingnobuckets values('foo','bar')");
- List<String> rs = queryTable(driver, "select * from default.streamingNoBuckets");
- Assert.assertEquals(1, rs.size());
- Assert.assertEquals("foo\tbar", rs.get(0));
- HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "Default", "StreamingNoBuckets", null);
- String[] colNames1 = new String[] { "a", "b" };
- StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
- DelimitedInputWriter wr = new DelimitedInputWriter(colNames1,",", endPt, connection);
- TransactionBatch txnBatch = connection.fetchTransactionBatch(2, wr);
- txnBatch.beginNextTransaction();
- txnBatch.write("a1,b2".getBytes());
- txnBatch.write("a3,b4".getBytes());
- TxnStore txnHandler = TxnUtils.getTxnStore(conf);
- ShowLocksResponse resp = txnHandler.showLocks(new ShowLocksRequest());
- Assert.assertEquals(resp.getLocksSize(), 1);
- Assert.assertEquals("streamingnobuckets", resp.getLocks().get(0).getTablename());
- Assert.assertEquals("default", resp.getLocks().get(0).getDbname());
- txnBatch.commit();
- txnBatch.beginNextTransaction();
- txnBatch.write("a5,b6".getBytes());
- txnBatch.write("a7,b8".getBytes());
- txnBatch.commit();
- txnBatch.close();
- Assert.assertEquals("", 0, BucketCodec.determineVersion(536870912).decodeWriterId(536870912));
- rs = queryTable(driver,"select ROW__ID, a, b, INPUT__FILE__NAME from default.streamingnobuckets order by ROW__ID");
- Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\tfoo\tbar"));
- Assert.assertTrue(rs.get(0), rs.get(0).endsWith("streamingnobuckets/delta_0000001_0000001_0000/bucket_00000_0"));
- Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\ta1\tb2"));
- Assert.assertTrue(rs.get(1), rs.get(1).endsWith("streamingnobuckets/delta_0000002_0000003/bucket_00000"));
- Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\ta3\tb4"));
- Assert.assertTrue(rs.get(2), rs.get(2).endsWith("streamingnobuckets/delta_0000002_0000003/bucket_00000"));
- Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\ta5\tb6"));
- Assert.assertTrue(rs.get(3), rs.get(3).endsWith("streamingnobuckets/delta_0000002_0000003/bucket_00000"));
- Assert.assertTrue(rs.get(4), rs.get(4).startsWith("{\"writeid\":3,\"bucketid\":536870912,\"rowid\":1}\ta7\tb8"));
- Assert.assertTrue(rs.get(4), rs.get(4).endsWith("streamingnobuckets/delta_0000002_0000003/bucket_00000"));
- queryTable(driver, "update default.streamingnobuckets set a=0, b=0 where a='a7'");
- queryTable(driver, "delete from default.streamingnobuckets where a='a1'");
- rs = queryTable(driver, "select a, b from default.streamingnobuckets order by a, b");
- int row = 0;
- Assert.assertEquals("at row=" + row, "0\t0", rs.get(row++));
- Assert.assertEquals("at row=" + row, "a3\tb4", rs.get(row++));
- Assert.assertEquals("at row=" + row, "a5\tb6", rs.get(row++));
- Assert.assertEquals("at row=" + row, "foo\tbar", rs.get(row++));
- queryTable(driver, "alter table default.streamingnobuckets compact 'major'");
- runWorker(conf);
- rs = queryTable(driver,"select ROW__ID, a, b, INPUT__FILE__NAME from default.streamingnobuckets order by ROW__ID");
- Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\tfoo\tbar"));
- Assert.assertTrue(rs.get(0), rs.get(0).endsWith("streamingnobuckets/base_0000005_v0000025/bucket_00000"));
- Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\ta3\tb4"));
- Assert.assertTrue(rs.get(1), rs.get(1).endsWith("streamingnobuckets/base_0000005_v0000025/bucket_00000"));
- Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\ta5\tb6"));
- Assert.assertTrue(rs.get(2), rs.get(2).endsWith("streamingnobuckets/base_0000005_v0000025/bucket_00000"));
- Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t0\t0"));
- Assert.assertTrue(rs.get(3), rs.get(3).endsWith("streamingnobuckets/base_0000005_v0000025/bucket_00000"));
- }
- /**
- * this is a clone from TestTxnStatement2....
- */
- public static void runWorker(HiveConf hiveConf) throws Exception {
- AtomicBoolean stop = new AtomicBoolean(true);
- Worker t = new Worker();
- t.setThreadId((int) t.getId());
- t.setConf(hiveConf);
- AtomicBoolean looped = new AtomicBoolean();
- t.init(stop, looped);
- t.run();
- }
- // stream data into streaming table with N buckets, then copy the data into another bucketed table
- // check if bucketing in both was done in the same way
- @Test
- @Ignore
- public void testStreamBucketingMatchesRegularBucketing() throws Exception {
- int bucketCount = 100;
- String dbUri = "raw://" + new Path(dbFolder.newFolder().toString()).toUri().toString();
- String tableLoc = "'" + dbUri + Path.SEPARATOR + "streamedtable" + "'";
- String tableLoc2 = "'" + dbUri + Path.SEPARATOR + "finaltable" + "'";
- String tableLoc3 = "'" + dbUri + Path.SEPARATOR + "nobucket" + "'";
- try (IDriver driver = DriverFactory.newDriver(conf)) {
- runDDL(driver, "create database testBucketing3");
- runDDL(driver, "use testBucketing3");
- runDDL(driver, "create table streamedtable ( key1 string,key2 int,data string ) clustered by ( key1,key2 ) into "
- + bucketCount + " buckets stored as orc location " + tableLoc + " TBLPROPERTIES ('transactional'='true')");
- // In 'nobucket' table we capture bucketid from streamedtable to workaround a hive bug that prevents joins two identically bucketed tables
- runDDL(driver, "create table nobucket ( bucketid int, key1 string,key2 int,data string ) location " + tableLoc3);
- runDDL(driver,
- "create table finaltable ( bucketid int, key1 string,key2 int,data string ) clustered by ( key1,key2 ) into "
- + bucketCount + " buckets stored as orc location " + tableLoc2 + " TBLPROPERTIES ('transactional'='true')");
- String[] records = new String[]{
- "PSFAHYLZVC,29,EPNMA",
- "PPPRKWAYAU,96,VUTEE",
- "MIAOFERCHI,3,WBDSI",
- "CEGQAZOWVN,0,WCUZL",
- "XWAKMNSVQF,28,YJVHU",
- "XBWTSAJWME,2,KDQFO",
- "FUVLQTAXAY,5,LDSDG",
- "QTQMDJMGJH,6,QBOMA",
- "EFLOTLWJWN,71,GHWPS",
- "PEQNAOJHCM,82,CAAFI",
- "MOEKQLGZCP,41,RUACR",
- "QZXMCOPTID,37,LFLWE",
- "EYALVWICRD,13,JEZLC",
- "VYWLZAYTXX,16,DMVZX",
- "OSALYSQIXR,47,HNZVE",
- "JGKVHKCEGQ,25,KSCJB",
- "WQFMMYDHET,12,DTRWA",
- "AJOVAYZKZQ,15,YBKFO",
- "YAQONWCUAU,31,QJNHZ",
- "DJBXUEUOEB,35,IYCBL"
- };
- HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "testBucketing3", "streamedtable", null);
- String[] colNames1 = new String[]{"key1", "key2", "data"};
- DelimitedInputWriter wr = new DelimitedInputWriter(colNames1, ",", endPt);
- StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
- TransactionBatch txnBatch = connection.fetchTransactionBatch(2, wr);
- txnBatch.beginNextTransaction();
- for (String record : records) {
- txnBatch.write(record.toString().getBytes());
- }
- txnBatch.commit();
- txnBatch.close();
- connection.close();
- ArrayList<String> res1 = queryTable(driver, "select row__id.bucketid, * from streamedtable order by key2");
- for (String re : res1) {
- System.out.println(re);
- }
- driver.run("insert into nobucket select row__id.bucketid,* from streamedtable");
- runDDL(driver, " insert into finaltable select * from nobucket");
- ArrayList<String> res2 = queryTable(driver,
- "select row__id.bucketid,* from finaltable where row__id.bucketid<>bucketid");
- for (String s : res2) {
- LOG.error(s);
- }
- Assert.assertTrue(res2.isEmpty());
- } finally {
- conf.unset(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname);
- }
- }
- @Test
- public void testTableValidation() throws Exception {
- int bucketCount = 100;
- String dbUri = "raw://" + new Path(dbFolder.newFolder().toString()).toUri().toString();
- String tbl1 = "validation1";
- String tbl2 = "validation2";
- String tableLoc = "'" + dbUri + Path.SEPARATOR + tbl1 + "'";
- String tableLoc2 = "'" + dbUri + Path.SEPARATOR + tbl2 + "'";
- runDDL(driver, "create database testBucketing3");
- runDDL(driver, "use testBucketing3");
- runDDL(driver, "create table " + tbl1 + " ( key1 string, data string ) clustered by ( key1 ) into "
- + bucketCount + " buckets stored as orc location " + tableLoc + " TBLPROPERTIES ('transactional'='false')") ;
- runDDL(driver, "create table " + tbl2 + " ( key1 string, data string ) clustered by ( key1 ) into "
- + bucketCount + " buckets stored as orc location " + tableLoc2 + " TBLPROPERTIES ('transactional'='false')") ;
- try {
- HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "testBucketing3", "validation1", null);
- endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
- Assert.assertTrue("InvalidTable exception was not thrown", false);
- } catch (InvalidTable e) {
- // expecting this exception
- }
- try {
- HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "testBucketing3", "validation2", null);
- endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
- Assert.assertTrue("InvalidTable exception was not thrown", false);
- } catch (InvalidTable e) {
- // expecting this exception
- }
- }
- /**
- * @deprecated use {@link #checkDataWritten2(Path, long, long, int, String, boolean, String...)} -
- * there is little value in using InputFormat directly
- */
- @Deprecated
- private void checkDataWritten(Path partitionPath, long minTxn, long maxTxn, int buckets, int numExpectedFiles,
- String... records) throws Exception {
- ValidWriteIdList writeIds = getTransactionContext(conf);
- AcidUtils.Directory dir = AcidUtils.getAcidState(null, partitionPath, conf, writeIds, null, false, null, false);
- Assert.assertEquals(0, dir.getObsolete().size());
- Assert.assertEquals(0, dir.getOriginalFiles().size());
- List<AcidUtils.ParsedDelta> current = dir.getCurrentDirectories();
- System.out.println("Files found: ");
- for (AcidUtils.ParsedDelta pd : current) {
- System.out.println(pd.getPath().toString());
- }
- Assert.assertEquals(numExpectedFiles, current.size());
- // find the absolute minimum transaction
- long min = Long.MAX_VALUE;
- long max = Long.MIN_VALUE;
- for (AcidUtils.ParsedDelta pd : current) {
- if (pd.getMaxWriteId() > max) {
- max = pd.getMaxWriteId();
- }
- if (pd.getMinWriteId() < min) {
- min = pd.getMinWriteId();
- }
- }
- Assert.assertEquals(minTxn, min);
- Assert.assertEquals(maxTxn, max);
- InputFormat inf = new OrcInputFormat();
- JobConf job = new JobConf();
- job.set("mapred.input.dir", partitionPath.toString());
- job.set(BUCKET_COUNT, Integer.toString(buckets));
- job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "id,msg");
- job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "bigint:string");
- AcidUtils.setAcidOperationalProperties(job, true, null);
- job.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true);
- job.set(ValidWriteIdList.VALID_WRITEIDS_KEY, writeIds.toString());
- job.set(ValidTxnList.VALID_TXNS_KEY, conf.get(ValidTxnList.VALID_TXNS_KEY));
- InputSplit[] splits = inf.getSplits(job, buckets);
- Assert.assertEquals(numExpectedFiles, splits.length);
- org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct> rr =
- inf.getRecordReader(splits[0], job, Reporter.NULL);
- NullWritable key = rr.createKey();
- OrcStruct value = rr.createValue();
- for (String record : records) {
- Assert.assertEquals(true, rr.next(key, value));
- Assert.assertEquals(record, value.toString());
- }
- Assert.assertEquals(false, rr.next(key, value));
- }
- /**
- * @param validationQuery query to read from table to compare data against {@code records}
- * @param records expected data. each row is CVS list of values
- */
- private void checkDataWritten2(Path partitionPath, long minTxn, long maxTxn, int numExpectedFiles,
- String validationQuery, boolean vectorize, String... records) throws Exception {
- AcidUtils.Directory dir = AcidUtils.getAcidState(null, partitionPath, conf, getTransactionContext(conf), null,
- false, null, false);
- Assert.assertEquals(0, dir.getObsolete().size());
- Assert.assertEquals(0, dir.getOriginalFiles().size());
- List<AcidUtils.ParsedDelta> current = dir.getCurrentDirectories();
- System.out.println("Files found: ");
- for (AcidUtils.ParsedDelta pd : current) {
- System.out.println(pd.getPath().toString());
- }
- Assert.assertEquals(numExpectedFiles, current.size());
- // find the absolute minimum transaction
- long min = Long.MAX_VALUE;
- long max = Long.MIN_VALUE;
- for (AcidUtils.ParsedDelta pd : current) {
- if (pd.getMaxWriteId() > max) {
- max = pd.getMaxWriteId();
- }
- if (pd.getMinWriteId() < min) {
- min = pd.getMinWriteId();
- }
- }
- Assert.assertEquals(minTxn, min);
- Assert.assertEquals(maxTxn, max);
- boolean isVectorizationEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED);
- if(vectorize) {
- conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
- }
- String currStrategy = conf.getVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY);
- for(String strategy : ((Validator.StringSet)HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.getValidator()).getExpected()) {
- //run it with each split strategy - make sure there are differences
- conf.setVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY, strategy.toUpperCase());
- List<String> actualResult = queryTable(driver, validationQuery);
- for (int i = 0; i < actualResult.size(); i++) {
- Assert.assertEquals("diff at [" + i + "]. actual=" + actualResult + " expected=" +
- Arrays.toString(records), records[i], actualResult.get(i));
- }
- }
- conf.setVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY, currStrategy);
- conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, isVectorizationEnabled);
- }
- private ValidWriteIdList getTransactionContext(Configuration conf) throws Exception {
- ValidTxnList validTxnList = msClient.getValidTxns();
- conf.set(ValidTxnList.VALID_TXNS_KEY, validTxnList.writeToString());
- List<TableValidWriteIds> v = msClient.getValidWriteIds(Collections
- .singletonList(TableName.getDbTable(dbName, tblName)), validTxnList.writeToString());
- return TxnCommonUtils.createValidReaderWriteIdList(v.get(0));
- }
- private void checkNothingWritten(Path partitionPath) throws Exception {
- AcidUtils.Directory dir = AcidUtils.getAcidState(null, partitionPath, conf, getTransactionContext(conf), null,
- false, null, false);
- Assert.assertEquals(0, dir.getObsolete().size());
- Assert.assertEquals(0, dir.getOriginalFiles().size());
- List<AcidUtils.ParsedDelta> current = dir.getCurrentDirectories();
- Assert.assertEquals(0, current.size());
- }
- @Test
- public void testEndpointConnection() throws Exception {
- // For partitioned table, partitionVals are specified
- HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals);
- StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName()); //shouldn't throw
- connection.close();
- // For unpartitioned table, partitionVals are not specified
- endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
- endPt.newConnection(false, "UT_" + Thread.currentThread().getName()).close(); // should not throw
- // For partitioned table, partitionVals are not specified
- try {
- endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, null);
- connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName());
- Assert.assertTrue("ConnectionError was not thrown", false);
- connection.close();
- } catch (ConnectionError e) {
- // expecting this exception
- String errMsg = "doesn't specify any partitions for partitioned table";
- Assert.assertTrue(e.toString().endsWith(errMsg));
- }
- // For unpartitioned table, partition values are specified
- try {
- endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, partitionVals);
- connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
- Assert.assertTrue("ConnectionError was not thrown", false);
- connection.close();
- } catch (ConnectionError e) {
- // expecting this exception
- String errMsg = "specifies partitions for unpartitioned table";
- Assert.assertTrue(e.toString().endsWith(errMsg));
- }
- }
- @Test
- public void testAddPartition() throws Exception {
- List<String> newPartVals = new ArrayList<String>(2);
- newPartVals.add(PART1_CONTINENT);
- newPartVals.add("Nepal");
- HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName
- , newPartVals);
- // Ensure partition is absent
- try {
- msClient.getPartition(endPt.database, endPt.table, endPt.partitionVals);
- Assert.assertTrue("Partition already exists", false);
- } catch (NoSuchObjectException e) {
- // expect this exception
- }
- // Create partition
- Assert.assertNotNull(endPt.newConnection(true, "UT_" + Thread.currentThread().getName()));
- // Ensure partition is present
- Partition p = msClient.getPartition(endPt.database, endPt.table, endPt.partitionVals);
- Assert.assertNotNull("Did not find added partition", p);
- }
- @Test
- public void testTransactionBatchEmptyCommit() throws Exception {
- // 1) to partitioned table
- HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName,
- partitionVals);
- StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
- DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames,",", endPt, connection);
- TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
- txnBatch.beginNextTransaction();
- txnBatch.commit();
- Assert.assertEquals(TransactionBatch.TxnState.COMMITTED
- , txnBatch.getCurrentTransactionState());
- txnBatch.close();
- connection.close();
- // 2) To unpartitioned table
- endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
- writer = new DelimitedInputWriter(fieldNames2,",", endPt);
- connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
- txnBatch = connection.fetchTransactionBatch(10, writer);
- txnBatch.beginNextTransaction();
- txnBatch.commit();
- Assert.assertEquals(TransactionBatch.TxnState.COMMITTED
- , txnBatch.getCurrentTransactionState());
- txnBatch.close();
- connection.close();
- }
- /**
- * check that transactions that have not heartbeated and timedout get properly aborted
- * @throws Exception
- */
- @Test
- public void testTimeOutReaper() throws Exception {
- HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
- DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames2,",", endPt);
- StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
- TransactionBatch txnBatch = connection.fetchTransactionBatch(5, writer);
- txnBatch.beginNextTransaction();
- conf.setTimeVar(HiveConf.ConfVars.HIVE_TIMEDOUT_TXN_REAPER_START, 0, TimeUnit.SECONDS);
- //ensure txn timesout
- conf.setTimeVar(HiveConf.ConfVars.HIVE_TXN_TIMEOUT, 1, TimeUnit.MILLISECONDS);
- AcidHouseKeeperService houseKeeperService = new AcidHouseKeeperService();
- houseKeeperService.setConf(conf);
- houseKeeperService.run();
- try {
- //should fail because the TransactionBatch timed out
- txnBatch.commit();
- }
- catch(TransactionError e) {
- Assert.assertTrue("Expected aborted transaction", e.getCause() instanceof TxnAbortedException);
- }
- txnBatch.close();
- txnBatch = connection.fetchTransactionBatch(10, writer);
- txnBatch.beginNextTransaction();
- txnBatch.commit();
- txnBatch.beginNextTransaction();
- houseKeeperService.run();
- try {
- //should fail because the TransactionBatch timed out
- txnBatch.commit();
- }
- catch(TransactionError e) {
- Assert.assertTrue("Expected aborted transaction", e.getCause() instanceof TxnAbortedException);
- }
- txnBatch.close();
- connection.close();
- }
- @Test
- public void testHeartbeat() throws Exception {
- HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
- StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
- DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames2,",", endPt, connection);
- TransactionBatch txnBatch = connection.fetchTransactionBatch(5, writer);
- txnBatch.beginNextTransaction();
- //todo: this should ideally check Transaction heartbeat as well, but heartbeat
- //timestamp is not reported yet
- //GetOpenTxnsInfoResponse txnresp = msClient.showTxns();
- ShowLocksRequest request = new ShowLocksRequest();
- request.setDbname(dbName2);
- request.setTablename(tblName2);
- ShowLocksResponse response = msClient.showLocks(request);
- Assert.assertEquals("Wrong nubmer of locks: " + response, 1, response.getLocks().size());
- ShowLocksResponseElement lock = response.getLocks().get(0);
- long acquiredAt = lock.getAcquiredat();
- long heartbeatAt = lock.getLastheartbeat();
- txnBatch.heartbeat();
- response = msClient.showLocks(request);
- Assert.assertEquals("Wrong number of locks2: " + response, 1, response.getLocks().size());
- lock = response.getLocks().get(0);
- Assert.assertEquals("Acquired timestamp didn't match", acquiredAt, lock.getAcquiredat());
- Assert.assertTrue("Expected new heartbeat (" + lock.getLastheartbeat() +
- ") == old heartbeat(" + heartbeatAt +")", lock.getLastheartbeat() == heartbeatAt);
- txnBatch.close();
- int txnBatchSize = 200;
- txnBatch = connection.fetchTransactionBatch(txnBatchSize, writer);
- for(int i = 0; i < txnBatchSize; i++) {
- txnBatch.beginNextTransaction();
- if(i % 47 == 0) {
- txnBatch.heartbeat();
- }
- if(i % 10 == 0) {
- txnBatch.abort();
- }
- else {
- txnBatch.commit();
- }
- if(i % 37 == 0) {
- txnBatch.heartbeat();
- }
- }
- }
- @Test
- public void testTransactionBatchEmptyAbort() throws Exception {
- // 1) to partitioned table
- HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName,
- partitionVals);
- StreamingConnection connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName());
- DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames,",", endPt, connection);
- TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
- txnBatch.beginNextTransaction();
- txnBatch.abort();
- Assert.assertEquals(TransactionBatch.TxnState.ABORTED
- , txnBatch.getCurrentTransactionState());
- txnBatch.close();
- connection.close();
- // 2) to unpartitioned table
- endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
- writer = new DelimitedInputWriter(fieldNames,",", endPt);
- connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName());
- txnBatch = connection.fetchTransactionBatch(10, writer);
- txnBatch.beginNextTransaction();
- txnBatch.abort();
- Assert.assertEquals(TransactionBatch.TxnState.ABORTED
- , txnBatch.getCurrentTransactionState());
- txnBatch.close();
- connection.close();
- }
- @Test
- public void testTransactionBatchCommit_Delimited() throws Exception {
- testTransactionBatchCommit_Delimited(null);
- }
- @Test
- public void testTransactionBatchCommit_DelimitedUGI() throws Exception {
- testTransactionBatchCommit_Delimited(Utils.getUGI());
- }
- private void testTransactionBatchCommit_Delimited(UserGroupInformation ugi) throws Exception {
- HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName,
- partitionVals);
- StreamingConnection connection = endPt.newConnection(true, conf, ugi, "UT_" + Thread.currentThread().getName());
- DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames,",", endPt, conf, connection);
- // 1st Txn
- TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
- txnBatch.beginNextTransaction();
- Assert.assertEquals(TransactionBatch.TxnState.OPEN
- , txnBatch.getCurrentTransactionState());
- txnBatch.write("1,Hello streaming".getBytes());
- txnBatch.commit();
- checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}");
- Assert.assertEquals(TransactionBatch.TxnState.COMMITTED
- , txnBatch.getCurrentTransactionState());
- // 2nd Txn
- txnBatch.beginNextTransaction();
- Assert.assertEquals(TransactionBatch.TxnState.OPEN
- , txnBatch.getCurrentTransactionState());
- txnBatch.write("2,Welcome to streaming".getBytes());
- // data should not be visible
- checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}");
- txnBatch.commit();
- checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}",
- "{2, Welcome to streaming}");
- txnBatch.close();
- Assert.assertEquals(TransactionBatch.TxnState.INACTIVE
- , txnBatch.getCurrentTransactionState());
- connection.close();
- // To Unpartitioned table
- endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
- connection = endPt.newConnection(true, conf, ugi, "UT_" + Thread.currentThread().getName());
- writer = new DelimitedInputWriter(fieldNames,",", endPt, conf, connection);
- // 1st Txn
- txnBatch = connection.fetchTransactionBatch(10, writer);
- txnBatch.beginNextTransaction();
- Assert.assertEquals(TransactionBatch.TxnState.OPEN
- , txnBatch.getCurrentTransactionState());
- txnBatch.write("1,Hello streaming".getBytes());
- txnBatch.commit();
- Assert.assertEquals(TransactionBatch.TxnState.COMMITTED
- , txnBatch.getCurrentTransactionState());
- connection.close();
- }
- @Test
- public void testTransactionBatchCommit_Regex() throws Exception {
- testTransactionBatchCommit_Regex(null);
- }
- @Test
- public void testTransactionBatchCommit_RegexUGI() throws Exception {
- testTransactionBatchCommit_Regex(Utils.getUGI());
- }
- private void testTransactionBatchCommit_Regex(UserGroupInformation ugi) throws Exception {
- HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName,
- partitionVals);
- StreamingConnection connection = endPt.newConnection(true, conf, ugi, "UT_" + Thread.currentThread().getName());
- String regex = "([^,]*),(.*)";
- StrictRegexWriter writer = new StrictRegexWriter(regex, endPt, conf, connection);
- // 1st Txn
- TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
- txnBatch.beginNextTransaction();
- Assert.assertEquals(TransactionBatch.TxnState.OPEN
- , txnBatch.getCurrentTransactionState());
- txnBatch.write("1,Hello streaming".getBytes());
- txnBatch.commit();
- checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}");
- Assert.assertEquals(TransactionBatch.TxnState.COMMITTED
- , txnBatch.getCurrentTransactionState());
- // 2nd Txn
- txnBatch.beginNextTransaction();
- Assert.assertEquals(TransactionBatch.TxnState.OPEN
- , txnBatch.getCurrentTransactionState());
- txnBatch.write("2,Welcome to streaming".getBytes());
- // data should not be visible
- checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}");
- txnBatch.commit();
- checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}",
- "{2, Welcome to streaming}");
- txnBatch.close();
- Assert.assertEquals(TransactionBatch.TxnState.INACTIVE
- , txnBatch.getCurrentTransactionState());
- connection.close();
- // To Unpartitioned table
- endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
- connection = endPt.newConnection(true, conf, ugi, "UT_" + Thread.currentThread().getName());
- regex = "([^:]*):(.*)";
- writer = new StrictRegexWriter(regex, endPt, conf, connection);
- // 1st Txn
- txnBatch = connection.fetchTransactionBatch(10, writer);
- txnBatch.beginNextTransaction();
- Assert.assertEquals(TransactionBatch.TxnState.OPEN
- , txnBatch.getCurrentTransactionState());
- txnBatch.write("1:Hello streaming".getBytes());
- txnBatch.commit();
- Assert.assertEquals(TransactionBatch.TxnState.COMMITTED
- , txnBatch.getCurrentTransactionState());
- connection.close();
- }
- @Test
- public void testTransactionBatchCommit_Json() throws Exception {
- HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName,
- partitionVals);
- StreamingConnection connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName());
- StrictJsonWriter writer = new StrictJsonWriter(endPt, connection);
- // 1st Txn
- TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
- txnBatch.beginNextTransaction();
- Assert.assertEquals(TransactionBatch.TxnState.OPEN
- , txnBatch.getCurrentTransactionState());
- String rec1 = "{\"id\" : 1, \"msg\": \"Hello streaming\"}";
- txnBatch.write(rec1.getBytes());
- txnBatch.commit();
- checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}");
- Assert.assertEquals(TransactionBatch.TxnState.COMMITTED
- , txnBatch.getCurrentTransactionState());
- txnBatch.close();
- Assert.assertEquals(TransactionBatch.TxnState.INACTIVE
- , txnBatch.getCurrentTransactionState());
- connection.close();
- List<String> rs = queryTable(driver, "select * from " + dbName + "." + tblName);
- Assert.assertEquals(1, rs.size());
- }
- @Test
- public void testRemainingTransactions() throws Exception {
- HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName,
- partitionVals);
- DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames,",", endPt);
- StreamingConnection connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName());
- // 1) test with txn.Commit()
- TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
- int batch=0;
- int initialCount = txnBatch.remainingTransactions();
- while (txnBatch.remainingTransactions()>0) {
- txnBatch.beginNextTransaction();
- Assert.assertEquals(--initialCount, txnBatch.remainingTransactions());
- for (int rec=0; rec<2; ++rec) {
- Assert.assertEquals(TransactionBatch.TxnState.OPEN
- , txnBatch.getCurrentTransactionState());
- txnBatch.write((batch * rec + ",Hello streaming").getBytes());
- }
- txnBatch.commit();
- Assert.assertEquals(TransactionBatch.TxnState.COMMITTED
- , txnBatch.getCurrentTransactionState());
- ++batch;
- }
- Assert.assertEquals(0, txnBatch.remainingTransactions());
- txnBatch.close();
- Assert.assertEquals(TransactionBatch.TxnState.INACTIVE
- , txnBatch.getCurrentTransactionState());
- // 2) test with txn.Abort()
- txnBatch = connection.fetchTransactionBatch(10, writer);
- batch=0;
- initialCount = txnBatch.remainingTransactions();
- while (txnBatch.remainingTransactions()>0) {
- txnBatch.beginNextTransaction();
- Assert.assertEquals(--initialCount,txnBatch.remainingTransactions());
- for (int rec=0; rec<2; ++rec) {
- Assert.assertEquals(TransactionBatch.TxnState.OPEN
- , txnBatch.getCurrentTransactionState());
- txnBatch.write((batch * rec + ",Hello streaming").getBytes());
- }
- txnBatch.abort();
- Assert.assertEquals(TransactionBatch.TxnState.ABORTED
- , txnBatch.getCurrentTransactionState());
- ++batch;
- }
- Assert.assertEquals(0, txnBatch.remainingTransactions());
- txnBatch.close();
- Assert.assertEquals(TransactionBatch.TxnState.INACTIVE
- , txnBatch.getCurrentTransactionState());
- connection.close();
- }
- @Test
- public void testTransactionBatchAbort() throws Exception {
- HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName,
- partitionVals);
- StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
- DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames,",", endPt, connection);
- TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
- txnBatch.beginNextTransaction();
- txnBatch.write("1,Hello streaming".getBytes());
- txnBatch.write("2,Welcome to streaming".getBytes());
- txnBatch.abort();
- checkNothingWritten(partLoc);
- Assert.assertEquals(TransactionBatch.TxnState.ABORTED
- , txnBatch.getCurrentTransactionState());
- txnBatch.close();
- connection.close();
- checkNothingWritten(partLoc);
- }
- @Test
- public void testTransactionBatchAbortAndCommit() throws Exception {
- String agentInfo = "UT_" + Thread.currentThread().getName();
- HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName,
- partitionVals);
- StreamingConnection connection = endPt.newConnection(false, agentInfo);
- DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames,",", endPt, connection);
- TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
- txnBatch.beginNextTransaction();
- txnBatch.write("1,Hello streaming".getBytes());
- txnBatch.write("2,Welcome to streaming".getBytes());
- ShowLocksResponse resp = msClient.showLocks(new ShowLocksRequest());
- Assert.assertEquals("LockCount", 1, resp.getLocksSize());
- Assert.assertEquals("LockType", LockType.SHARED_READ, resp.getLocks().get(0).getType());
- Assert.assertEquals("LockState", LockState.ACQUIRED, resp.getLocks().get(0).getState());
- Assert.assertEquals("AgentInfo", agentInfo, resp.getLocks().get(0).getAgentInfo());
- txnBatch.abort();
- checkNothingWritten(partLoc);
- Assert.assertEquals(TransactionBatch.TxnState.ABORTED
- , txnBatch.getCurrentTransactionState());
- txnBatch.beginNextTransaction();
- txnBatch.write("1,Hello streaming".getBytes());
- txnBatch.write("2,Welcome to streaming".getBytes());
- txnBatch.commit();
- checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}",
- "{2, Welcome to streaming}");
- txnBatch.close();
- connection.close();
- }
- @Test
- public void testMultipleTransactionBatchCommits() throws Exception {
- HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName,
- partitionVals);
- DelimitedInputWriter writer = new Delimit…
Large files are truncated, but you can click here to view the full file