
/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java

http://github.com/apache/hive
Java | 2373 lines | 1922 code | 302 blank | 149 comment | 141 complexity | MD5: 0b89a080b86042fa3282259deeab8106
Possible License(s): Apache-2.0


/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hive.hcatalog.streaming;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileFilter;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RawLocalFileSystem;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hive.cli.CliSessionState;
import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.common.TableName;
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.common.ValidWriteIdList;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.Validator;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse;
import org.apache.hadoop.hive.metastore.api.LockState;
import org.apache.hadoop.hive.metastore.api.LockType;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.ShowLocksRequest;
import org.apache.hadoop.hive.metastore.api.ShowLocksResponse;
import org.apache.hadoop.hive.metastore.api.ShowLocksResponseElement;
import org.apache.hadoop.hive.metastore.api.TableValidWriteIds;
import org.apache.hadoop.hive.metastore.api.TxnAbortedException;
import org.apache.hadoop.hive.metastore.api.TxnInfo;
import org.apache.hadoop.hive.metastore.api.TxnState;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.metastore.txn.AcidHouseKeeperService;
import org.apache.hadoop.hive.metastore.txn.TxnCommonUtils;
import org.apache.hadoop.hive.metastore.txn.TxnDbUtil;
import org.apache.hadoop.hive.metastore.txn.TxnStore;
import org.apache.hadoop.hive.metastore.txn.TxnUtils;
import org.apache.hadoop.hive.ql.DriverFactory;
import org.apache.hadoop.hive.ql.IDriver;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.BucketCodec;
import org.apache.hadoop.hive.ql.io.IOConstants;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;
import org.apache.hadoop.hive.ql.processors.CommandProcessorException;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.txn.compactor.Worker;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
import org.apache.hadoop.hive.shims.Utils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.orc.impl.OrcAcidUtils;
import org.apache.orc.tools.FileDump;
import org.apache.thrift.TException;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.BUCKET_COUNT;

public class TestStreaming {
  private static final Logger LOG = LoggerFactory.getLogger(TestStreaming.class);

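  // RawLocalFileSystem subclass registered below under the "raw" scheme (see "fs.raw.impl" in the
  // constructor). It approximates POSIX permissions from java.io.File flags so the ACID code paths
  // can run against the local filesystem in these tests.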
  public static class RawFileSystem extends RawLocalFileSystem {
    private static final URI NAME;
    static {
      try {
        NAME = new URI("raw:///");
      } catch (URISyntaxException se) {
        throw new IllegalArgumentException("bad uri", se);
      }
    }

    @Override
    public URI getUri() {
      return NAME;
    }

    @Override
    public String getScheme() {
      return "raw";
    }

    @Override
    public FileStatus getFileStatus(Path path) throws IOException {
      File file = pathToFile(path);
      if (!file.exists()) {
        throw new FileNotFoundException("Can't find " + path);
      }
      // get close enough
      short mod = 0;
      if (file.canRead()) {
        mod |= 0444;
      }
      if (file.canWrite()) {
        mod |= 0200;
      }
      if (file.canExecute()) {
        mod |= 0111;
      }
      return new FileStatus(file.length(), file.isDirectory(), 1, 1024,
          file.lastModified(), file.lastModified(),
          FsPermission.createImmutable(mod), "owen", "users", path);
    }
  }

  private static final String COL1 = "id";
  private static final String COL2 = "msg";

  private final HiveConf conf;
  private IDriver driver;
  private final IMetaStoreClient msClient;

  final String metaStoreURI = null;

  // partitioned table
  private final static String dbName = "testing";
  private final static String tblName = "alerts";
  private final static String[] fieldNames = new String[]{COL1, COL2};
  List<String> partitionVals;
  private static Path partLoc;
  private static Path partLoc2;

  // unpartitioned table
  private final static String dbName2 = "testing2";
  private final static String tblName2 = "alerts";
  private final static String[] fieldNames2 = new String[]{COL1, COL2};

  // for bucket join testing
  private final static String dbName3 = "testing3";
  private final static String tblName3 = "dimensionTable";
  private final static String dbName4 = "testing4";
  private final static String tblName4 = "factTable";
  List<String> partitionVals2;

  private final String PART1_CONTINENT = "Asia";
  private final String PART1_COUNTRY = "India";

  @Rule
  public TemporaryFolder dbFolder = new TemporaryFolder();

  public TestStreaming() throws Exception {
    partitionVals = new ArrayList<String>(2);
    partitionVals.add(PART1_CONTINENT);
    partitionVals.add(PART1_COUNTRY);

    partitionVals2 = new ArrayList<String>(1);
    partitionVals2.add(PART1_COUNTRY);

    conf = new HiveConf(this.getClass());
    conf.set("fs.raw.impl", RawFileSystem.class.getName());
    conf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
        "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
    TxnDbUtil.setConfValues(conf);
    if (metaStoreURI != null) {
      conf.setVar(HiveConf.ConfVars.METASTOREURIS, metaStoreURI);
    }
    conf.setBoolVar(HiveConf.ConfVars.METASTORE_EXECUTE_SET_UGI, true);
    conf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, true);
    dbFolder.create();

    //1) Start from a clean slate (metastore)
    TxnDbUtil.cleanDb(conf);
    TxnDbUtil.prepDb(conf);

    //2) obtain metastore clients
    msClient = new HiveMetaStoreClient(conf);
  }

  @Before
  public void setup() throws Exception {
    SessionState.start(new CliSessionState(conf));
    driver = DriverFactory.newDriver(conf);
    driver.setMaxRows(200002); //make sure Driver returns all results

    // drop and recreate the necessary databases and tables
    dropDB(msClient, dbName);

    String[] colNames = new String[] {COL1, COL2};
    String[] colTypes = new String[] {serdeConstants.INT_TYPE_NAME, serdeConstants.STRING_TYPE_NAME};
    String[] bucketCols = new String[] {COL1};
    String loc1 = dbFolder.newFolder(dbName + ".db").toString();
    String[] partNames = new String[]{"Continent", "Country"};
    partLoc = createDbAndTable(driver, dbName, tblName, partitionVals, colNames, colTypes, bucketCols, partNames, loc1, 1);

    dropDB(msClient, dbName2);
    String loc2 = dbFolder.newFolder(dbName2 + ".db").toString();
    partLoc2 = createDbAndTable(driver, dbName2, tblName2, null, colNames, colTypes, bucketCols, null, loc2, 2);

    String loc3 = dbFolder.newFolder("testing5.db").toString();
    createStoreSales("testing5", loc3);

    runDDL(driver, "drop table testBucketing3.streamedtable");
    runDDL(driver, "drop table testBucketing3.finaltable");
    runDDL(driver, "drop table testBucketing3.nobucket");
  }

  @After
  public void cleanup() throws Exception {
    msClient.close();
    driver.close();
  }

  private static List<FieldSchema> getPartitionKeys() {
    List<FieldSchema> fields = new ArrayList<FieldSchema>();
    // Defining partition names in unsorted order
    fields.add(new FieldSchema("continent", serdeConstants.STRING_TYPE_NAME, ""));
    fields.add(new FieldSchema("country", serdeConstants.STRING_TYPE_NAME, ""));
    return fields;
  }

  private void createStoreSales(String dbName, String loc) throws Exception {
    String dbUri = "raw://" + new Path(loc).toUri().toString();
    String tableLoc = dbUri + Path.SEPARATOR + "store_sales";

    boolean success = runDDL(driver, "create database IF NOT EXISTS " + dbName + " location '" + dbUri + "'");
    Assert.assertTrue(success);
    success = runDDL(driver, "use " + dbName);
    Assert.assertTrue(success);

    success = runDDL(driver, "drop table if exists store_sales");
    Assert.assertTrue(success);
    success = runDDL(driver, "create table store_sales\n" +
        "(\n" +
        " ss_sold_date_sk int,\n" +
        " ss_sold_time_sk int,\n" +
        " ss_item_sk int,\n" +
        " ss_customer_sk int,\n" +
        " ss_cdemo_sk int,\n" +
        " ss_hdemo_sk int,\n" +
        " ss_addr_sk int,\n" +
        " ss_store_sk int,\n" +
        " ss_promo_sk int,\n" +
        " ss_ticket_number int,\n" +
        " ss_quantity int,\n" +
        " ss_wholesale_cost decimal(7,2),\n" +
        " ss_list_price decimal(7,2),\n" +
        " ss_sales_price decimal(7,2),\n" +
        " ss_ext_discount_amt decimal(7,2),\n" +
        " ss_ext_sales_price decimal(7,2),\n" +
        " ss_ext_wholesale_cost decimal(7,2),\n" +
        " ss_ext_list_price decimal(7,2),\n" +
        " ss_ext_tax decimal(7,2),\n" +
        " ss_coupon_amt decimal(7,2),\n" +
        " ss_net_paid decimal(7,2),\n" +
        " ss_net_paid_inc_tax decimal(7,2),\n" +
        " ss_net_profit decimal(7,2)\n" +
        ")\n" +
        " partitioned by (dt string)\n" +
        "clustered by (ss_store_sk, ss_promo_sk)\n" +
        "INTO 4 BUCKETS stored as orc " + " location '" + tableLoc + "'" + " TBLPROPERTIES ('orc.compress'='NONE', 'transactional'='true')");
    Assert.assertTrue(success);

    success = runDDL(driver, "alter table store_sales add partition(dt='2015')");
    Assert.assertTrue(success);
  }

  /**
   * make sure it works with table where bucket col is not 1st col
   * @throws Exception
   */
  @Test
  public void testBucketingWhereBucketColIsNotFirstCol() throws Exception {
    List<String> partitionVals = new ArrayList<String>();
    partitionVals.add("2015");
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "testing5", "store_sales", partitionVals);
    StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
    DelimitedInputWriter writer = new DelimitedInputWriter(new String[] {"ss_sold_date_sk", "ss_sold_time_sk", "ss_item_sk",
        "ss_customer_sk", "ss_cdemo_sk", "ss_hdemo_sk", "ss_addr_sk", "ss_store_sk", "ss_promo_sk", "ss_ticket_number", "ss_quantity",
        "ss_wholesale_cost", "ss_list_price", "ss_sales_price", "ss_ext_discount_amt", "ss_ext_sales_price", "ss_ext_wholesale_cost",
        "ss_ext_list_price", "ss_ext_tax", "ss_coupon_amt", "ss_net_paid", "ss_net_paid_inc_tax", "ss_net_profit"}, ",", endPt, connection);

    TransactionBatch txnBatch = connection.fetchTransactionBatch(2, writer);
    txnBatch.beginNextTransaction();

    StringBuilder row = new StringBuilder();
    for (int i = 0; i < 10; i++) {
      for (int ints = 0; ints < 11; ints++) {
        row.append(ints).append(',');
      }
      for (int decs = 0; decs < 12; decs++) {
        row.append(i + 0.1).append(',');
      }
      row.setLength(row.length() - 1);
      txnBatch.write(row.toString().getBytes());
    }
    txnBatch.commit();
    txnBatch.close();
    connection.close();

    ArrayList<String> res = queryTable(driver, "select row__id.bucketid, * from testing5.store_sales");
    for (String re : res) {
      System.out.println(re);
    }
  }

  /**
   * Test that streaming can write to unbucketed table.
   */
  @Test
  public void testNoBuckets() throws Exception {
    queryTable(driver, "drop table if exists default.streamingnobuckets");
    //todo: why does it need transactional_properties?
    queryTable(driver, "create table default.streamingnobuckets (a string, b string) stored as orc TBLPROPERTIES('transactional'='true', 'transactional_properties'='default')");
    queryTable(driver, "insert into default.streamingnobuckets values('foo','bar')");
    List<String> rs = queryTable(driver, "select * from default.streamingNoBuckets");
    Assert.assertEquals(1, rs.size());
    Assert.assertEquals("foo\tbar", rs.get(0));
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "Default", "StreamingNoBuckets", null);
    String[] colNames1 = new String[] { "a", "b" };
    StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
    DelimitedInputWriter wr = new DelimitedInputWriter(colNames1, ",", endPt, connection);

    TransactionBatch txnBatch = connection.fetchTransactionBatch(2, wr);
    txnBatch.beginNextTransaction();
    txnBatch.write("a1,b2".getBytes());
    txnBatch.write("a3,b4".getBytes());
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    ShowLocksResponse resp = txnHandler.showLocks(new ShowLocksRequest());
    Assert.assertEquals(resp.getLocksSize(), 1);
    Assert.assertEquals("streamingnobuckets", resp.getLocks().get(0).getTablename());
    Assert.assertEquals("default", resp.getLocks().get(0).getDbname());
    txnBatch.commit();
    txnBatch.beginNextTransaction();
    txnBatch.write("a5,b6".getBytes());
    txnBatch.write("a7,b8".getBytes());
    txnBatch.commit();
    txnBatch.close();

    Assert.assertEquals("", 0, BucketCodec.determineVersion(536870912).decodeWriterId(536870912));
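    // 536870912 is the encoded bucket property that ROW__ID carries for these rows; per the
    // assertion above, BucketCodec decodes it to writer/bucket id 0, which matches the single
    // bucket_00000 files checked below.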
    rs = queryTable(driver, "select ROW__ID, a, b, INPUT__FILE__NAME from default.streamingnobuckets order by ROW__ID");
    Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\tfoo\tbar"));
    Assert.assertTrue(rs.get(0), rs.get(0).endsWith("streamingnobuckets/delta_0000001_0000001_0000/bucket_00000_0"));
    Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\ta1\tb2"));
    Assert.assertTrue(rs.get(1), rs.get(1).endsWith("streamingnobuckets/delta_0000002_0000003/bucket_00000"));
    Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\ta3\tb4"));
    Assert.assertTrue(rs.get(2), rs.get(2).endsWith("streamingnobuckets/delta_0000002_0000003/bucket_00000"));
    Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\ta5\tb6"));
    Assert.assertTrue(rs.get(3), rs.get(3).endsWith("streamingnobuckets/delta_0000002_0000003/bucket_00000"));
    Assert.assertTrue(rs.get(4), rs.get(4).startsWith("{\"writeid\":3,\"bucketid\":536870912,\"rowid\":1}\ta7\tb8"));
    Assert.assertTrue(rs.get(4), rs.get(4).endsWith("streamingnobuckets/delta_0000002_0000003/bucket_00000"));

    queryTable(driver, "update default.streamingnobuckets set a=0, b=0 where a='a7'");
    queryTable(driver, "delete from default.streamingnobuckets where a='a1'");
    rs = queryTable(driver, "select a, b from default.streamingnobuckets order by a, b");
    int row = 0;
    Assert.assertEquals("at row=" + row, "0\t0", rs.get(row++));
    Assert.assertEquals("at row=" + row, "a3\tb4", rs.get(row++));
    Assert.assertEquals("at row=" + row, "a5\tb6", rs.get(row++));
    Assert.assertEquals("at row=" + row, "foo\tbar", rs.get(row++));

    queryTable(driver, "alter table default.streamingnobuckets compact 'major'");
    runWorker(conf);
    rs = queryTable(driver, "select ROW__ID, a, b, INPUT__FILE__NAME from default.streamingnobuckets order by ROW__ID");
    Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\tfoo\tbar"));
    Assert.assertTrue(rs.get(0), rs.get(0).endsWith("streamingnobuckets/base_0000005_v0000025/bucket_00000"));
    Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\ta3\tb4"));
    Assert.assertTrue(rs.get(1), rs.get(1).endsWith("streamingnobuckets/base_0000005_v0000025/bucket_00000"));
    Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\ta5\tb6"));
    Assert.assertTrue(rs.get(2), rs.get(2).endsWith("streamingnobuckets/base_0000005_v0000025/bucket_00000"));
    Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t0\t0"));
    Assert.assertTrue(rs.get(3), rs.get(3).endsWith("streamingnobuckets/base_0000005_v0000025/bucket_00000"));
  }

  /**
   * this is a clone from TestTxnStatement2....
   */
  public static void runWorker(HiveConf hiveConf) throws Exception {
    AtomicBoolean stop = new AtomicBoolean(true);
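    // 'stop' starts out true, so the compactor Worker below is expected to perform a single
    // compaction pass and return rather than keep looping as a long-running daemon thread.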
    Worker t = new Worker();
    t.setThreadId((int) t.getId());
    t.setConf(hiveConf);
    AtomicBoolean looped = new AtomicBoolean();
    t.init(stop, looped);
    t.run();
  }

  // stream data into streaming table with N buckets, then copy the data into another bucketed table
  // check if bucketing in both was done in the same way
  @Test
  @Ignore
  public void testStreamBucketingMatchesRegularBucketing() throws Exception {
    int bucketCount = 100;

    String dbUri = "raw://" + new Path(dbFolder.newFolder().toString()).toUri().toString();
    String tableLoc = "'" + dbUri + Path.SEPARATOR + "streamedtable" + "'";
    String tableLoc2 = "'" + dbUri + Path.SEPARATOR + "finaltable" + "'";
    String tableLoc3 = "'" + dbUri + Path.SEPARATOR + "nobucket" + "'";

    try (IDriver driver = DriverFactory.newDriver(conf)) {
      runDDL(driver, "create database testBucketing3");
      runDDL(driver, "use testBucketing3");

      runDDL(driver, "create table streamedtable ( key1 string,key2 int,data string ) clustered by ( key1,key2 ) into "
          + bucketCount + " buckets stored as orc location " + tableLoc + " TBLPROPERTIES ('transactional'='true')");
      // In 'nobucket' table we capture bucketid from streamedtable to work around a hive bug that prevents joining two identically bucketed tables
      runDDL(driver, "create table nobucket ( bucketid int, key1 string,key2 int,data string ) location " + tableLoc3);
      runDDL(driver,
          "create table finaltable ( bucketid int, key1 string,key2 int,data string ) clustered by ( key1,key2 ) into "
              + bucketCount + " buckets stored as orc location " + tableLoc2 + " TBLPROPERTIES ('transactional'='true')");

      String[] records = new String[]{
          "PSFAHYLZVC,29,EPNMA",
          "PPPRKWAYAU,96,VUTEE",
          "MIAOFERCHI,3,WBDSI",
          "CEGQAZOWVN,0,WCUZL",
          "XWAKMNSVQF,28,YJVHU",
          "XBWTSAJWME,2,KDQFO",
          "FUVLQTAXAY,5,LDSDG",
          "QTQMDJMGJH,6,QBOMA",
          "EFLOTLWJWN,71,GHWPS",
          "PEQNAOJHCM,82,CAAFI",
          "MOEKQLGZCP,41,RUACR",
          "QZXMCOPTID,37,LFLWE",
          "EYALVWICRD,13,JEZLC",
          "VYWLZAYTXX,16,DMVZX",
          "OSALYSQIXR,47,HNZVE",
          "JGKVHKCEGQ,25,KSCJB",
          "WQFMMYDHET,12,DTRWA",
          "AJOVAYZKZQ,15,YBKFO",
          "YAQONWCUAU,31,QJNHZ",
          "DJBXUEUOEB,35,IYCBL"
      };

      HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "testBucketing3", "streamedtable", null);
      String[] colNames1 = new String[]{"key1", "key2", "data"};
      DelimitedInputWriter wr = new DelimitedInputWriter(colNames1, ",", endPt);
      StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
      TransactionBatch txnBatch = connection.fetchTransactionBatch(2, wr);
      txnBatch.beginNextTransaction();

      for (String record : records) {
        txnBatch.write(record.toString().getBytes());
      }

      txnBatch.commit();
      txnBatch.close();
      connection.close();

      ArrayList<String> res1 = queryTable(driver, "select row__id.bucketid, * from streamedtable order by key2");
      for (String re : res1) {
        System.out.println(re);
      }

      driver.run("insert into nobucket select row__id.bucketid,* from streamedtable");
      runDDL(driver, " insert into finaltable select * from nobucket");
      ArrayList<String> res2 = queryTable(driver,
          "select row__id.bucketid,* from finaltable where row__id.bucketid<>bucketid");
      for (String s : res2) {
        LOG.error(s);
      }
      Assert.assertTrue(res2.isEmpty());
    } finally {
      conf.unset(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname);
    }
  }

  @Test
  public void testTableValidation() throws Exception {
    int bucketCount = 100;

    String dbUri = "raw://" + new Path(dbFolder.newFolder().toString()).toUri().toString();
    String tbl1 = "validation1";
    String tbl2 = "validation2";

    String tableLoc = "'" + dbUri + Path.SEPARATOR + tbl1 + "'";
    String tableLoc2 = "'" + dbUri + Path.SEPARATOR + tbl2 + "'";

    runDDL(driver, "create database testBucketing3");
    runDDL(driver, "use testBucketing3");

    runDDL(driver, "create table " + tbl1 + " ( key1 string, data string ) clustered by ( key1 ) into "
        + bucketCount + " buckets stored as orc location " + tableLoc + " TBLPROPERTIES ('transactional'='false')");
    runDDL(driver, "create table " + tbl2 + " ( key1 string, data string ) clustered by ( key1 ) into "
        + bucketCount + " buckets stored as orc location " + tableLoc2 + " TBLPROPERTIES ('transactional'='false')");

    try {
      HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "testBucketing3", "validation1", null);
      endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
      Assert.assertTrue("InvalidTable exception was not thrown", false);
    } catch (InvalidTable e) {
      // expecting this exception
    }
    try {
      HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "testBucketing3", "validation2", null);
      endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
      Assert.assertTrue("InvalidTable exception was not thrown", false);
    } catch (InvalidTable e) {
      // expecting this exception
    }
  }

  /**
   * @deprecated use {@link #checkDataWritten2(Path, long, long, int, String, boolean, String...)} -
   * there is little value in using InputFormat directly
   */
  @Deprecated
  private void checkDataWritten(Path partitionPath, long minTxn, long maxTxn, int buckets, int numExpectedFiles,
      String... records) throws Exception {
    ValidWriteIdList writeIds = getTransactionContext(conf);
    AcidUtils.Directory dir = AcidUtils.getAcidState(null, partitionPath, conf, writeIds, null, false, null, false);
    Assert.assertEquals(0, dir.getObsolete().size());
    Assert.assertEquals(0, dir.getOriginalFiles().size());
    List<AcidUtils.ParsedDelta> current = dir.getCurrentDirectories();
    System.out.println("Files found: ");
    for (AcidUtils.ParsedDelta pd : current) {
      System.out.println(pd.getPath().toString());
    }
    Assert.assertEquals(numExpectedFiles, current.size());

    // find the absolute minimum and maximum write ids across the delta directories
    long min = Long.MAX_VALUE;
    long max = Long.MIN_VALUE;
    for (AcidUtils.ParsedDelta pd : current) {
      if (pd.getMaxWriteId() > max) {
        max = pd.getMaxWriteId();
      }
      if (pd.getMinWriteId() < min) {
        min = pd.getMinWriteId();
      }
    }
    Assert.assertEquals(minTxn, min);
    Assert.assertEquals(maxTxn, max);

    InputFormat inf = new OrcInputFormat();
    JobConf job = new JobConf();
    job.set("mapred.input.dir", partitionPath.toString());
    job.set(BUCKET_COUNT, Integer.toString(buckets));
    job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "id,msg");
    job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "bigint:string");
    AcidUtils.setAcidOperationalProperties(job, true, null);
    job.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true);
    job.set(ValidWriteIdList.VALID_WRITEIDS_KEY, writeIds.toString());
    job.set(ValidTxnList.VALID_TXNS_KEY, conf.get(ValidTxnList.VALID_TXNS_KEY));
    InputSplit[] splits = inf.getSplits(job, buckets);
    Assert.assertEquals(numExpectedFiles, splits.length);

    org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct> rr =
        inf.getRecordReader(splits[0], job, Reporter.NULL);
    NullWritable key = rr.createKey();
    OrcStruct value = rr.createValue();
    for (String record : records) {
      Assert.assertEquals(true, rr.next(key, value));
      Assert.assertEquals(record, value.toString());
    }
    Assert.assertEquals(false, rr.next(key, value));
  }

  /**
   * @param validationQuery query to read from table to compare data against {@code records}
   * @param records expected data. each row is a CSV list of values
   */
  private void checkDataWritten2(Path partitionPath, long minTxn, long maxTxn, int numExpectedFiles,
      String validationQuery, boolean vectorize, String... records) throws Exception {
    AcidUtils.Directory dir = AcidUtils.getAcidState(null, partitionPath, conf, getTransactionContext(conf), null,
        false, null, false);
    Assert.assertEquals(0, dir.getObsolete().size());
    Assert.assertEquals(0, dir.getOriginalFiles().size());
    List<AcidUtils.ParsedDelta> current = dir.getCurrentDirectories();
    System.out.println("Files found: ");
    for (AcidUtils.ParsedDelta pd : current) {
      System.out.println(pd.getPath().toString());
    }
    Assert.assertEquals(numExpectedFiles, current.size());

    // find the absolute minimum and maximum write ids across the delta directories
    long min = Long.MAX_VALUE;
    long max = Long.MIN_VALUE;
    for (AcidUtils.ParsedDelta pd : current) {
      if (pd.getMaxWriteId() > max) {
        max = pd.getMaxWriteId();
      }
      if (pd.getMinWriteId() < min) {
        min = pd.getMinWriteId();
      }
    }
    Assert.assertEquals(minTxn, min);
    Assert.assertEquals(maxTxn, max);

    boolean isVectorizationEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED);
    if (vectorize) {
      conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
    }

    String currStrategy = conf.getVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY);
    for (String strategy : ((Validator.StringSet) HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.getValidator()).getExpected()) {
      //run it with each split strategy - make sure the results don't differ across strategies
      conf.setVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY, strategy.toUpperCase());
      List<String> actualResult = queryTable(driver, validationQuery);
      for (int i = 0; i < actualResult.size(); i++) {
        Assert.assertEquals("diff at [" + i + "]. actual=" + actualResult + " expected=" +
            Arrays.toString(records), records[i], actualResult.get(i));
      }
    }
    conf.setVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY, currStrategy);
    conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, isVectorizationEnabled);
  }

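  // Helper below: capture a snapshot of currently valid transactions from the metastore and turn it
  // into a ValidWriteIdList for dbName.tblName, so that AcidUtils.getAcidState and the readers in
  // the checks above only consider data visible to that snapshot.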
  private ValidWriteIdList getTransactionContext(Configuration conf) throws Exception {
    ValidTxnList validTxnList = msClient.getValidTxns();
    conf.set(ValidTxnList.VALID_TXNS_KEY, validTxnList.writeToString());
    List<TableValidWriteIds> v = msClient.getValidWriteIds(Collections
        .singletonList(TableName.getDbTable(dbName, tblName)), validTxnList.writeToString());
    return TxnCommonUtils.createValidReaderWriteIdList(v.get(0));
  }

  private void checkNothingWritten(Path partitionPath) throws Exception {
    AcidUtils.Directory dir = AcidUtils.getAcidState(null, partitionPath, conf, getTransactionContext(conf), null,
        false, null, false);
    Assert.assertEquals(0, dir.getObsolete().size());
    Assert.assertEquals(0, dir.getOriginalFiles().size());
    List<AcidUtils.ParsedDelta> current = dir.getCurrentDirectories();
    Assert.assertEquals(0, current.size());
  }

  @Test
  public void testEndpointConnection() throws Exception {
    // For partitioned table, partitionVals are specified
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals);
    StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName()); //shouldn't throw
    connection.close();

    // For unpartitioned table, partitionVals are not specified
    endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
    endPt.newConnection(false, "UT_" + Thread.currentThread().getName()).close(); // should not throw

    // For partitioned table, partitionVals are not specified
    try {
      endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, null);
      connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName());
      Assert.assertTrue("ConnectionError was not thrown", false);
      connection.close();
    } catch (ConnectionError e) {
      // expecting this exception
      String errMsg = "doesn't specify any partitions for partitioned table";
      Assert.assertTrue(e.toString().endsWith(errMsg));
    }

    // For unpartitioned table, partition values are specified
    try {
      endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, partitionVals);
      connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
      Assert.assertTrue("ConnectionError was not thrown", false);
      connection.close();
    } catch (ConnectionError e) {
      // expecting this exception
      String errMsg = "specifies partitions for unpartitioned table";
      Assert.assertTrue(e.toString().endsWith(errMsg));
    }
  }

  @Test
  public void testAddPartition() throws Exception {
    List<String> newPartVals = new ArrayList<String>(2);
    newPartVals.add(PART1_CONTINENT);
    newPartVals.add("Nepal");

    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, newPartVals);

    // Ensure partition is absent
    try {
      msClient.getPartition(endPt.database, endPt.table, endPt.partitionVals);
      Assert.assertTrue("Partition already exists", false);
    } catch (NoSuchObjectException e) {
      // expect this exception
    }

    // Create partition
    Assert.assertNotNull(endPt.newConnection(true, "UT_" + Thread.currentThread().getName()));

    // Ensure partition is present
    Partition p = msClient.getPartition(endPt.database, endPt.table, endPt.partitionVals);
    Assert.assertNotNull("Did not find added partition", p);
  }

  @Test
  public void testTransactionBatchEmptyCommit() throws Exception {
    // 1) to partitioned table
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals);
    StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
    DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames, ",", endPt, connection);

    TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
    txnBatch.beginNextTransaction();
    txnBatch.commit();
    Assert.assertEquals(TransactionBatch.TxnState.COMMITTED, txnBatch.getCurrentTransactionState());
    txnBatch.close();
    connection.close();

    // 2) To unpartitioned table
    endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
    writer = new DelimitedInputWriter(fieldNames2, ",", endPt);
    connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());

    txnBatch = connection.fetchTransactionBatch(10, writer);
    txnBatch.beginNextTransaction();
    txnBatch.commit();
    Assert.assertEquals(TransactionBatch.TxnState.COMMITTED, txnBatch.getCurrentTransactionState());
    txnBatch.close();
    connection.close();
  }

  /**
   * check that transactions that have not heartbeated and timed out get properly aborted
   * @throws Exception
   */
  @Test
  public void testTimeOutReaper() throws Exception {
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
    DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames2, ",", endPt);
    StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());

    TransactionBatch txnBatch = connection.fetchTransactionBatch(5, writer);
    txnBatch.beginNextTransaction();
    conf.setTimeVar(HiveConf.ConfVars.HIVE_TIMEDOUT_TXN_REAPER_START, 0, TimeUnit.SECONDS);
    //ensure txn times out
    conf.setTimeVar(HiveConf.ConfVars.HIVE_TXN_TIMEOUT, 1, TimeUnit.MILLISECONDS);
    AcidHouseKeeperService houseKeeperService = new AcidHouseKeeperService();
    houseKeeperService.setConf(conf);
    houseKeeperService.run();
    try {
      //should fail because the TransactionBatch timed out
      txnBatch.commit();
    } catch (TransactionError e) {
      Assert.assertTrue("Expected aborted transaction", e.getCause() instanceof TxnAbortedException);
    }
    txnBatch.close();
    txnBatch = connection.fetchTransactionBatch(10, writer);
    txnBatch.beginNextTransaction();
    txnBatch.commit();
    txnBatch.beginNextTransaction();
    houseKeeperService.run();
    try {
      //should fail because the TransactionBatch timed out
      txnBatch.commit();
    } catch (TransactionError e) {
      Assert.assertTrue("Expected aborted transaction", e.getCause() instanceof TxnAbortedException);
    }
    txnBatch.close();
    connection.close();
  }

  @Test
  public void testHeartbeat() throws Exception {
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
    StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
    DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames2, ",", endPt, connection);
    TransactionBatch txnBatch = connection.fetchTransactionBatch(5, writer);
    txnBatch.beginNextTransaction();
    //todo: this should ideally check Transaction heartbeat as well, but heartbeat
    //timestamp is not reported yet
    //GetOpenTxnsInfoResponse txnresp = msClient.showTxns();
    ShowLocksRequest request = new ShowLocksRequest();
    request.setDbname(dbName2);
    request.setTablename(tblName2);
    ShowLocksResponse response = msClient.showLocks(request);
    Assert.assertEquals("Wrong number of locks: " + response, 1, response.getLocks().size());
    ShowLocksResponseElement lock = response.getLocks().get(0);
    long acquiredAt = lock.getAcquiredat();
    long heartbeatAt = lock.getLastheartbeat();
    txnBatch.heartbeat();
    response = msClient.showLocks(request);
    Assert.assertEquals("Wrong number of locks2: " + response, 1, response.getLocks().size());
    lock = response.getLocks().get(0);
    Assert.assertEquals("Acquired timestamp didn't match", acquiredAt, lock.getAcquiredat());
    Assert.assertTrue("Expected new heartbeat (" + lock.getLastheartbeat() +
        ") == old heartbeat(" + heartbeatAt + ")", lock.getLastheartbeat() == heartbeatAt);
    txnBatch.close();
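    // Exercise a larger batch, interleaving heartbeat(), abort() and commit() at different
    // intervals across many transactions.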
    int txnBatchSize = 200;
    txnBatch = connection.fetchTransactionBatch(txnBatchSize, writer);
    for (int i = 0; i < txnBatchSize; i++) {
      txnBatch.beginNextTransaction();
      if (i % 47 == 0) {
        txnBatch.heartbeat();
      }
      if (i % 10 == 0) {
        txnBatch.abort();
      } else {
        txnBatch.commit();
      }
      if (i % 37 == 0) {
        txnBatch.heartbeat();
      }
    }
  }

  @Test
  public void testTransactionBatchEmptyAbort() throws Exception {
    // 1) to partitioned table
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals);
    StreamingConnection connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName());
    DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames, ",", endPt, connection);

    TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
    txnBatch.beginNextTransaction();
    txnBatch.abort();
    Assert.assertEquals(TransactionBatch.TxnState.ABORTED, txnBatch.getCurrentTransactionState());
    txnBatch.close();
    connection.close();

    // 2) to unpartitioned table
    endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
    writer = new DelimitedInputWriter(fieldNames, ",", endPt);
    connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName());

    txnBatch = connection.fetchTransactionBatch(10, writer);
    txnBatch.beginNextTransaction();
    txnBatch.abort();
    Assert.assertEquals(TransactionBatch.TxnState.ABORTED, txnBatch.getCurrentTransactionState());
    txnBatch.close();
    connection.close();
  }

  @Test
  public void testTransactionBatchCommit_Delimited() throws Exception {
    testTransactionBatchCommit_Delimited(null);
  }

  @Test
  public void testTransactionBatchCommit_DelimitedUGI() throws Exception {
    testTransactionBatchCommit_Delimited(Utils.getUGI());
  }

  private void testTransactionBatchCommit_Delimited(UserGroupInformation ugi) throws Exception {
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals);
    StreamingConnection connection = endPt.newConnection(true, conf, ugi, "UT_" + Thread.currentThread().getName());
    DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames, ",", endPt, conf, connection);

    // 1st Txn
    TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
    txnBatch.beginNextTransaction();
    Assert.assertEquals(TransactionBatch.TxnState.OPEN, txnBatch.getCurrentTransactionState());
    txnBatch.write("1,Hello streaming".getBytes());
    txnBatch.commit();

    checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}");

    Assert.assertEquals(TransactionBatch.TxnState.COMMITTED, txnBatch.getCurrentTransactionState());

    // 2nd Txn
    txnBatch.beginNextTransaction();
    Assert.assertEquals(TransactionBatch.TxnState.OPEN, txnBatch.getCurrentTransactionState());
    txnBatch.write("2,Welcome to streaming".getBytes());

    // data should not be visible
    checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}");

    txnBatch.commit();

    checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}",
        "{2, Welcome to streaming}");

    txnBatch.close();
    Assert.assertEquals(TransactionBatch.TxnState.INACTIVE, txnBatch.getCurrentTransactionState());

    connection.close();

    // To Unpartitioned table
    endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
    connection = endPt.newConnection(true, conf, ugi, "UT_" + Thread.currentThread().getName());
    writer = new DelimitedInputWriter(fieldNames, ",", endPt, conf, connection);

    // 1st Txn
    txnBatch = connection.fetchTransactionBatch(10, writer);
    txnBatch.beginNextTransaction();
    Assert.assertEquals(TransactionBatch.TxnState.OPEN, txnBatch.getCurrentTransactionState());
    txnBatch.write("1,Hello streaming".getBytes());
    txnBatch.commit();

    Assert.assertEquals(TransactionBatch.TxnState.COMMITTED, txnBatch.getCurrentTransactionState());
    connection.close();
  }

  @Test
  public void testTransactionBatchCommit_Regex() throws Exception {
    testTransactionBatchCommit_Regex(null);
  }

  @Test
  public void testTransactionBatchCommit_RegexUGI() throws Exception {
    testTransactionBatchCommit_Regex(Utils.getUGI());
  }

  private void testTransactionBatchCommit_Regex(UserGroupInformation ugi) throws Exception {
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals);
    StreamingConnection connection = endPt.newConnection(true, conf, ugi, "UT_" + Thread.currentThread().getName());
    String regex = "([^,]*),(.*)";
    StrictRegexWriter writer = new StrictRegexWriter(regex, endPt, conf, connection);

    // 1st Txn
    TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
    txnBatch.beginNextTransaction();
    Assert.assertEquals(TransactionBatch.TxnState.OPEN, txnBatch.getCurrentTransactionState());
    txnBatch.write("1,Hello streaming".getBytes());
    txnBatch.commit();

    checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}");

    Assert.assertEquals(TransactionBatch.TxnState.COMMITTED, txnBatch.getCurrentTransactionState());

    // 2nd Txn
    txnBatch.beginNextTransaction();
    Assert.assertEquals(TransactionBatch.TxnState.OPEN, txnBatch.getCurrentTransactionState());
    txnBatch.write("2,Welcome to streaming".getBytes());

    // data should not be visible
    checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}");

    txnBatch.commit();

    checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}",
        "{2, Welcome to streaming}");

    txnBatch.close();
    Assert.assertEquals(TransactionBatch.TxnState.INACTIVE, txnBatch.getCurrentTransactionState());

    connection.close();

    // To Unpartitioned table
    endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
    connection = endPt.newConnection(true, conf, ugi, "UT_" + Thread.currentThread().getName());
    regex = "([^:]*):(.*)";
    writer = new StrictRegexWriter(regex, endPt, conf, connection);

    // 1st Txn
    txnBatch = connection.fetchTransactionBatch(10, writer);
    txnBatch.beginNextTransaction();
    Assert.assertEquals(TransactionBatch.TxnState.OPEN, txnBatch.getCurrentTransactionState());
    txnBatch.write("1:Hello streaming".getBytes());
    txnBatch.commit();

    Assert.assertEquals(TransactionBatch.TxnState.COMMITTED, txnBatch.getCurrentTransactionState());
    connection.close();
  }

  @Test
  public void testTransactionBatchCommit_Json() throws Exception {
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals);
    StreamingConnection connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName());
    StrictJsonWriter writer = new StrictJsonWriter(endPt, connection);

    // 1st Txn
    TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
    txnBatch.beginNextTransaction();
    Assert.assertEquals(TransactionBatch.TxnState.OPEN, txnBatch.getCurrentTransactionState());
    String rec1 = "{\"id\" : 1, \"msg\": \"Hello streaming\"}";
    txnBatch.write(rec1.getBytes());
    txnBatch.commit();

    checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}");

    Assert.assertEquals(TransactionBatch.TxnState.COMMITTED, txnBatch.getCurrentTransactionState());

    txnBatch.close();
    Assert.assertEquals(TransactionBatch.TxnState.INACTIVE, txnBatch.getCurrentTransactionState());

    connection.close();
    List<String> rs = queryTable(driver, "select * from " + dbName + "." + tblName);
    Assert.assertEquals(1, rs.size());
  }

  @Test
  public void testRemainingTransactions() throws Exception {
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals);
    DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames, ",", endPt);
    StreamingConnection connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName());

    // 1) test with txn.Commit()
    TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
    int batch = 0;
    int initialCount = txnBatch.remainingTransactions();
    while (txnBatch.remainingTransactions() > 0) {
      txnBatch.beginNextTransaction();
      Assert.assertEquals(--initialCount, txnBatch.remainingTransactions());
      for (int rec = 0; rec < 2; ++rec) {
        Assert.assertEquals(TransactionBatch.TxnState.OPEN, txnBatch.getCurrentTransactionState());
        txnBatch.write((batch * rec + ",Hello streaming").getBytes());
      }
      txnBatch.commit();
      Assert.assertEquals(TransactionBatch.TxnState.COMMITTED, txnBatch.getCurrentTransactionState());
      ++batch;
    }
    Assert.assertEquals(0, txnBatch.remainingTransactions());
    txnBatch.close();

    Assert.assertEquals(TransactionBatch.TxnState.INACTIVE, txnBatch.getCurrentTransactionState());

    // 2) test with txn.Abort()
    txnBatch = connection.fetchTransactionBatch(10, writer);
    batch = 0;
    initialCount = txnBatch.remainingTransactions();
    while (txnBatch.remainingTransactions() > 0) {
      txnBatch.beginNextTransaction();
      Assert.assertEquals(--initialCount, txnBatch.remainingTransactions());
      for (int rec = 0; rec < 2; ++rec) {
        Assert.assertEquals(TransactionBatch.TxnState.OPEN, txnBatch.getCurrentTransactionState());
        txnBatch.write((batch * rec + ",Hello streaming").getBytes());
      }
      txnBatch.abort();
      Assert.assertEquals(TransactionBatch.TxnState.ABORTED, txnBatch.getCurrentTransactionState());
      ++batch;
    }
    Assert.assertEquals(0, txnBatch.remainingTransactions());
    txnBatch.close();

    Assert.assertEquals(TransactionBatch.TxnState.INACTIVE, txnBatch.getCurrentTransactionState());

    connection.close();
  }

  @Test
  public void testTransactionBatchAbort() throws Exception {
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals);
    StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
    DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames, ",", endPt, connection);

    TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
    txnBatch.beginNextTransaction();
    txnBatch.write("1,Hello streaming".getBytes());
    txnBatch.write("2,Welcome to streaming".getBytes());
    txnBatch.abort();

    checkNothingWritten(partLoc);

    Assert.assertEquals(TransactionBatch.TxnState.ABORTED, txnBatch.getCurrentTransactionState());

    txnBatch.close();
    connection.close();

    checkNothingWritten(partLoc);
  }

  @Test
  public void testTransactionBatchAbortAndCommit() throws Exception {
    String agentInfo = "UT_" + Thread.currentThread().getName();
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals);
    StreamingConnection connection = endPt.newConnection(false, agentInfo);
    DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames, ",", endPt, connection);

    TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
    txnBatch.beginNextTransaction();
    txnBatch.write("1,Hello streaming".getBytes());
    txnBatch.write("2,Welcome to streaming".getBytes());
    ShowLocksResponse resp = msClient.showLocks(new ShowLocksRequest());
    Assert.assertEquals("LockCount", 1, resp.getLocksSize());
    Assert.assertEquals("LockType", LockType.SHARED_READ, resp.getLocks().get(0).getType());
    Assert.assertEquals("LockState", LockState.ACQUIRED, resp.getLocks().get(0).getState());
    Assert.assertEquals("AgentInfo", agentInfo, resp.getLocks().get(0).getAgentInfo());
    txnBatch.abort();

    checkNothingWritten(partLoc);

    Assert.assertEquals(TransactionBatch.TxnState.ABORTED, txnBatch.getCurrentTransactionState());

    txnBatch.beginNextTransaction();
    txnBatch.write("1,Hello streaming".getBytes());
    txnBatch.write("2,Welcome to streaming".getBytes());
    txnBatch.commit();

    checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}",
        "{2, Welcome to streaming}");

    txnBatch.close();
    connection.close();
  }

  @Test
  public void testMultipleTransactionBatchCommits() throws Exception {
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals);
    DelimitedInputWriter writer = new Delimit

Note: this large file is truncated at this point; the full TestStreaming.java is available in the apache/hive repository.