
/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java

http://github.com/apache/hive
Java | 2373 lines | 1922 code | 302 blank | 149 comment | 141 complexity | MD5: 0b89a080b86042fa3282259deeab8106
Possible License(s): Apache-2.0


/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hive.hcatalog.streaming;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileFilter;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RawLocalFileSystem;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hive.cli.CliSessionState;
import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.common.TableName;
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.common.ValidWriteIdList;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.Validator;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse;
import org.apache.hadoop.hive.metastore.api.LockState;
import org.apache.hadoop.hive.metastore.api.LockType;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.ShowLocksRequest;
import org.apache.hadoop.hive.metastore.api.ShowLocksResponse;
import org.apache.hadoop.hive.metastore.api.ShowLocksResponseElement;
import org.apache.hadoop.hive.metastore.api.TableValidWriteIds;
import org.apache.hadoop.hive.metastore.api.TxnAbortedException;
import org.apache.hadoop.hive.metastore.api.TxnInfo;
import org.apache.hadoop.hive.metastore.api.TxnState;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.metastore.txn.AcidHouseKeeperService;
import org.apache.hadoop.hive.metastore.txn.TxnCommonUtils;
import org.apache.hadoop.hive.metastore.txn.TxnDbUtil;
import org.apache.hadoop.hive.metastore.txn.TxnStore;
import org.apache.hadoop.hive.metastore.txn.TxnUtils;
import org.apache.hadoop.hive.ql.DriverFactory;
import org.apache.hadoop.hive.ql.IDriver;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.BucketCodec;
import org.apache.hadoop.hive.ql.io.IOConstants;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;
import org.apache.hadoop.hive.ql.processors.CommandProcessorException;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.txn.compactor.Worker;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
import org.apache.hadoop.hive.shims.Utils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.orc.impl.OrcAcidUtils;
import org.apache.orc.tools.FileDump;
import org.apache.thrift.TException;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.BUCKET_COUNT;

public class TestStreaming {
  private static final Logger LOG = LoggerFactory.getLogger(TestStreaming.class);

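  // RawLocalFileSystem subclass registered below under the "raw" scheme (see "fs.raw.impl" in the
  // constructor). It approximates POSIX permissions from java.io.File flags so the ACID code paths
  // can run against the local filesystem in these tests.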
  public static class RawFileSystem extends RawLocalFileSystem {
    private static final URI NAME;
    static {
      try {
        NAME = new URI("raw:///");
      } catch (URISyntaxException se) {
        throw new IllegalArgumentException("bad uri", se);
      }
    }

    @Override
    public URI getUri() {
      return NAME;
    }

    @Override
    public String getScheme() {
      return "raw";
    }

    @Override
    public FileStatus getFileStatus(Path path) throws IOException {
      File file = pathToFile(path);
      if (!file.exists()) {
        throw new FileNotFoundException("Can't find " + path);
      }
      // get close enough
      short mod = 0;
      if (file.canRead()) {
        mod |= 0444;
      }
      if (file.canWrite()) {
        mod |= 0200;
      }
      if (file.canExecute()) {
        mod |= 0111;
      }
      return new FileStatus(file.length(), file.isDirectory(), 1, 1024,
          file.lastModified(), file.lastModified(),
          FsPermission.createImmutable(mod), "owen", "users", path);
    }
  }

  private static final String COL1 = "id";
  private static final String COL2 = "msg";

  private final HiveConf conf;
  private IDriver driver;
  private final IMetaStoreClient msClient;

  final String metaStoreURI = null;

  // partitioned table
  private final static String dbName = "testing";
  private final static String tblName = "alerts";
  private final static String[] fieldNames = new String[]{COL1, COL2};
  List<String> partitionVals;
  private static Path partLoc;
  private static Path partLoc2;

  // unpartitioned table
  private final static String dbName2 = "testing2";
  private final static String tblName2 = "alerts";
  private final static String[] fieldNames2 = new String[]{COL1, COL2};

  // for bucket join testing
  private final static String dbName3 = "testing3";
  private final static String tblName3 = "dimensionTable";
  private final static String dbName4 = "testing4";
  private final static String tblName4 = "factTable";
  List<String> partitionVals2;

  private final String PART1_CONTINENT = "Asia";
  private final String PART1_COUNTRY = "India";

  @Rule
  public TemporaryFolder dbFolder = new TemporaryFolder();

  public TestStreaming() throws Exception {
    partitionVals = new ArrayList<String>(2);
    partitionVals.add(PART1_CONTINENT);
    partitionVals.add(PART1_COUNTRY);

    partitionVals2 = new ArrayList<String>(1);
    partitionVals2.add(PART1_COUNTRY);

    conf = new HiveConf(this.getClass());
    conf.set("fs.raw.impl", RawFileSystem.class.getName());
    conf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
        "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
    TxnDbUtil.setConfValues(conf);
    if (metaStoreURI != null) {
      conf.setVar(HiveConf.ConfVars.METASTOREURIS, metaStoreURI);
    }
    conf.setBoolVar(HiveConf.ConfVars.METASTORE_EXECUTE_SET_UGI, true);
    conf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, true);
    dbFolder.create();

    //1) Start from a clean slate (metastore)
    TxnDbUtil.cleanDb(conf);
    TxnDbUtil.prepDb(conf);

    //2) obtain metastore clients
    msClient = new HiveMetaStoreClient(conf);
  }

  @Before
  public void setup() throws Exception {
    SessionState.start(new CliSessionState(conf));
    driver = DriverFactory.newDriver(conf);
    driver.setMaxRows(200002); //make sure Driver returns all results

    // drop and recreate the necessary databases and tables
    dropDB(msClient, dbName);

    String[] colNames = new String[] {COL1, COL2};
    String[] colTypes = new String[] {serdeConstants.INT_TYPE_NAME, serdeConstants.STRING_TYPE_NAME};
    String[] bucketCols = new String[] {COL1};
    String loc1 = dbFolder.newFolder(dbName + ".db").toString();
    String[] partNames = new String[]{"Continent", "Country"};
    partLoc = createDbAndTable(driver, dbName, tblName, partitionVals, colNames, colTypes, bucketCols, partNames, loc1, 1);

    dropDB(msClient, dbName2);
    String loc2 = dbFolder.newFolder(dbName2 + ".db").toString();
    partLoc2 = createDbAndTable(driver, dbName2, tblName2, null, colNames, colTypes, bucketCols, null, loc2, 2);

    String loc3 = dbFolder.newFolder("testing5.db").toString();
    createStoreSales("testing5", loc3);

    runDDL(driver, "drop table testBucketing3.streamedtable");
    runDDL(driver, "drop table testBucketing3.finaltable");
    runDDL(driver, "drop table testBucketing3.nobucket");
  }

  @After
  public void cleanup() throws Exception {
    msClient.close();
    driver.close();
  }

  private static List<FieldSchema> getPartitionKeys() {
    List<FieldSchema> fields = new ArrayList<FieldSchema>();
    // Defining partition names in unsorted order
    fields.add(new FieldSchema("continent", serdeConstants.STRING_TYPE_NAME, ""));
    fields.add(new FieldSchema("country", serdeConstants.STRING_TYPE_NAME, ""));
    return fields;
  }

  private void createStoreSales(String dbName, String loc) throws Exception {
    String dbUri = "raw://" + new Path(loc).toUri().toString();
    String tableLoc = dbUri + Path.SEPARATOR + "store_sales";

    boolean success = runDDL(driver, "create database IF NOT EXISTS " + dbName + " location '" + dbUri + "'");
    Assert.assertTrue(success);
    success = runDDL(driver, "use " + dbName);
    Assert.assertTrue(success);

    success = runDDL(driver, "drop table if exists store_sales");
    Assert.assertTrue(success);
    success = runDDL(driver, "create table store_sales\n" +
        "(\n" +
        " ss_sold_date_sk int,\n" +
        " ss_sold_time_sk int,\n" +
        " ss_item_sk int,\n" +
        " ss_customer_sk int,\n" +
        " ss_cdemo_sk int,\n" +
        " ss_hdemo_sk int,\n" +
        " ss_addr_sk int,\n" +
        " ss_store_sk int,\n" +
        " ss_promo_sk int,\n" +
        " ss_ticket_number int,\n" +
        " ss_quantity int,\n" +
        " ss_wholesale_cost decimal(7,2),\n" +
        " ss_list_price decimal(7,2),\n" +
        " ss_sales_price decimal(7,2),\n" +
        " ss_ext_discount_amt decimal(7,2),\n" +
        " ss_ext_sales_price decimal(7,2),\n" +
        " ss_ext_wholesale_cost decimal(7,2),\n" +
        " ss_ext_list_price decimal(7,2),\n" +
        " ss_ext_tax decimal(7,2),\n" +
        " ss_coupon_amt decimal(7,2),\n" +
        " ss_net_paid decimal(7,2),\n" +
        " ss_net_paid_inc_tax decimal(7,2),\n" +
        " ss_net_profit decimal(7,2)\n" +
        ")\n" +
        " partitioned by (dt string)\n" +
        "clustered by (ss_store_sk, ss_promo_sk)\n" +
        "INTO 4 BUCKETS stored as orc " + " location '" + tableLoc + "'" + " TBLPROPERTIES ('orc.compress'='NONE', 'transactional'='true')");
    Assert.assertTrue(success);

    success = runDDL(driver, "alter table store_sales add partition(dt='2015')");
    Assert.assertTrue(success);
  }

  /**
   * make sure it works with table where bucket col is not 1st col
   * @throws Exception
   */
  @Test
  public void testBucketingWhereBucketColIsNotFirstCol() throws Exception {
    List<String> partitionVals = new ArrayList<String>();
    partitionVals.add("2015");
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "testing5", "store_sales", partitionVals);
    StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
    DelimitedInputWriter writer = new DelimitedInputWriter(new String[] {"ss_sold_date_sk", "ss_sold_time_sk", "ss_item_sk",
        "ss_customer_sk", "ss_cdemo_sk", "ss_hdemo_sk", "ss_addr_sk", "ss_store_sk", "ss_promo_sk", "ss_ticket_number", "ss_quantity",
        "ss_wholesale_cost", "ss_list_price", "ss_sales_price", "ss_ext_discount_amt", "ss_ext_sales_price", "ss_ext_wholesale_cost",
        "ss_ext_list_price", "ss_ext_tax", "ss_coupon_amt", "ss_net_paid", "ss_net_paid_inc_tax", "ss_net_profit"}, ",", endPt, connection);

    TransactionBatch txnBatch = connection.fetchTransactionBatch(2, writer);
    txnBatch.beginNextTransaction();

    StringBuilder row = new StringBuilder();
    for (int i = 0; i < 10; i++) {
      for (int ints = 0; ints < 11; ints++) {
        row.append(ints).append(',');
      }
      for (int decs = 0; decs < 12; decs++) {
        row.append(i + 0.1).append(',');
      }
      row.setLength(row.length() - 1);
      txnBatch.write(row.toString().getBytes());
    }
    txnBatch.commit();
    txnBatch.close();
    connection.close();

    ArrayList<String> res = queryTable(driver, "select row__id.bucketid, * from testing5.store_sales");
    for (String re : res) {
      System.out.println(re);
    }
  }

  /**
   * Test that streaming can write to unbucketed table.
   */
  @Test
  public void testNoBuckets() throws Exception {
    queryTable(driver, "drop table if exists default.streamingnobuckets");
    //todo: why does it need transactional_properties?
    queryTable(driver, "create table default.streamingnobuckets (a string, b string) stored as orc TBLPROPERTIES('transactional'='true', 'transactional_properties'='default')");
    queryTable(driver, "insert into default.streamingnobuckets values('foo','bar')");
    List<String> rs = queryTable(driver, "select * from default.streamingNoBuckets");
    Assert.assertEquals(1, rs.size());
    Assert.assertEquals("foo\tbar", rs.get(0));
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "Default", "StreamingNoBuckets", null);
    String[] colNames1 = new String[] { "a", "b" };
    StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
    DelimitedInputWriter wr = new DelimitedInputWriter(colNames1, ",", endPt, connection);

    TransactionBatch txnBatch = connection.fetchTransactionBatch(2, wr);
    txnBatch.beginNextTransaction();
    txnBatch.write("a1,b2".getBytes());
    txnBatch.write("a3,b4".getBytes());
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    ShowLocksResponse resp = txnHandler.showLocks(new ShowLocksRequest());
    Assert.assertEquals(resp.getLocksSize(), 1);
    Assert.assertEquals("streamingnobuckets", resp.getLocks().get(0).getTablename());
    Assert.assertEquals("default", resp.getLocks().get(0).getDbname());
    txnBatch.commit();
    txnBatch.beginNextTransaction();
    txnBatch.write("a5,b6".getBytes());
    txnBatch.write("a7,b8".getBytes());
    txnBatch.commit();
    txnBatch.close();

    Assert.assertEquals("", 0, BucketCodec.determineVersion(536870912).decodeWriterId(536870912));
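    // 536870912 is the encoded bucket property that ROW__ID carries for these rows; per the
    // assertion above, BucketCodec decodes it to writer/bucket id 0, which matches the single
    // bucket_00000 files checked below.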
    rs = queryTable(driver, "select ROW__ID, a, b, INPUT__FILE__NAME from default.streamingnobuckets order by ROW__ID");
    Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\tfoo\tbar"));
    Assert.assertTrue(rs.get(0), rs.get(0).endsWith("streamingnobuckets/delta_0000001_0000001_0000/bucket_00000_0"));
    Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\ta1\tb2"));
    Assert.assertTrue(rs.get(1), rs.get(1).endsWith("streamingnobuckets/delta_0000002_0000003/bucket_00000"));
    Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\ta3\tb4"));
    Assert.assertTrue(rs.get(2), rs.get(2).endsWith("streamingnobuckets/delta_0000002_0000003/bucket_00000"));
    Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\ta5\tb6"));
    Assert.assertTrue(rs.get(3), rs.get(3).endsWith("streamingnobuckets/delta_0000002_0000003/bucket_00000"));
    Assert.assertTrue(rs.get(4), rs.get(4).startsWith("{\"writeid\":3,\"bucketid\":536870912,\"rowid\":1}\ta7\tb8"));
    Assert.assertTrue(rs.get(4), rs.get(4).endsWith("streamingnobuckets/delta_0000002_0000003/bucket_00000"));

    queryTable(driver, "update default.streamingnobuckets set a=0, b=0 where a='a7'");
    queryTable(driver, "delete from default.streamingnobuckets where a='a1'");
    rs = queryTable(driver, "select a, b from default.streamingnobuckets order by a, b");
    int row = 0;
    Assert.assertEquals("at row=" + row, "0\t0", rs.get(row++));
    Assert.assertEquals("at row=" + row, "a3\tb4", rs.get(row++));
    Assert.assertEquals("at row=" + row, "a5\tb6", rs.get(row++));
    Assert.assertEquals("at row=" + row, "foo\tbar", rs.get(row++));

    queryTable(driver, "alter table default.streamingnobuckets compact 'major'");
    runWorker(conf);
    rs = queryTable(driver, "select ROW__ID, a, b, INPUT__FILE__NAME from default.streamingnobuckets order by ROW__ID");
    Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\tfoo\tbar"));
    Assert.assertTrue(rs.get(0), rs.get(0).endsWith("streamingnobuckets/base_0000005_v0000025/bucket_00000"));
    Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\ta3\tb4"));
    Assert.assertTrue(rs.get(1), rs.get(1).endsWith("streamingnobuckets/base_0000005_v0000025/bucket_00000"));
    Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\ta5\tb6"));
    Assert.assertTrue(rs.get(2), rs.get(2).endsWith("streamingnobuckets/base_0000005_v0000025/bucket_00000"));
    Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t0\t0"));
    Assert.assertTrue(rs.get(3), rs.get(3).endsWith("streamingnobuckets/base_0000005_v0000025/bucket_00000"));
  }

  /**
   * this is a clone from TestTxnStatement2....
   */
  public static void runWorker(HiveConf hiveConf) throws Exception {
    AtomicBoolean stop = new AtomicBoolean(true);
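    // 'stop' starts out true, so the compactor Worker below is expected to perform a single
    // compaction pass and return rather than keep looping as a long-running daemon thread.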
    Worker t = new Worker();
    t.setThreadId((int) t.getId());
    t.setConf(hiveConf);
    AtomicBoolean looped = new AtomicBoolean();
    t.init(stop, looped);
    t.run();
  }

  // stream data into streaming table with N buckets, then copy the data into another bucketed table
  // check if bucketing in both was done in the same way
  @Test
  @Ignore
  public void testStreamBucketingMatchesRegularBucketing() throws Exception {
    int bucketCount = 100;

    String dbUri = "raw://" + new Path(dbFolder.newFolder().toString()).toUri().toString();
    String tableLoc = "'" + dbUri + Path.SEPARATOR + "streamedtable" + "'";
    String tableLoc2 = "'" + dbUri + Path.SEPARATOR + "finaltable" + "'";
    String tableLoc3 = "'" + dbUri + Path.SEPARATOR + "nobucket" + "'";

    try (IDriver driver = DriverFactory.newDriver(conf)) {
      runDDL(driver, "create database testBucketing3");
      runDDL(driver, "use testBucketing3");

      runDDL(driver, "create table streamedtable ( key1 string,key2 int,data string ) clustered by ( key1,key2 ) into "
          + bucketCount + " buckets stored as orc location " + tableLoc + " TBLPROPERTIES ('transactional'='true')");
      // In 'nobucket' table we capture bucketid from streamedtable to work around a hive bug that prevents joining two identically bucketed tables
      runDDL(driver, "create table nobucket ( bucketid int, key1 string,key2 int,data string ) location " + tableLoc3);
      runDDL(driver,
          "create table finaltable ( bucketid int, key1 string,key2 int,data string ) clustered by ( key1,key2 ) into "
              + bucketCount + " buckets stored as orc location " + tableLoc2 + " TBLPROPERTIES ('transactional'='true')");

      String[] records = new String[]{
          "PSFAHYLZVC,29,EPNMA",
          "PPPRKWAYAU,96,VUTEE",
          "MIAOFERCHI,3,WBDSI",
          "CEGQAZOWVN,0,WCUZL",
          "XWAKMNSVQF,28,YJVHU",
          "XBWTSAJWME,2,KDQFO",
          "FUVLQTAXAY,5,LDSDG",
          "QTQMDJMGJH,6,QBOMA",
          "EFLOTLWJWN,71,GHWPS",
          "PEQNAOJHCM,82,CAAFI",
          "MOEKQLGZCP,41,RUACR",
          "QZXMCOPTID,37,LFLWE",
          "EYALVWICRD,13,JEZLC",
          "VYWLZAYTXX,16,DMVZX",
          "OSALYSQIXR,47,HNZVE",
          "JGKVHKCEGQ,25,KSCJB",
          "WQFMMYDHET,12,DTRWA",
          "AJOVAYZKZQ,15,YBKFO",
          "YAQONWCUAU,31,QJNHZ",
          "DJBXUEUOEB,35,IYCBL"
      };

      HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "testBucketing3", "streamedtable", null);
      String[] colNames1 = new String[]{"key1", "key2", "data"};
      DelimitedInputWriter wr = new DelimitedInputWriter(colNames1, ",", endPt);
      StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
      TransactionBatch txnBatch = connection.fetchTransactionBatch(2, wr);
      txnBatch.beginNextTransaction();

      for (String record : records) {
        txnBatch.write(record.toString().getBytes());
      }

      txnBatch.commit();
      txnBatch.close();
      connection.close();

      ArrayList<String> res1 = queryTable(driver, "select row__id.bucketid, * from streamedtable order by key2");
      for (String re : res1) {
        System.out.println(re);
      }

      driver.run("insert into nobucket select row__id.bucketid,* from streamedtable");
      runDDL(driver, " insert into finaltable select * from nobucket");
      ArrayList<String> res2 = queryTable(driver,
          "select row__id.bucketid,* from finaltable where row__id.bucketid<>bucketid");
      for (String s : res2) {
        LOG.error(s);
      }
      Assert.assertTrue(res2.isEmpty());
    } finally {
      conf.unset(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname);
    }
  }

  @Test
  public void testTableValidation() throws Exception {
    int bucketCount = 100;

    String dbUri = "raw://" + new Path(dbFolder.newFolder().toString()).toUri().toString();
    String tbl1 = "validation1";
    String tbl2 = "validation2";

    String tableLoc = "'" + dbUri + Path.SEPARATOR + tbl1 + "'";
    String tableLoc2 = "'" + dbUri + Path.SEPARATOR + tbl2 + "'";

    runDDL(driver, "create database testBucketing3");
    runDDL(driver, "use testBucketing3");

    runDDL(driver, "create table " + tbl1 + " ( key1 string, data string ) clustered by ( key1 ) into "
        + bucketCount + " buckets stored as orc location " + tableLoc + " TBLPROPERTIES ('transactional'='false')");
    runDDL(driver, "create table " + tbl2 + " ( key1 string, data string ) clustered by ( key1 ) into "
        + bucketCount + " buckets stored as orc location " + tableLoc2 + " TBLPROPERTIES ('transactional'='false')");

    try {
      HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "testBucketing3", "validation1", null);
      endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
      Assert.assertTrue("InvalidTable exception was not thrown", false);
    } catch (InvalidTable e) {
      // expecting this exception
    }
    try {
      HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "testBucketing3", "validation2", null);
      endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
      Assert.assertTrue("InvalidTable exception was not thrown", false);
    } catch (InvalidTable e) {
      // expecting this exception
    }
  }

  /**
   * @deprecated use {@link #checkDataWritten2(Path, long, long, int, String, boolean, String...)} -
   * there is little value in using InputFormat directly
   */
  @Deprecated
  private void checkDataWritten(Path partitionPath, long minTxn, long maxTxn, int buckets, int numExpectedFiles,
      String... records) throws Exception {
    ValidWriteIdList writeIds = getTransactionContext(conf);
    AcidUtils.Directory dir = AcidUtils.getAcidState(null, partitionPath, conf, writeIds, null, false, null, false);
    Assert.assertEquals(0, dir.getObsolete().size());
    Assert.assertEquals(0, dir.getOriginalFiles().size());
    List<AcidUtils.ParsedDelta> current = dir.getCurrentDirectories();
    System.out.println("Files found: ");
    for (AcidUtils.ParsedDelta pd : current) {
      System.out.println(pd.getPath().toString());
    }
    Assert.assertEquals(numExpectedFiles, current.size());

    // find the absolute minimum and maximum write ids across the delta directories
    long min = Long.MAX_VALUE;
    long max = Long.MIN_VALUE;
    for (AcidUtils.ParsedDelta pd : current) {
      if (pd.getMaxWriteId() > max) {
        max = pd.getMaxWriteId();
      }
      if (pd.getMinWriteId() < min) {
        min = pd.getMinWriteId();
      }
    }
    Assert.assertEquals(minTxn, min);
    Assert.assertEquals(maxTxn, max);

    InputFormat inf = new OrcInputFormat();
    JobConf job = new JobConf();
    job.set("mapred.input.dir", partitionPath.toString());
    job.set(BUCKET_COUNT, Integer.toString(buckets));
    job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "id,msg");
    job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "bigint:string");
    AcidUtils.setAcidOperationalProperties(job, true, null);
    job.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true);
    job.set(ValidWriteIdList.VALID_WRITEIDS_KEY, writeIds.toString());
    job.set(ValidTxnList.VALID_TXNS_KEY, conf.get(ValidTxnList.VALID_TXNS_KEY));
    InputSplit[] splits = inf.getSplits(job, buckets);
    Assert.assertEquals(numExpectedFiles, splits.length);

    org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct> rr =
        inf.getRecordReader(splits[0], job, Reporter.NULL);
    NullWritable key = rr.createKey();
    OrcStruct value = rr.createValue();
    for (String record : records) {
      Assert.assertEquals(true, rr.next(key, value));
      Assert.assertEquals(record, value.toString());
    }
    Assert.assertEquals(false, rr.next(key, value));
  }

  /**
   * @param validationQuery query to read from table to compare data against {@code records}
   * @param records expected data. each row is a CSV list of values
   */
  private void checkDataWritten2(Path partitionPath, long minTxn, long maxTxn, int numExpectedFiles,
      String validationQuery, boolean vectorize, String... records) throws Exception {
    AcidUtils.Directory dir = AcidUtils.getAcidState(null, partitionPath, conf, getTransactionContext(conf), null,
        false, null, false);
    Assert.assertEquals(0, dir.getObsolete().size());
    Assert.assertEquals(0, dir.getOriginalFiles().size());
    List<AcidUtils.ParsedDelta> current = dir.getCurrentDirectories();
    System.out.println("Files found: ");
    for (AcidUtils.ParsedDelta pd : current) {
      System.out.println(pd.getPath().toString());
    }
    Assert.assertEquals(numExpectedFiles, current.size());

    // find the absolute minimum and maximum write ids across the delta directories
    long min = Long.MAX_VALUE;
    long max = Long.MIN_VALUE;
    for (AcidUtils.ParsedDelta pd : current) {
      if (pd.getMaxWriteId() > max) {
        max = pd.getMaxWriteId();
      }
      if (pd.getMinWriteId() < min) {
        min = pd.getMinWriteId();
      }
    }
    Assert.assertEquals(minTxn, min);
    Assert.assertEquals(maxTxn, max);

    boolean isVectorizationEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED);
    if (vectorize) {
      conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
    }

    String currStrategy = conf.getVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY);
    for (String strategy : ((Validator.StringSet) HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.getValidator()).getExpected()) {
      //run it with each split strategy - make sure the results don't differ across strategies
      conf.setVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY, strategy.toUpperCase());
      List<String> actualResult = queryTable(driver, validationQuery);
      for (int i = 0; i < actualResult.size(); i++) {
        Assert.assertEquals("diff at [" + i + "]. actual=" + actualResult + " expected=" +
            Arrays.toString(records), records[i], actualResult.get(i));
      }
    }
    conf.setVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY, currStrategy);
    conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, isVectorizationEnabled);
  }

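  // Helper below: capture a snapshot of currently valid transactions from the metastore and turn it
  // into a ValidWriteIdList for dbName.tblName, so that AcidUtils.getAcidState and the readers in
  // the checks above only consider data visible to that snapshot.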
  private ValidWriteIdList getTransactionContext(Configuration conf) throws Exception {
    ValidTxnList validTxnList = msClient.getValidTxns();
    conf.set(ValidTxnList.VALID_TXNS_KEY, validTxnList.writeToString());
    List<TableValidWriteIds> v = msClient.getValidWriteIds(Collections
        .singletonList(TableName.getDbTable(dbName, tblName)), validTxnList.writeToString());
    return TxnCommonUtils.createValidReaderWriteIdList(v.get(0));
  }

  private void checkNothingWritten(Path partitionPath) throws Exception {
    AcidUtils.Directory dir = AcidUtils.getAcidState(null, partitionPath, conf, getTransactionContext(conf), null,
        false, null, false);
    Assert.assertEquals(0, dir.getObsolete().size());
    Assert.assertEquals(0, dir.getOriginalFiles().size());
    List<AcidUtils.ParsedDelta> current = dir.getCurrentDirectories();
    Assert.assertEquals(0, current.size());
  }

  @Test
  public void testEndpointConnection() throws Exception {
    // For partitioned table, partitionVals are specified
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals);
    StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName()); //shouldn't throw
    connection.close();

    // For unpartitioned table, partitionVals are not specified
    endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
    endPt.newConnection(false, "UT_" + Thread.currentThread().getName()).close(); // should not throw

    // For partitioned table, partitionVals are not specified
    try {
      endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, null);
      connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName());
      Assert.assertTrue("ConnectionError was not thrown", false);
      connection.close();
    } catch (ConnectionError e) {
      // expecting this exception
      String errMsg = "doesn't specify any partitions for partitioned table";
      Assert.assertTrue(e.toString().endsWith(errMsg));
    }

    // For unpartitioned table, partition values are specified
    try {
      endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, partitionVals);
      connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
      Assert.assertTrue("ConnectionError was not thrown", false);
      connection.close();
    } catch (ConnectionError e) {
      // expecting this exception
      String errMsg = "specifies partitions for unpartitioned table";
      Assert.assertTrue(e.toString().endsWith(errMsg));
    }
  }

  @Test
  public void testAddPartition() throws Exception {
    List<String> newPartVals = new ArrayList<String>(2);
    newPartVals.add(PART1_CONTINENT);
    newPartVals.add("Nepal");

    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, newPartVals);

    // Ensure partition is absent
    try {
      msClient.getPartition(endPt.database, endPt.table, endPt.partitionVals);
      Assert.assertTrue("Partition already exists", false);
    } catch (NoSuchObjectException e) {
      // expect this exception
    }

    // Create partition
    Assert.assertNotNull(endPt.newConnection(true, "UT_" + Thread.currentThread().getName()));

    // Ensure partition is present
    Partition p = msClient.getPartition(endPt.database, endPt.table, endPt.partitionVals);
    Assert.assertNotNull("Did not find added partition", p);
  }

  @Test
  public void testTransactionBatchEmptyCommit() throws Exception {
    // 1) to partitioned table
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals);
    StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
    DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames, ",", endPt, connection);

    TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
    txnBatch.beginNextTransaction();
    txnBatch.commit();
    Assert.assertEquals(TransactionBatch.TxnState.COMMITTED, txnBatch.getCurrentTransactionState());
    txnBatch.close();
    connection.close();

    // 2) To unpartitioned table
    endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
    writer = new DelimitedInputWriter(fieldNames2, ",", endPt);
    connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());

    txnBatch = connection.fetchTransactionBatch(10, writer);
    txnBatch.beginNextTransaction();
    txnBatch.commit();
    Assert.assertEquals(TransactionBatch.TxnState.COMMITTED, txnBatch.getCurrentTransactionState());
    txnBatch.close();
    connection.close();
  }

  /**
   * check that transactions that have not heartbeated and timed out get properly aborted
   * @throws Exception
   */
  @Test
  public void testTimeOutReaper() throws Exception {
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
    DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames2, ",", endPt);
    StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());

    TransactionBatch txnBatch = connection.fetchTransactionBatch(5, writer);
    txnBatch.beginNextTransaction();
    conf.setTimeVar(HiveConf.ConfVars.HIVE_TIMEDOUT_TXN_REAPER_START, 0, TimeUnit.SECONDS);
    //ensure txn times out
    conf.setTimeVar(HiveConf.ConfVars.HIVE_TXN_TIMEOUT, 1, TimeUnit.MILLISECONDS);
    AcidHouseKeeperService houseKeeperService = new AcidHouseKeeperService();
    houseKeeperService.setConf(conf);
    houseKeeperService.run();
    try {
      //should fail because the TransactionBatch timed out
      txnBatch.commit();
    } catch (TransactionError e) {
      Assert.assertTrue("Expected aborted transaction", e.getCause() instanceof TxnAbortedException);
    }
    txnBatch.close();
    txnBatch = connection.fetchTransactionBatch(10, writer);
    txnBatch.beginNextTransaction();
    txnBatch.commit();
    txnBatch.beginNextTransaction();
    houseKeeperService.run();
    try {
      //should fail because the TransactionBatch timed out
      txnBatch.commit();
    } catch (TransactionError e) {
      Assert.assertTrue("Expected aborted transaction", e.getCause() instanceof TxnAbortedException);
    }
    txnBatch.close();
    connection.close();
  }

  @Test
  public void testHeartbeat() throws Exception {
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
    StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
    DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames2, ",", endPt, connection);
    TransactionBatch txnBatch = connection.fetchTransactionBatch(5, writer);
    txnBatch.beginNextTransaction();
    //todo: this should ideally check Transaction heartbeat as well, but heartbeat
    //timestamp is not reported yet
    //GetOpenTxnsInfoResponse txnresp = msClient.showTxns();
    ShowLocksRequest request = new ShowLocksRequest();
    request.setDbname(dbName2);
    request.setTablename(tblName2);
    ShowLocksResponse response = msClient.showLocks(request);
    Assert.assertEquals("Wrong number of locks: " + response, 1, response.getLocks().size());
    ShowLocksResponseElement lock = response.getLocks().get(0);
    long acquiredAt = lock.getAcquiredat();
    long heartbeatAt = lock.getLastheartbeat();
    txnBatch.heartbeat();
    response = msClient.showLocks(request);
    Assert.assertEquals("Wrong number of locks2: " + response, 1, response.getLocks().size());
    lock = response.getLocks().get(0);
    Assert.assertEquals("Acquired timestamp didn't match", acquiredAt, lock.getAcquiredat());
    Assert.assertTrue("Expected new heartbeat (" + lock.getLastheartbeat() +
        ") == old heartbeat(" + heartbeatAt + ")", lock.getLastheartbeat() == heartbeatAt);
    txnBatch.close();
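    // Exercise a larger batch, interleaving heartbeat(), abort() and commit() at different
    // intervals across many transactions.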
    int txnBatchSize = 200;
    txnBatch = connection.fetchTransactionBatch(txnBatchSize, writer);
    for (int i = 0; i < txnBatchSize; i++) {
      txnBatch.beginNextTransaction();
      if (i % 47 == 0) {
        txnBatch.heartbeat();
      }
      if (i % 10 == 0) {
        txnBatch.abort();
      } else {
        txnBatch.commit();
      }
      if (i % 37 == 0) {
        txnBatch.heartbeat();
      }
    }
  }

  @Test
  public void testTransactionBatchEmptyAbort() throws Exception {
    // 1) to partitioned table
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals);
    StreamingConnection connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName());
    DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames, ",", endPt, connection);

    TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
    txnBatch.beginNextTransaction();
    txnBatch.abort();
    Assert.assertEquals(TransactionBatch.TxnState.ABORTED, txnBatch.getCurrentTransactionState());
    txnBatch.close();
    connection.close();

    // 2) to unpartitioned table
    endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
    writer = new DelimitedInputWriter(fieldNames, ",", endPt);
    connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName());

    txnBatch = connection.fetchTransactionBatch(10, writer);
    txnBatch.beginNextTransaction();
    txnBatch.abort();
    Assert.assertEquals(TransactionBatch.TxnState.ABORTED, txnBatch.getCurrentTransactionState());
    txnBatch.close();
    connection.close();
  }

  @Test
  public void testTransactionBatchCommit_Delimited() throws Exception {
    testTransactionBatchCommit_Delimited(null);
  }

  @Test
  public void testTransactionBatchCommit_DelimitedUGI() throws Exception {
    testTransactionBatchCommit_Delimited(Utils.getUGI());
  }

  private void testTransactionBatchCommit_Delimited(UserGroupInformation ugi) throws Exception {
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals);
    StreamingConnection connection = endPt.newConnection(true, conf, ugi, "UT_" + Thread.currentThread().getName());
    DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames, ",", endPt, conf, connection);

    // 1st Txn
    TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
    txnBatch.beginNextTransaction();
    Assert.assertEquals(TransactionBatch.TxnState.OPEN, txnBatch.getCurrentTransactionState());
    txnBatch.write("1,Hello streaming".getBytes());
    txnBatch.commit();

    checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}");

    Assert.assertEquals(TransactionBatch.TxnState.COMMITTED, txnBatch.getCurrentTransactionState());

    // 2nd Txn
    txnBatch.beginNextTransaction();
    Assert.assertEquals(TransactionBatch.TxnState.OPEN, txnBatch.getCurrentTransactionState());
    txnBatch.write("2,Welcome to streaming".getBytes());

    // data should not be visible
    checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}");

    txnBatch.commit();

    checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}",
        "{2, Welcome to streaming}");

    txnBatch.close();
    Assert.assertEquals(TransactionBatch.TxnState.INACTIVE, txnBatch.getCurrentTransactionState());

    connection.close();

    // To Unpartitioned table
    endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
    connection = endPt.newConnection(true, conf, ugi, "UT_" + Thread.currentThread().getName());
    writer = new DelimitedInputWriter(fieldNames, ",", endPt, conf, connection);

    // 1st Txn
    txnBatch = connection.fetchTransactionBatch(10, writer);
    txnBatch.beginNextTransaction();
    Assert.assertEquals(TransactionBatch.TxnState.OPEN, txnBatch.getCurrentTransactionState());
    txnBatch.write("1,Hello streaming".getBytes());
    txnBatch.commit();

    Assert.assertEquals(TransactionBatch.TxnState.COMMITTED, txnBatch.getCurrentTransactionState());
    connection.close();
  }

  @Test
  public void testTransactionBatchCommit_Regex() throws Exception {
    testTransactionBatchCommit_Regex(null);
  }

  @Test
  public void testTransactionBatchCommit_RegexUGI() throws Exception {
    testTransactionBatchCommit_Regex(Utils.getUGI());
  }

  private void testTransactionBatchCommit_Regex(UserGroupInformation ugi) throws Exception {
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals);
    StreamingConnection connection = endPt.newConnection(true, conf, ugi, "UT_" + Thread.currentThread().getName());
    String regex = "([^,]*),(.*)";
    StrictRegexWriter writer = new StrictRegexWriter(regex, endPt, conf, connection);

    // 1st Txn
    TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
    txnBatch.beginNextTransaction();
    Assert.assertEquals(TransactionBatch.TxnState.OPEN, txnBatch.getCurrentTransactionState());
    txnBatch.write("1,Hello streaming".getBytes());
    txnBatch.commit();

    checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}");

    Assert.assertEquals(TransactionBatch.TxnState.COMMITTED, txnBatch.getCurrentTransactionState());

    // 2nd Txn
    txnBatch.beginNextTransaction();
    Assert.assertEquals(TransactionBatch.TxnState.OPEN, txnBatch.getCurrentTransactionState());
    txnBatch.write("2,Welcome to streaming".getBytes());

    // data should not be visible
    checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}");

    txnBatch.commit();

    checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}",
        "{2, Welcome to streaming}");

    txnBatch.close();
    Assert.assertEquals(TransactionBatch.TxnState.INACTIVE, txnBatch.getCurrentTransactionState());

    connection.close();

    // To Unpartitioned table
    endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
    connection = endPt.newConnection(true, conf, ugi, "UT_" + Thread.currentThread().getName());
    regex = "([^:]*):(.*)";
    writer = new StrictRegexWriter(regex, endPt, conf, connection);

    // 1st Txn
    txnBatch = connection.fetchTransactionBatch(10, writer);
    txnBatch.beginNextTransaction();
    Assert.assertEquals(TransactionBatch.TxnState.OPEN, txnBatch.getCurrentTransactionState());
    txnBatch.write("1:Hello streaming".getBytes());
    txnBatch.commit();

    Assert.assertEquals(TransactionBatch.TxnState.COMMITTED, txnBatch.getCurrentTransactionState());
    connection.close();
  }

  @Test
  public void testTransactionBatchCommit_Json() throws Exception {
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals);
    StreamingConnection connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName());
    StrictJsonWriter writer = new StrictJsonWriter(endPt, connection);

    // 1st Txn
    TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
    txnBatch.beginNextTransaction();
    Assert.assertEquals(TransactionBatch.TxnState.OPEN, txnBatch.getCurrentTransactionState());
    String rec1 = "{\"id\" : 1, \"msg\": \"Hello streaming\"}";
    txnBatch.write(rec1.getBytes());
    txnBatch.commit();

    checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}");

    Assert.assertEquals(TransactionBatch.TxnState.COMMITTED, txnBatch.getCurrentTransactionState());

    txnBatch.close();
    Assert.assertEquals(TransactionBatch.TxnState.INACTIVE, txnBatch.getCurrentTransactionState());

    connection.close();
    List<String> rs = queryTable(driver, "select * from " + dbName + "." + tblName);
    Assert.assertEquals(1, rs.size());
  }

  @Test
  public void testRemainingTransactions() throws Exception {
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals);
    DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames, ",", endPt);
    StreamingConnection connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName());

    // 1) test with txn.Commit()
    TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
    int batch = 0;
    int initialCount = txnBatch.remainingTransactions();
    while (txnBatch.remainingTransactions() > 0) {
      txnBatch.beginNextTransaction();
      Assert.assertEquals(--initialCount, txnBatch.remainingTransactions());
      for (int rec = 0; rec < 2; ++rec) {
        Assert.assertEquals(TransactionBatch.TxnState.OPEN, txnBatch.getCurrentTransactionState());
        txnBatch.write((batch * rec + ",Hello streaming").getBytes());
      }
      txnBatch.commit();
      Assert.assertEquals(TransactionBatch.TxnState.COMMITTED, txnBatch.getCurrentTransactionState());
      ++batch;
    }
    Assert.assertEquals(0, txnBatch.remainingTransactions());
    txnBatch.close();

    Assert.assertEquals(TransactionBatch.TxnState.INACTIVE, txnBatch.getCurrentTransactionState());

    // 2) test with txn.Abort()
    txnBatch = connection.fetchTransactionBatch(10, writer);
    batch = 0;
    initialCount = txnBatch.remainingTransactions();
    while (txnBatch.remainingTransactions() > 0) {
      txnBatch.beginNextTransaction();
      Assert.assertEquals(--initialCount, txnBatch.remainingTransactions());
      for (int rec = 0; rec < 2; ++rec) {
        Assert.assertEquals(TransactionBatch.TxnState.OPEN, txnBatch.getCurrentTransactionState());
        txnBatch.write((batch * rec + ",Hello streaming").getBytes());
      }
      txnBatch.abort();
      Assert.assertEquals(TransactionBatch.TxnState.ABORTED, txnBatch.getCurrentTransactionState());
      ++batch;
    }
    Assert.assertEquals(0, txnBatch.remainingTransactions());
    txnBatch.close();

    Assert.assertEquals(TransactionBatch.TxnState.INACTIVE, txnBatch.getCurrentTransactionState());

    connection.close();
  }

  @Test
  public void testTransactionBatchAbort() throws Exception {
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals);
    StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
    DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames, ",", endPt, connection);

    TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
    txnBatch.beginNextTransaction();
    txnBatch.write("1,Hello streaming".getBytes());
    txnBatch.write("2,Welcome to streaming".getBytes());
    txnBatch.abort();

    checkNothingWritten(partLoc);

    Assert.assertEquals(TransactionBatch.TxnState.ABORTED, txnBatch.getCurrentTransactionState());

    txnBatch.close();
    connection.close();

    checkNothingWritten(partLoc);
  }

  @Test
  public void testTransactionBatchAbortAndCommit() throws Exception {
    String agentInfo = "UT_" + Thread.currentThread().getName();
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals);
    StreamingConnection connection = endPt.newConnection(false, agentInfo);
    DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames, ",", endPt, connection);

    TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
    txnBatch.beginNextTransaction();
    txnBatch.write("1,Hello streaming".getBytes());
    txnBatch.write("2,Welcome to streaming".getBytes());
    ShowLocksResponse resp = msClient.showLocks(new ShowLocksRequest());
    Assert.assertEquals("LockCount", 1, resp.getLocksSize());
    Assert.assertEquals("LockType", LockType.SHARED_READ, resp.getLocks().get(0).getType());
    Assert.assertEquals("LockState", LockState.ACQUIRED, resp.getLocks().get(0).getState());
    Assert.assertEquals("AgentInfo", agentInfo, resp.getLocks().get(0).getAgentInfo());
    txnBatch.abort();

    checkNothingWritten(partLoc);

    Assert.assertEquals(TransactionBatch.TxnState.ABORTED, txnBatch.getCurrentTransactionState());

    txnBatch.beginNextTransaction();
    txnBatch.write("1,Hello streaming".getBytes());
    txnBatch.write("2,Welcome to streaming".getBytes());
    txnBatch.commit();

    checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}",
        "{2, Welcome to streaming}");

    txnBatch.close();
    connection.close();
  }

  @Test
  public void testMultipleTransactionBatchCommits() throws Exception {
    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals);
    DelimitedInputWriter writer = new Delimit

Note: this large file is truncated at this point; the full TestStreaming.java is available in the apache/hive repository.