
/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java

http://github.com/apache/hive
Java | 2373 lines | 1922 code | 302 blank | 149 comment | 141 complexity
Possible License(s): Apache-2.0
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hive.hcatalog.streaming;
  19. import java.io.ByteArrayOutputStream;
  20. import java.io.File;
  21. import java.io.FileFilter;
  22. import java.io.FileNotFoundException;
  23. import java.io.IOException;
  24. import java.io.PrintStream;
  25. import java.net.URI;
  26. import java.net.URISyntaxException;
  27. import java.nio.ByteBuffer;
  28. import java.util.ArrayList;
  29. import java.util.Arrays;
  30. import java.util.Collection;
  31. import java.util.Collections;
  32. import java.util.HashMap;
  33. import java.util.List;
  34. import java.util.Map;
  35. import java.util.concurrent.TimeUnit;
  36. import java.util.concurrent.atomic.AtomicBoolean;
  37. import org.apache.hadoop.conf.Configuration;
  38. import org.apache.hadoop.fs.FSDataInputStream;
  39. import org.apache.hadoop.fs.FSDataOutputStream;
  40. import org.apache.hadoop.fs.FileStatus;
  41. import org.apache.hadoop.fs.FileSystem;
  42. import org.apache.hadoop.fs.Path;
  43. import org.apache.hadoop.fs.RawLocalFileSystem;
  44. import org.apache.hadoop.fs.permission.FsPermission;
  45. import org.apache.hadoop.hive.cli.CliSessionState;
  46. import org.apache.hadoop.hive.common.JavaUtils;
  47. import org.apache.hadoop.hive.common.TableName;
  48. import org.apache.hadoop.hive.common.ValidTxnList;
  49. import org.apache.hadoop.hive.common.ValidWriteIdList;
  50. import org.apache.hadoop.hive.conf.HiveConf;
  51. import org.apache.hadoop.hive.conf.Validator;
  52. import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
  53. import org.apache.hadoop.hive.metastore.IMetaStoreClient;
  54. import org.apache.hadoop.hive.metastore.api.FieldSchema;
  55. import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse;
  56. import org.apache.hadoop.hive.metastore.api.LockState;
  57. import org.apache.hadoop.hive.metastore.api.LockType;
  58. import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
  59. import org.apache.hadoop.hive.metastore.api.Partition;
  60. import org.apache.hadoop.hive.metastore.api.ShowLocksRequest;
  61. import org.apache.hadoop.hive.metastore.api.ShowLocksResponse;
  62. import org.apache.hadoop.hive.metastore.api.ShowLocksResponseElement;
  63. import org.apache.hadoop.hive.metastore.api.TableValidWriteIds;
  64. import org.apache.hadoop.hive.metastore.api.TxnAbortedException;
  65. import org.apache.hadoop.hive.metastore.api.TxnInfo;
  66. import org.apache.hadoop.hive.metastore.api.TxnState;
  67. import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
  68. import org.apache.hadoop.hive.metastore.txn.AcidHouseKeeperService;
  69. import org.apache.hadoop.hive.metastore.txn.TxnCommonUtils;
  70. import org.apache.hadoop.hive.metastore.txn.TxnDbUtil;
  71. import org.apache.hadoop.hive.metastore.txn.TxnStore;
  72. import org.apache.hadoop.hive.metastore.txn.TxnUtils;
  73. import org.apache.hadoop.hive.ql.DriverFactory;
  74. import org.apache.hadoop.hive.ql.IDriver;
  75. import org.apache.hadoop.hive.ql.io.AcidUtils;
  76. import org.apache.hadoop.hive.ql.io.BucketCodec;
  77. import org.apache.hadoop.hive.ql.io.IOConstants;
  78. import org.apache.hadoop.hive.ql.io.orc.OrcFile;
  79. import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
  80. import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
  81. import org.apache.hadoop.hive.ql.io.orc.Reader;
  82. import org.apache.hadoop.hive.ql.io.orc.RecordReader;
  83. import org.apache.hadoop.hive.ql.processors.CommandProcessorException;
  84. import org.apache.hadoop.hive.ql.session.SessionState;
  85. import org.apache.hadoop.hive.ql.txn.compactor.Worker;
  86. import org.apache.hadoop.hive.serde.serdeConstants;
  87. import org.apache.hadoop.hive.serde2.objectinspector.StructField;
  88. import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
  89. import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
  90. import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
  91. import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
  92. import org.apache.hadoop.hive.shims.Utils;
  93. import org.apache.hadoop.io.NullWritable;
  94. import org.apache.hadoop.mapred.InputFormat;
  95. import org.apache.hadoop.mapred.InputSplit;
  96. import org.apache.hadoop.mapred.JobConf;
  97. import org.apache.hadoop.mapred.Reporter;
  98. import org.apache.hadoop.security.UserGroupInformation;
  99. import org.apache.orc.impl.OrcAcidUtils;
  100. import org.apache.orc.tools.FileDump;
  101. import org.apache.thrift.TException;
  102. import org.junit.After;
  103. import org.junit.Assert;
  104. import org.junit.Before;
  105. import org.junit.Ignore;
  106. import org.junit.Rule;
  107. import org.junit.Test;
  108. import org.junit.rules.TemporaryFolder;
  109. import org.slf4j.Logger;
  110. import org.slf4j.LoggerFactory;
  111. import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.BUCKET_COUNT;
  112. public class TestStreaming {
  113. private static final Logger LOG = LoggerFactory.getLogger(TestStreaming.class);
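// Test-only FileSystem registered under the "raw" scheme: a RawLocalFileSystem whose
// getFileStatus() approximates POSIX permissions from the local File's read/write/execute flags.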
  114. public static class RawFileSystem extends RawLocalFileSystem {
  115. private static final URI NAME;
  116. static {
  117. try {
  118. NAME = new URI("raw:///");
  119. } catch (URISyntaxException se) {
  120. throw new IllegalArgumentException("bad uri", se);
  121. }
  122. }
  123. @Override
  124. public URI getUri() {
  125. return NAME;
  126. }
  127. @Override
  128. public String getScheme() {
  129. return "raw";
  130. }
  131. @Override
  132. public FileStatus getFileStatus(Path path) throws IOException {
  133. File file = pathToFile(path);
  134. if (!file.exists()) {
  135. throw new FileNotFoundException("Can't find " + path);
  136. }
  137. // get close enough
  138. short mod = 0;
  139. if (file.canRead()) {
  140. mod |= 0444;
  141. }
  142. if (file.canWrite()) {
  143. mod |= 0200;
  144. }
  145. if (file.canExecute()) {
  146. mod |= 0111;
  147. }
  148. return new FileStatus(file.length(), file.isDirectory(), 1, 1024,
  149. file.lastModified(), file.lastModified(),
  150. FsPermission.createImmutable(mod), "owen", "users", path);
  151. }
  152. }
  153. private static final String COL1 = "id";
  154. private static final String COL2 = "msg";
  155. private final HiveConf conf;
  156. private IDriver driver;
  157. private final IMetaStoreClient msClient;
  158. final String metaStoreURI = null;
  159. // partitioned table
  160. private final static String dbName = "testing";
  161. private final static String tblName = "alerts";
  162. private final static String[] fieldNames = new String[]{COL1,COL2};
  163. List<String> partitionVals;
  164. private static Path partLoc;
  165. private static Path partLoc2;
  166. // unpartitioned table
  167. private final static String dbName2 = "testing2";
  168. private final static String tblName2 = "alerts";
  169. private final static String[] fieldNames2 = new String[]{COL1,COL2};
  170. // for bucket join testing
  171. private final static String dbName3 = "testing3";
  172. private final static String tblName3 = "dimensionTable";
  173. private final static String dbName4 = "testing4";
  174. private final static String tblName4 = "factTable";
  175. List<String> partitionVals2;
  176. private final String PART1_CONTINENT = "Asia";
  177. private final String PART1_COUNTRY = "India";
  178. @Rule
  179. public TemporaryFolder dbFolder = new TemporaryFolder();
  180. public TestStreaming() throws Exception {
  181. partitionVals = new ArrayList<String>(2);
  182. partitionVals.add(PART1_CONTINENT);
  183. partitionVals.add(PART1_COUNTRY);
  184. partitionVals2 = new ArrayList<String>(1);
  185. partitionVals2.add(PART1_COUNTRY);
  186. conf = new HiveConf(this.getClass());
  187. conf.set("fs.raw.impl", RawFileSystem.class.getName());
  188. conf
  189. .setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
  190. "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
  191. TxnDbUtil.setConfValues(conf);
  192. if (metaStoreURI!=null) {
  193. conf.setVar(HiveConf.ConfVars.METASTOREURIS, metaStoreURI);
  194. }
  195. conf.setBoolVar(HiveConf.ConfVars.METASTORE_EXECUTE_SET_UGI, true);
  196. conf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, true);
  197. dbFolder.create();
  198. //1) Start from a clean slate (metastore)
  199. TxnDbUtil.cleanDb(conf);
  200. TxnDbUtil.prepDb(conf);
  201. //2) obtain metastore clients
  202. msClient = new HiveMetaStoreClient(conf);
  203. }
  204. @Before
  205. public void setup() throws Exception {
  206. SessionState.start(new CliSessionState(conf));
  207. driver = DriverFactory.newDriver(conf);
  208. driver.setMaxRows(200002);//make sure Driver returns all results
  209. // drop and recreate the necessary databases and tables
  210. dropDB(msClient, dbName);
  211. String[] colNames = new String[] {COL1, COL2};
  212. String[] colTypes = new String[] {serdeConstants.INT_TYPE_NAME, serdeConstants.STRING_TYPE_NAME};
  213. String[] bucketCols = new String[] {COL1};
  214. String loc1 = dbFolder.newFolder(dbName + ".db").toString();
  215. String[] partNames = new String[]{"Continent", "Country"};
  216. partLoc = createDbAndTable(driver, dbName, tblName, partitionVals, colNames, colTypes, bucketCols, partNames, loc1, 1);
  217. dropDB(msClient, dbName2);
  218. String loc2 = dbFolder.newFolder(dbName2 + ".db").toString();
  219. partLoc2 = createDbAndTable(driver, dbName2, tblName2, null, colNames, colTypes, bucketCols, null, loc2, 2);
  220. String loc3 = dbFolder.newFolder("testing5.db").toString();
  221. createStoreSales("testing5", loc3);
  222. runDDL(driver, "drop table testBucketing3.streamedtable");
  223. runDDL(driver, "drop table testBucketing3.finaltable");
  224. runDDL(driver, "drop table testBucketing3.nobucket");
  225. }
  226. @After
  227. public void cleanup() throws Exception {
  228. msClient.close();
  229. driver.close();
  230. }
  231. private static List<FieldSchema> getPartitionKeys() {
  232. List<FieldSchema> fields = new ArrayList<FieldSchema>();
  233. // Defining partition names in unsorted order
  234. fields.add(new FieldSchema("continent", serdeConstants.STRING_TYPE_NAME, ""));
  235. fields.add(new FieldSchema("country", serdeConstants.STRING_TYPE_NAME, ""));
  236. return fields;
  237. }
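// Creates the transactional ORC store_sales table (partitioned by dt, clustered by
// ss_store_sk/ss_promo_sk into 4 buckets) and adds the dt='2015' partition used by
// testBucketingWhereBucketColIsNotFirstCol.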
  238. private void createStoreSales(String dbName, String loc) throws Exception {
  239. String dbUri = "raw://" + new Path(loc).toUri().toString();
  240. String tableLoc = dbUri + Path.SEPARATOR + "store_sales";
  241. boolean success = runDDL(driver, "create database IF NOT EXISTS " + dbName + " location '" + dbUri + "'");
  242. Assert.assertTrue(success);
  243. success = runDDL(driver, "use " + dbName);
  244. Assert.assertTrue(success);
  245. success = runDDL(driver, "drop table if exists store_sales");
  246. Assert.assertTrue(success);
  247. success = runDDL(driver, "create table store_sales\n" +
  248. "(\n" +
  249. " ss_sold_date_sk int,\n" +
  250. " ss_sold_time_sk int,\n" +
  251. " ss_item_sk int,\n" +
  252. " ss_customer_sk int,\n" +
  253. " ss_cdemo_sk int,\n" +
  254. " ss_hdemo_sk int,\n" +
  255. " ss_addr_sk int,\n" +
  256. " ss_store_sk int,\n" +
  257. " ss_promo_sk int,\n" +
  258. " ss_ticket_number int,\n" +
  259. " ss_quantity int,\n" +
  260. " ss_wholesale_cost decimal(7,2),\n" +
  261. " ss_list_price decimal(7,2),\n" +
  262. " ss_sales_price decimal(7,2),\n" +
  263. " ss_ext_discount_amt decimal(7,2),\n" +
  264. " ss_ext_sales_price decimal(7,2),\n" +
  265. " ss_ext_wholesale_cost decimal(7,2),\n" +
  266. " ss_ext_list_price decimal(7,2),\n" +
  267. " ss_ext_tax decimal(7,2),\n" +
  268. " ss_coupon_amt decimal(7,2),\n" +
  269. " ss_net_paid decimal(7,2),\n" +
  270. " ss_net_paid_inc_tax decimal(7,2),\n" +
  271. " ss_net_profit decimal(7,2)\n" +
  272. ")\n" +
  273. " partitioned by (dt string)\n" +
  274. "clustered by (ss_store_sk, ss_promo_sk)\n" +
  275. "INTO 4 BUCKETS stored as orc " + " location '" + tableLoc + "'" + " TBLPROPERTIES ('orc.compress'='NONE', 'transactional'='true')");
  276. Assert.assertTrue(success);
  277. success = runDDL(driver, "alter table store_sales add partition(dt='2015')");
  278. Assert.assertTrue(success);
  279. }
  280. /**
  281. * make sure it works with a table where the bucket column is not the first column
  282. * @throws Exception
  283. */
  284. @Test
  285. public void testBucketingWhereBucketColIsNotFirstCol() throws Exception {
  286. List<String> partitionVals = new ArrayList<String>();
  287. partitionVals.add("2015");
  288. HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "testing5", "store_sales", partitionVals);
  289. StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
  290. DelimitedInputWriter writer = new DelimitedInputWriter(new String[] {"ss_sold_date_sk","ss_sold_time_sk", "ss_item_sk",
  291. "ss_customer_sk", "ss_cdemo_sk", "ss_hdemo_sk", "ss_addr_sk", "ss_store_sk", "ss_promo_sk", "ss_ticket_number", "ss_quantity",
  292. "ss_wholesale_cost", "ss_list_price", "ss_sales_price", "ss_ext_discount_amt", "ss_ext_sales_price", "ss_ext_wholesale_cost",
  293. "ss_ext_list_price", "ss_ext_tax", "ss_coupon_amt", "ss_net_paid", "ss_net_paid_inc_tax", "ss_net_profit"},",", endPt, connection);
  294. TransactionBatch txnBatch = connection.fetchTransactionBatch(2, writer);
  295. txnBatch.beginNextTransaction();
  296. StringBuilder row = new StringBuilder();
  297. for(int i = 0; i < 10; i++) {
  298. for(int ints = 0; ints < 11; ints++) {
  299. row.append(ints).append(',');
  300. }
  301. for(int decs = 0; decs < 12; decs++) {
  302. row.append(i + 0.1).append(',');
  303. }
  304. row.setLength(row.length() - 1);
  305. txnBatch.write(row.toString().getBytes());
  306. }
  307. txnBatch.commit();
  308. txnBatch.close();
  309. connection.close();
  310. ArrayList<String> res = queryTable(driver, "select row__id.bucketid, * from testing5.store_sales");
  311. for (String re : res) {
  312. System.out.println(re);
  313. }
  314. }
  315. /**
  316. * Test that streaming can write to unbucketed table.
  317. */
  318. @Test
  319. public void testNoBuckets() throws Exception {
  320. queryTable(driver, "drop table if exists default.streamingnobuckets");
  321. //todo: why does it need transactional_properties?
  322. queryTable(driver, "create table default.streamingnobuckets (a string, b string) stored as orc TBLPROPERTIES('transactional'='true', 'transactional_properties'='default')");
  323. queryTable(driver, "insert into default.streamingnobuckets values('foo','bar')");
  324. List<String> rs = queryTable(driver, "select * from default.streamingNoBuckets");
  325. Assert.assertEquals(1, rs.size());
  326. Assert.assertEquals("foo\tbar", rs.get(0));
  327. HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "Default", "StreamingNoBuckets", null);
  328. String[] colNames1 = new String[] { "a", "b" };
  329. StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
  330. DelimitedInputWriter wr = new DelimitedInputWriter(colNames1,",", endPt, connection);
  331. TransactionBatch txnBatch = connection.fetchTransactionBatch(2, wr);
  332. txnBatch.beginNextTransaction();
  333. txnBatch.write("a1,b2".getBytes());
  334. txnBatch.write("a3,b4".getBytes());
  335. TxnStore txnHandler = TxnUtils.getTxnStore(conf);
  336. ShowLocksResponse resp = txnHandler.showLocks(new ShowLocksRequest());
  337. Assert.assertEquals(resp.getLocksSize(), 1);
  338. Assert.assertEquals("streamingnobuckets", resp.getLocks().get(0).getTablename());
  339. Assert.assertEquals("default", resp.getLocks().get(0).getDbname());
  340. txnBatch.commit();
  341. txnBatch.beginNextTransaction();
  342. txnBatch.write("a5,b6".getBytes());
  343. txnBatch.write("a7,b8".getBytes());
  344. txnBatch.commit();
  345. txnBatch.close();
  346. Assert.assertEquals("", 0, BucketCodec.determineVersion(536870912).decodeWriterId(536870912));
  347. rs = queryTable(driver,"select ROW__ID, a, b, INPUT__FILE__NAME from default.streamingnobuckets order by ROW__ID");
  348. Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\tfoo\tbar"));
  349. Assert.assertTrue(rs.get(0), rs.get(0).endsWith("streamingnobuckets/delta_0000001_0000001_0000/bucket_00000_0"));
  350. Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\ta1\tb2"));
  351. Assert.assertTrue(rs.get(1), rs.get(1).endsWith("streamingnobuckets/delta_0000002_0000003/bucket_00000"));
  352. Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\ta3\tb4"));
  353. Assert.assertTrue(rs.get(2), rs.get(2).endsWith("streamingnobuckets/delta_0000002_0000003/bucket_00000"));
  354. Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\ta5\tb6"));
  355. Assert.assertTrue(rs.get(3), rs.get(3).endsWith("streamingnobuckets/delta_0000002_0000003/bucket_00000"));
  356. Assert.assertTrue(rs.get(4), rs.get(4).startsWith("{\"writeid\":3,\"bucketid\":536870912,\"rowid\":1}\ta7\tb8"));
  357. Assert.assertTrue(rs.get(4), rs.get(4).endsWith("streamingnobuckets/delta_0000002_0000003/bucket_00000"));
  358. queryTable(driver, "update default.streamingnobuckets set a=0, b=0 where a='a7'");
  359. queryTable(driver, "delete from default.streamingnobuckets where a='a1'");
  360. rs = queryTable(driver, "select a, b from default.streamingnobuckets order by a, b");
  361. int row = 0;
  362. Assert.assertEquals("at row=" + row, "0\t0", rs.get(row++));
  363. Assert.assertEquals("at row=" + row, "a3\tb4", rs.get(row++));
  364. Assert.assertEquals("at row=" + row, "a5\tb6", rs.get(row++));
  365. Assert.assertEquals("at row=" + row, "foo\tbar", rs.get(row++));
  366. queryTable(driver, "alter table default.streamingnobuckets compact 'major'");
  367. runWorker(conf);
  368. rs = queryTable(driver,"select ROW__ID, a, b, INPUT__FILE__NAME from default.streamingnobuckets order by ROW__ID");
  369. Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\tfoo\tbar"));
  370. Assert.assertTrue(rs.get(0), rs.get(0).endsWith("streamingnobuckets/base_0000005_v0000025/bucket_00000"));
  371. Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\ta3\tb4"));
  372. Assert.assertTrue(rs.get(1), rs.get(1).endsWith("streamingnobuckets/base_0000005_v0000025/bucket_00000"));
  373. Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\ta5\tb6"));
  374. Assert.assertTrue(rs.get(2), rs.get(2).endsWith("streamingnobuckets/base_0000005_v0000025/bucket_00000"));
  375. Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t0\t0"));
  376. Assert.assertTrue(rs.get(3), rs.get(3).endsWith("streamingnobuckets/base_0000005_v0000025/bucket_00000"));
  377. }
  378. /**
  379. * this is a clone from TestTxnStatement2....
  380. */
  381. public static void runWorker(HiveConf hiveConf) throws Exception {
  382. AtomicBoolean stop = new AtomicBoolean(true);
  383. Worker t = new Worker();
  384. t.setThreadId((int) t.getId());
  385. t.setConf(hiveConf);
  386. AtomicBoolean looped = new AtomicBoolean();
  387. t.init(stop, looped);
  388. t.run();
  389. }
  390. // stream data into streaming table with N buckets, then copy the data into another bucketed table
  391. // check if bucketing in both was done in the same way
  392. @Test
  393. @Ignore
  394. public void testStreamBucketingMatchesRegularBucketing() throws Exception {
  395. int bucketCount = 100;
  396. String dbUri = "raw://" + new Path(dbFolder.newFolder().toString()).toUri().toString();
  397. String tableLoc = "'" + dbUri + Path.SEPARATOR + "streamedtable" + "'";
  398. String tableLoc2 = "'" + dbUri + Path.SEPARATOR + "finaltable" + "'";
  399. String tableLoc3 = "'" + dbUri + Path.SEPARATOR + "nobucket" + "'";
  400. try (IDriver driver = DriverFactory.newDriver(conf)) {
  401. runDDL(driver, "create database testBucketing3");
  402. runDDL(driver, "use testBucketing3");
  403. runDDL(driver, "create table streamedtable ( key1 string,key2 int,data string ) clustered by ( key1,key2 ) into "
  404. + bucketCount + " buckets stored as orc location " + tableLoc + " TBLPROPERTIES ('transactional'='true')");
  405. // In the 'nobucket' table we capture the bucketid from streamedtable to work around a Hive bug that prevents joining two identically bucketed tables
  406. runDDL(driver, "create table nobucket ( bucketid int, key1 string,key2 int,data string ) location " + tableLoc3);
  407. runDDL(driver,
  408. "create table finaltable ( bucketid int, key1 string,key2 int,data string ) clustered by ( key1,key2 ) into "
  409. + bucketCount + " buckets stored as orc location " + tableLoc2 + " TBLPROPERTIES ('transactional'='true')");
  410. String[] records = new String[]{
  411. "PSFAHYLZVC,29,EPNMA",
  412. "PPPRKWAYAU,96,VUTEE",
  413. "MIAOFERCHI,3,WBDSI",
  414. "CEGQAZOWVN,0,WCUZL",
  415. "XWAKMNSVQF,28,YJVHU",
  416. "XBWTSAJWME,2,KDQFO",
  417. "FUVLQTAXAY,5,LDSDG",
  418. "QTQMDJMGJH,6,QBOMA",
  419. "EFLOTLWJWN,71,GHWPS",
  420. "PEQNAOJHCM,82,CAAFI",
  421. "MOEKQLGZCP,41,RUACR",
  422. "QZXMCOPTID,37,LFLWE",
  423. "EYALVWICRD,13,JEZLC",
  424. "VYWLZAYTXX,16,DMVZX",
  425. "OSALYSQIXR,47,HNZVE",
  426. "JGKVHKCEGQ,25,KSCJB",
  427. "WQFMMYDHET,12,DTRWA",
  428. "AJOVAYZKZQ,15,YBKFO",
  429. "YAQONWCUAU,31,QJNHZ",
  430. "DJBXUEUOEB,35,IYCBL"
  431. };
  432. HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "testBucketing3", "streamedtable", null);
  433. String[] colNames1 = new String[]{"key1", "key2", "data"};
  434. DelimitedInputWriter wr = new DelimitedInputWriter(colNames1, ",", endPt);
  435. StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
  436. TransactionBatch txnBatch = connection.fetchTransactionBatch(2, wr);
  437. txnBatch.beginNextTransaction();
  438. for (String record : records) {
  439. txnBatch.write(record.toString().getBytes());
  440. }
  441. txnBatch.commit();
  442. txnBatch.close();
  443. connection.close();
  444. ArrayList<String> res1 = queryTable(driver, "select row__id.bucketid, * from streamedtable order by key2");
  445. for (String re : res1) {
  446. System.out.println(re);
  447. }
  448. driver.run("insert into nobucket select row__id.bucketid,* from streamedtable");
  449. runDDL(driver, " insert into finaltable select * from nobucket");
  450. ArrayList<String> res2 = queryTable(driver,
  451. "select row__id.bucketid,* from finaltable where row__id.bucketid<>bucketid");
  452. for (String s : res2) {
  453. LOG.error(s);
  454. }
  455. Assert.assertTrue(res2.isEmpty());
  456. } finally {
  457. conf.unset(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname);
  458. }
  459. }
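// Connecting a streaming endpoint to a non-transactional table must fail with InvalidTable.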
  460. @Test
  461. public void testTableValidation() throws Exception {
  462. int bucketCount = 100;
  463. String dbUri = "raw://" + new Path(dbFolder.newFolder().toString()).toUri().toString();
  464. String tbl1 = "validation1";
  465. String tbl2 = "validation2";
  466. String tableLoc = "'" + dbUri + Path.SEPARATOR + tbl1 + "'";
  467. String tableLoc2 = "'" + dbUri + Path.SEPARATOR + tbl2 + "'";
  468. runDDL(driver, "create database testBucketing3");
  469. runDDL(driver, "use testBucketing3");
  470. runDDL(driver, "create table " + tbl1 + " ( key1 string, data string ) clustered by ( key1 ) into "
  471. + bucketCount + " buckets stored as orc location " + tableLoc + " TBLPROPERTIES ('transactional'='false')") ;
  472. runDDL(driver, "create table " + tbl2 + " ( key1 string, data string ) clustered by ( key1 ) into "
  473. + bucketCount + " buckets stored as orc location " + tableLoc2 + " TBLPROPERTIES ('transactional'='false')") ;
  474. try {
  475. HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "testBucketing3", "validation1", null);
  476. endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
  477. Assert.assertTrue("InvalidTable exception was not thrown", false);
  478. } catch (InvalidTable e) {
  479. // expecting this exception
  480. }
  481. try {
  482. HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "testBucketing3", "validation2", null);
  483. endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
  484. Assert.assertTrue("InvalidTable exception was not thrown", false);
  485. } catch (InvalidTable e) {
  486. // expecting this exception
  487. }
  488. }
  489. /**
  490. * @deprecated use {@link #checkDataWritten2(Path, long, long, int, String, boolean, String...)} -
  491. * there is little value in using InputFormat directly
  492. */
  493. @Deprecated
  494. private void checkDataWritten(Path partitionPath, long minTxn, long maxTxn, int buckets, int numExpectedFiles,
  495. String... records) throws Exception {
  496. ValidWriteIdList writeIds = getTransactionContext(conf);
  497. AcidUtils.Directory dir = AcidUtils.getAcidState(null, partitionPath, conf, writeIds, null, false, null, false);
  498. Assert.assertEquals(0, dir.getObsolete().size());
  499. Assert.assertEquals(0, dir.getOriginalFiles().size());
  500. List<AcidUtils.ParsedDelta> current = dir.getCurrentDirectories();
  501. System.out.println("Files found: ");
  502. for (AcidUtils.ParsedDelta pd : current) {
  503. System.out.println(pd.getPath().toString());
  504. }
  505. Assert.assertEquals(numExpectedFiles, current.size());
  506. // find the absolute minimum and maximum write ids across the delta directories
  507. long min = Long.MAX_VALUE;
  508. long max = Long.MIN_VALUE;
  509. for (AcidUtils.ParsedDelta pd : current) {
  510. if (pd.getMaxWriteId() > max) {
  511. max = pd.getMaxWriteId();
  512. }
  513. if (pd.getMinWriteId() < min) {
  514. min = pd.getMinWriteId();
  515. }
  516. }
  517. Assert.assertEquals(minTxn, min);
  518. Assert.assertEquals(maxTxn, max);
  519. InputFormat inf = new OrcInputFormat();
  520. JobConf job = new JobConf();
  521. job.set("mapred.input.dir", partitionPath.toString());
  522. job.set(BUCKET_COUNT, Integer.toString(buckets));
  523. job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "id,msg");
  524. job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "bigint:string");
  525. AcidUtils.setAcidOperationalProperties(job, true, null);
  526. job.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true);
  527. job.set(ValidWriteIdList.VALID_WRITEIDS_KEY, writeIds.toString());
  528. job.set(ValidTxnList.VALID_TXNS_KEY, conf.get(ValidTxnList.VALID_TXNS_KEY));
  529. InputSplit[] splits = inf.getSplits(job, buckets);
  530. Assert.assertEquals(numExpectedFiles, splits.length);
  531. org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct> rr =
  532. inf.getRecordReader(splits[0], job, Reporter.NULL);
  533. NullWritable key = rr.createKey();
  534. OrcStruct value = rr.createValue();
  535. for (String record : records) {
  536. Assert.assertEquals(true, rr.next(key, value));
  537. Assert.assertEquals(record, value.toString());
  538. }
  539. Assert.assertEquals(false, rr.next(key, value));
  540. }
  541. /**
  542. * @param validationQuery query to read from table to compare data against {@code records}
  543. * @param records expected data; each row is a CSV list of values
  544. */
  545. private void checkDataWritten2(Path partitionPath, long minTxn, long maxTxn, int numExpectedFiles,
  546. String validationQuery, boolean vectorize, String... records) throws Exception {
  547. AcidUtils.Directory dir = AcidUtils.getAcidState(null, partitionPath, conf, getTransactionContext(conf), null,
  548. false, null, false);
  549. Assert.assertEquals(0, dir.getObsolete().size());
  550. Assert.assertEquals(0, dir.getOriginalFiles().size());
  551. List<AcidUtils.ParsedDelta> current = dir.getCurrentDirectories();
  552. System.out.println("Files found: ");
  553. for (AcidUtils.ParsedDelta pd : current) {
  554. System.out.println(pd.getPath().toString());
  555. }
  556. Assert.assertEquals(numExpectedFiles, current.size());
  557. // find the absolute minimum and maximum write ids across the delta directories
  558. long min = Long.MAX_VALUE;
  559. long max = Long.MIN_VALUE;
  560. for (AcidUtils.ParsedDelta pd : current) {
  561. if (pd.getMaxWriteId() > max) {
  562. max = pd.getMaxWriteId();
  563. }
  564. if (pd.getMinWriteId() < min) {
  565. min = pd.getMinWriteId();
  566. }
  567. }
  568. Assert.assertEquals(minTxn, min);
  569. Assert.assertEquals(maxTxn, max);
  570. boolean isVectorizationEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED);
  571. if(vectorize) {
  572. conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
  573. }
  574. String currStrategy = conf.getVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY);
  575. for(String strategy : ((Validator.StringSet)HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.getValidator()).getExpected()) {
  576. //run it with each split strategy - make sure the results don't differ
  577. conf.setVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY, strategy.toUpperCase());
  578. List<String> actualResult = queryTable(driver, validationQuery);
  579. for (int i = 0; i < actualResult.size(); i++) {
  580. Assert.assertEquals("diff at [" + i + "]. actual=" + actualResult + " expected=" +
  581. Arrays.toString(records), records[i], actualResult.get(i));
  582. }
  583. }
  584. conf.setVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY, currStrategy);
  585. conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, isVectorizationEnabled);
  586. }
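// Records the current valid txn list in the conf and builds a reader ValidWriteIdList for dbName.tblName.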
  587. private ValidWriteIdList getTransactionContext(Configuration conf) throws Exception {
  588. ValidTxnList validTxnList = msClient.getValidTxns();
  589. conf.set(ValidTxnList.VALID_TXNS_KEY, validTxnList.writeToString());
  590. List<TableValidWriteIds> v = msClient.getValidWriteIds(Collections
  591. .singletonList(TableName.getDbTable(dbName, tblName)), validTxnList.writeToString());
  592. return TxnCommonUtils.createValidReaderWriteIdList(v.get(0));
  593. }
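// Asserts that the partition directory contains no original files and no delta directories.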
  594. private void checkNothingWritten(Path partitionPath) throws Exception {
  595. AcidUtils.Directory dir = AcidUtils.getAcidState(null, partitionPath, conf, getTransactionContext(conf), null,
  596. false, null, false);
  597. Assert.assertEquals(0, dir.getObsolete().size());
  598. Assert.assertEquals(0, dir.getOriginalFiles().size());
  599. List<AcidUtils.ParsedDelta> current = dir.getCurrentDirectories();
  600. Assert.assertEquals(0, current.size());
  601. }
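// Valid endpoints connect cleanly; mismatched partition specs (missing or unexpected partitionVals)
// must raise ConnectionError.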
  602. @Test
  603. public void testEndpointConnection() throws Exception {
  604. // For partitioned table, partitionVals are specified
  605. HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals);
  606. StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName()); //shouldn't throw
  607. connection.close();
  608. // For unpartitioned table, partitionVals are not specified
  609. endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
  610. endPt.newConnection(false, "UT_" + Thread.currentThread().getName()).close(); // should not throw
  611. // For partitioned table, partitionVals are not specified
  612. try {
  613. endPt = new HiveEndPoint(metaStoreURI, dbName, tblName, null);
  614. connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName());
  615. Assert.assertTrue("ConnectionError was not thrown", false);
  616. connection.close();
  617. } catch (ConnectionError e) {
  618. // expecting this exception
  619. String errMsg = "doesn't specify any partitions for partitioned table";
  620. Assert.assertTrue(e.toString().endsWith(errMsg));
  621. }
  622. // For unpartitioned table, partition values are specified
  623. try {
  624. endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, partitionVals);
  625. connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
  626. Assert.assertTrue("ConnectionError was not thrown", false);
  627. connection.close();
  628. } catch (ConnectionError e) {
  629. // expecting this exception
  630. String errMsg = "specifies partitions for unpartitioned table";
  631. Assert.assertTrue(e.toString().endsWith(errMsg));
  632. }
  633. }
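// newConnection(true, ...) should create the missing partition; verify it exists afterwards.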
  634. @Test
  635. public void testAddPartition() throws Exception {
  636. List<String> newPartVals = new ArrayList<String>(2);
  637. newPartVals.add(PART1_CONTINENT);
  638. newPartVals.add("Nepal");
  639. HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName
  640. , newPartVals);
  641. // Ensure partition is absent
  642. try {
  643. msClient.getPartition(endPt.database, endPt.table, endPt.partitionVals);
  644. Assert.assertTrue("Partition already exists", false);
  645. } catch (NoSuchObjectException e) {
  646. // expect this exception
  647. }
  648. // Create partition
  649. Assert.assertNotNull(endPt.newConnection(true, "UT_" + Thread.currentThread().getName()));
  650. // Ensure partition is present
  651. Partition p = msClient.getPartition(endPt.database, endPt.table, endPt.partitionVals);
  652. Assert.assertNotNull("Did not find added partition", p);
  653. }
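// Commits an empty transaction against both the partitioned and unpartitioned tables and
// checks the batch reaches COMMITTED.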
  654. @Test
  655. public void testTransactionBatchEmptyCommit() throws Exception {
  656. // 1) to partitioned table
  657. HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName,
  658. partitionVals);
  659. StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
  660. DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames,",", endPt, connection);
  661. TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
  662. txnBatch.beginNextTransaction();
  663. txnBatch.commit();
  664. Assert.assertEquals(TransactionBatch.TxnState.COMMITTED
  665. , txnBatch.getCurrentTransactionState());
  666. txnBatch.close();
  667. connection.close();
  668. // 2) To unpartitioned table
  669. endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
  670. writer = new DelimitedInputWriter(fieldNames2,",", endPt);
  671. connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
  672. txnBatch = connection.fetchTransactionBatch(10, writer);
  673. txnBatch.beginNextTransaction();
  674. txnBatch.commit();
  675. Assert.assertEquals(TransactionBatch.TxnState.COMMITTED
  676. , txnBatch.getCurrentTransactionState());
  677. txnBatch.close();
  678. connection.close();
  679. }
  680. /**
  681. * check that transactions that have not heartbeated and have timed out get properly aborted
  682. * @throws Exception
  683. */
  684. @Test
  685. public void testTimeOutReaper() throws Exception {
  686. HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
  687. DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames2,",", endPt);
  688. StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
  689. TransactionBatch txnBatch = connection.fetchTransactionBatch(5, writer);
  690. txnBatch.beginNextTransaction();
  691. conf.setTimeVar(HiveConf.ConfVars.HIVE_TIMEDOUT_TXN_REAPER_START, 0, TimeUnit.SECONDS);
  692. //ensure the txn times out
  693. conf.setTimeVar(HiveConf.ConfVars.HIVE_TXN_TIMEOUT, 1, TimeUnit.MILLISECONDS);
  694. AcidHouseKeeperService houseKeeperService = new AcidHouseKeeperService();
  695. houseKeeperService.setConf(conf);
  696. houseKeeperService.run();
  697. try {
  698. //should fail because the TransactionBatch timed out
  699. txnBatch.commit();
  700. }
  701. catch(TransactionError e) {
  702. Assert.assertTrue("Expected aborted transaction", e.getCause() instanceof TxnAbortedException);
  703. }
  704. txnBatch.close();
  705. txnBatch = connection.fetchTransactionBatch(10, writer);
  706. txnBatch.beginNextTransaction();
  707. txnBatch.commit();
  708. txnBatch.beginNextTransaction();
  709. houseKeeperService.run();
  710. try {
  711. //should fail because the TransactionBatch timed out
  712. txnBatch.commit();
  713. }
  714. catch(TransactionError e) {
  715. Assert.assertTrue("Expected aborted transaction", e.getCause() instanceof TxnAbortedException);
  716. }
  717. txnBatch.close();
  718. connection.close();
  719. }
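// Exercises TransactionBatch.heartbeat(): the lock must still be present afterwards with an
// unchanged acquisition timestamp, and heartbeat() must interleave safely with begin/commit/abort
// across a 200-transaction batch.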
  720. @Test
  721. public void testHeartbeat() throws Exception {
  722. HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
  723. StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
  724. DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames2,",", endPt, connection);
  725. TransactionBatch txnBatch = connection.fetchTransactionBatch(5, writer);
  726. txnBatch.beginNextTransaction();
  727. //todo: this should ideally check Transaction heartbeat as well, but heartbeat
  728. //timestamp is not reported yet
  729. //GetOpenTxnsInfoResponse txnresp = msClient.showTxns();
  730. ShowLocksRequest request = new ShowLocksRequest();
  731. request.setDbname(dbName2);
  732. request.setTablename(tblName2);
  733. ShowLocksResponse response = msClient.showLocks(request);
  734. Assert.assertEquals("Wrong number of locks: " + response, 1, response.getLocks().size());
  735. ShowLocksResponseElement lock = response.getLocks().get(0);
  736. long acquiredAt = lock.getAcquiredat();
  737. long heartbeatAt = lock.getLastheartbeat();
  738. txnBatch.heartbeat();
  739. response = msClient.showLocks(request);
  740. Assert.assertEquals("Wrong number of locks2: " + response, 1, response.getLocks().size());
  741. lock = response.getLocks().get(0);
  742. Assert.assertEquals("Acquired timestamp didn't match", acquiredAt, lock.getAcquiredat());
  743. Assert.assertTrue("Expected new heartbeat (" + lock.getLastheartbeat() +
  744. ") == old heartbeat(" + heartbeatAt +")", lock.getLastheartbeat() == heartbeatAt);
  745. txnBatch.close();
  746. int txnBatchSize = 200;
  747. txnBatch = connection.fetchTransactionBatch(txnBatchSize, writer);
  748. for(int i = 0; i < txnBatchSize; i++) {
  749. txnBatch.beginNextTransaction();
  750. if(i % 47 == 0) {
  751. txnBatch.heartbeat();
  752. }
  753. if(i % 10 == 0) {
  754. txnBatch.abort();
  755. }
  756. else {
  757. txnBatch.commit();
  758. }
  759. if(i % 37 == 0) {
  760. txnBatch.heartbeat();
  761. }
  762. }
  763. }
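// Aborts an empty transaction on both tables and checks the batch reaches ABORTED.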
  764. @Test
  765. public void testTransactionBatchEmptyAbort() throws Exception {
  766. // 1) to partitioned table
  767. HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName,
  768. partitionVals);
  769. StreamingConnection connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName());
  770. DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames,",", endPt, connection);
  771. TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
  772. txnBatch.beginNextTransaction();
  773. txnBatch.abort();
  774. Assert.assertEquals(TransactionBatch.TxnState.ABORTED
  775. , txnBatch.getCurrentTransactionState());
  776. txnBatch.close();
  777. connection.close();
  778. // 2) to unpartitioned table
  779. endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
  780. writer = new DelimitedInputWriter(fieldNames,",", endPt);
  781. connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName());
  782. txnBatch = connection.fetchTransactionBatch(10, writer);
  783. txnBatch.beginNextTransaction();
  784. txnBatch.abort();
  785. Assert.assertEquals(TransactionBatch.TxnState.ABORTED
  786. , txnBatch.getCurrentTransactionState());
  787. txnBatch.close();
  788. connection.close();
  789. }
  790. @Test
  791. public void testTransactionBatchCommit_Delimited() throws Exception {
  792. testTransactionBatchCommit_Delimited(null);
  793. }
  794. @Test
  795. public void testTransactionBatchCommit_DelimitedUGI() throws Exception {
  796. testTransactionBatchCommit_Delimited(Utils.getUGI());
  797. }
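// Shared body for the delimited-writer commit tests: writes records in separate transactions
// (optionally as the given UGI) and checks visibility after each commit, then repeats against
// the unpartitioned table.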
  798. private void testTransactionBatchCommit_Delimited(UserGroupInformation ugi) throws Exception {
  799. HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName,
  800. partitionVals);
  801. StreamingConnection connection = endPt.newConnection(true, conf, ugi, "UT_" + Thread.currentThread().getName());
  802. DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames,",", endPt, conf, connection);
  803. // 1st Txn
  804. TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
  805. txnBatch.beginNextTransaction();
  806. Assert.assertEquals(TransactionBatch.TxnState.OPEN
  807. , txnBatch.getCurrentTransactionState());
  808. txnBatch.write("1,Hello streaming".getBytes());
  809. txnBatch.commit();
  810. checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}");
  811. Assert.assertEquals(TransactionBatch.TxnState.COMMITTED
  812. , txnBatch.getCurrentTransactionState());
  813. // 2nd Txn
  814. txnBatch.beginNextTransaction();
  815. Assert.assertEquals(TransactionBatch.TxnState.OPEN
  816. , txnBatch.getCurrentTransactionState());
  817. txnBatch.write("2,Welcome to streaming".getBytes());
  818. // data should not be visible
  819. checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}");
  820. txnBatch.commit();
  821. checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}",
  822. "{2, Welcome to streaming}");
  823. txnBatch.close();
  824. Assert.assertEquals(TransactionBatch.TxnState.INACTIVE
  825. , txnBatch.getCurrentTransactionState());
  826. connection.close();
  827. // To Unpartitioned table
  828. endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
  829. connection = endPt.newConnection(true, conf, ugi, "UT_" + Thread.currentThread().getName());
  830. writer = new DelimitedInputWriter(fieldNames,",", endPt, conf, connection);
  831. // 1st Txn
  832. txnBatch = connection.fetchTransactionBatch(10, writer);
  833. txnBatch.beginNextTransaction();
  834. Assert.assertEquals(TransactionBatch.TxnState.OPEN
  835. , txnBatch.getCurrentTransactionState());
  836. txnBatch.write("1,Hello streaming".getBytes());
  837. txnBatch.commit();
  838. Assert.assertEquals(TransactionBatch.TxnState.COMMITTED
  839. , txnBatch.getCurrentTransactionState());
  840. connection.close();
  841. }
  842. @Test
  843. public void testTransactionBatchCommit_Regex() throws Exception {
  844. testTransactionBatchCommit_Regex(null);
  845. }
  846. @Test
  847. public void testTransactionBatchCommit_RegexUGI() throws Exception {
  848. testTransactionBatchCommit_Regex(Utils.getUGI());
  849. }
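// Same flow as the delimited variant, but records are parsed with StrictRegexWriter.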
  850. private void testTransactionBatchCommit_Regex(UserGroupInformation ugi) throws Exception {
  851. HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName,
  852. partitionVals);
  853. StreamingConnection connection = endPt.newConnection(true, conf, ugi, "UT_" + Thread.currentThread().getName());
  854. String regex = "([^,]*),(.*)";
  855. StrictRegexWriter writer = new StrictRegexWriter(regex, endPt, conf, connection);
  856. // 1st Txn
  857. TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
  858. txnBatch.beginNextTransaction();
  859. Assert.assertEquals(TransactionBatch.TxnState.OPEN
  860. , txnBatch.getCurrentTransactionState());
  861. txnBatch.write("1,Hello streaming".getBytes());
  862. txnBatch.commit();
  863. checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}");
  864. Assert.assertEquals(TransactionBatch.TxnState.COMMITTED
  865. , txnBatch.getCurrentTransactionState());
  866. // 2nd Txn
  867. txnBatch.beginNextTransaction();
  868. Assert.assertEquals(TransactionBatch.TxnState.OPEN
  869. , txnBatch.getCurrentTransactionState());
  870. txnBatch.write("2,Welcome to streaming".getBytes());
  871. // data should not be visible
  872. checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}");
  873. txnBatch.commit();
  874. checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}",
  875. "{2, Welcome to streaming}");
  876. txnBatch.close();
  877. Assert.assertEquals(TransactionBatch.TxnState.INACTIVE
  878. , txnBatch.getCurrentTransactionState());
  879. connection.close();
  880. // To Unpartitioned table
  881. endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
  882. connection = endPt.newConnection(true, conf, ugi, "UT_" + Thread.currentThread().getName());
  883. regex = "([^:]*):(.*)";
  884. writer = new StrictRegexWriter(regex, endPt, conf, connection);
  885. // 1st Txn
  886. txnBatch = connection.fetchTransactionBatch(10, writer);
  887. txnBatch.beginNextTransaction();
  888. Assert.assertEquals(TransactionBatch.TxnState.OPEN
  889. , txnBatch.getCurrentTransactionState());
  890. txnBatch.write("1:Hello streaming".getBytes());
  891. txnBatch.commit();
  892. Assert.assertEquals(TransactionBatch.TxnState.COMMITTED
  893. , txnBatch.getCurrentTransactionState());
  894. connection.close();
  895. }
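// Writes one JSON record via StrictJsonWriter and checks it becomes visible after commit.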
  896. @Test
  897. public void testTransactionBatchCommit_Json() throws Exception {
  898. HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName,
  899. partitionVals);
  900. StreamingConnection connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName());
  901. StrictJsonWriter writer = new StrictJsonWriter(endPt, connection);
  902. // 1st Txn
  903. TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
  904. txnBatch.beginNextTransaction();
  905. Assert.assertEquals(TransactionBatch.TxnState.OPEN
  906. , txnBatch.getCurrentTransactionState());
  907. String rec1 = "{\"id\" : 1, \"msg\": \"Hello streaming\"}";
  908. txnBatch.write(rec1.getBytes());
  909. txnBatch.commit();
  910. checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}");
  911. Assert.assertEquals(TransactionBatch.TxnState.COMMITTED
  912. , txnBatch.getCurrentTransactionState());
  913. txnBatch.close();
  914. Assert.assertEquals(TransactionBatch.TxnState.INACTIVE
  915. , txnBatch.getCurrentTransactionState());
  916. connection.close();
  917. List<String> rs = queryTable(driver, "select * from " + dbName + "." + tblName);
  918. Assert.assertEquals(1, rs.size());
  919. }
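// remainingTransactions() should count down as transactions are consumed, for both the
// commit and abort paths.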
  920. @Test
  921. public void testRemainingTransactions() throws Exception {
  922. HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName,
  923. partitionVals);
  924. DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames,",", endPt);
  925. StreamingConnection connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName());
  926. // 1) test with txn.Commit()
  927. TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
  928. int batch=0;
  929. int initialCount = txnBatch.remainingTransactions();
  930. while (txnBatch.remainingTransactions()>0) {
  931. txnBatch.beginNextTransaction();
  932. Assert.assertEquals(--initialCount, txnBatch.remainingTransactions());
  933. for (int rec=0; rec<2; ++rec) {
  934. Assert.assertEquals(TransactionBatch.TxnState.OPEN
  935. , txnBatch.getCurrentTransactionState());
  936. txnBatch.write((batch * rec + ",Hello streaming").getBytes());
  937. }
  938. txnBatch.commit();
  939. Assert.assertEquals(TransactionBatch.TxnState.COMMITTED
  940. , txnBatch.getCurrentTransactionState());
  941. ++batch;
  942. }
  943. Assert.assertEquals(0, txnBatch.remainingTransactions());
  944. txnBatch.close();
  945. Assert.assertEquals(TransactionBatch.TxnState.INACTIVE
  946. , txnBatch.getCurrentTransactionState());
  947. // 2) test with txn.Abort()
  948. txnBatch = connection.fetchTransactionBatch(10, writer);
  949. batch=0;
  950. initialCount = txnBatch.remainingTransactions();
  951. while (txnBatch.remainingTransactions()>0) {
  952. txnBatch.beginNextTransaction();
  953. Assert.assertEquals(--initialCount,txnBatch.remainingTransactions());
  954. for (int rec=0; rec<2; ++rec) {
  955. Assert.assertEquals(TransactionBatch.TxnState.OPEN
  956. , txnBatch.getCurrentTransactionState());
  957. txnBatch.write((batch * rec + ",Hello streaming").getBytes());
  958. }
  959. txnBatch.abort();
  960. Assert.assertEquals(TransactionBatch.TxnState.ABORTED
  961. , txnBatch.getCurrentTransactionState());
  962. ++batch;
  963. }
  964. Assert.assertEquals(0, txnBatch.remainingTransactions());
  965. txnBatch.close();
  966. Assert.assertEquals(TransactionBatch.TxnState.INACTIVE
  967. , txnBatch.getCurrentTransactionState());
  968. connection.close();
  969. }
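// Aborted writes must leave nothing visible in the partition, even after the connection is closed.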
  970. @Test
  971. public void testTransactionBatchAbort() throws Exception {
  972. HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName,
  973. partitionVals);
  974. StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
  975. DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames,",", endPt, connection);
  976. TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
  977. txnBatch.beginNextTransaction();
  978. txnBatch.write("1,Hello streaming".getBytes());
  979. txnBatch.write("2,Welcome to streaming".getBytes());
  980. txnBatch.abort();
  981. checkNothingWritten(partLoc);
  982. Assert.assertEquals(TransactionBatch.TxnState.ABORTED
  983. , txnBatch.getCurrentTransactionState());
  984. txnBatch.close();
  985. connection.close();
  986. checkNothingWritten(partLoc);
  987. }
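// Abort one transaction (checking its SHARED_READ lock metadata first), then commit a second
// and verify only the committed rows are readable.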
  988. @Test
  989. public void testTransactionBatchAbortAndCommit() throws Exception {
  990. String agentInfo = "UT_" + Thread.currentThread().getName();
  991. HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName,
  992. partitionVals);
  993. StreamingConnection connection = endPt.newConnection(false, agentInfo);
  994. DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames,",", endPt, connection);
  995. TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
  996. txnBatch.beginNextTransaction();
  997. txnBatch.write("1,Hello streaming".getBytes());
  998. txnBatch.write("2,Welcome to streaming".getBytes());
  999. ShowLocksResponse resp = msClient.showLocks(new ShowLocksRequest());
  1000. Assert.assertEquals("LockCount", 1, resp.getLocksSize());
  1001. Assert.assertEquals("LockType", LockType.SHARED_READ, resp.getLocks().get(0).getType());
  1002. Assert.assertEquals("LockState", LockState.ACQUIRED, resp.getLocks().get(0).getState());
  1003. Assert.assertEquals("AgentInfo", agentInfo, resp.getLocks().get(0).getAgentInfo());
  1004. txnBatch.abort();
  1005. checkNothingWritten(partLoc);
  1006. Assert.assertEquals(TransactionBatch.TxnState.ABORTED
  1007. , txnBatch.getCurrentTransactionState());
  1008. txnBatch.beginNextTransaction();
  1009. txnBatch.write("1,Hello streaming".getBytes());
  1010. txnBatch.write("2,Welcome to streaming".getBytes());
  1011. txnBatch.commit();
  1012. checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}",
  1013. "{2, Welcome to streaming}");
  1014. txnBatch.close();
  1015. connection.close();
  1016. }
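// Commits transactions across two consecutive batches, validating rows and delta file counts
// after each commit.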
  1017. @Test
  1018. public void testMultipleTransactionBatchCommits() throws Exception {
  1019. HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName,
  1020. partitionVals);
  1021. DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames,",", endPt);
  1022. StreamingConnection connection = endPt.newConnection(true, "UT_" + Thread.currentThread().getName());
  1023. TransactionBatch txnBatch = connection.fetchTransactionBatch(10, writer);
  1024. txnBatch.beginNextTransaction();
  1025. txnBatch.write("1,Hello streaming".getBytes());
  1026. txnBatch.commit();
  1027. String validationQuery = "select id, msg from " + dbName + "." + tblName + " order by id, msg";
  1028. checkDataWritten2(partLoc, 1, 10, 1, validationQuery, false, "1\tHello streaming");
  1029. txnBatch.beginNextTransaction();
  1030. txnBatch.write("2,Welcome to streaming".getBytes());
  1031. txnBatch.commit();
  1032. checkDataWritten2(partLoc, 1, 10, 1, validationQuery, true, "1\tHello streaming",
  1033. "2\tWelcome to streaming");
  1034. txnBatch.close();
  1035. // 2nd Txn Batch
  1036. txnBatch = connection.fetchTransactionBatch(10, writer);
  1037. txnBatch.beginNextTransaction();
  1038. txnBatch.write("3,Hello streaming - once again".getBytes());
  1039. txnBatch.commit();
  1040. checkDataWritten2(partLoc, 1, 20, 2, validationQuery, false, "1\tHello streaming",
  1041. "2\tWelcome to streaming", "3\tHello streaming - once again");
  1042. txnBatch.beginNextTransaction();
  1043. txnBatch.write("4,Welcome to streaming - once again".getBytes());
  1044. txnBatch.commit();
  1045. checkDataWritten2(partLoc, 1, 20, 2, validationQuery, true, "1\tHello streaming",
  1046. "2\tWelcome to streaming", "3\tHello streaming - once again",
  1047. "4\tWelcome to streaming - once again");
  1048. Assert.assertEquals(TransactionBatch.TxnState.COMMITTED
  1049. , txnBatch.getCurrentTransactionState());
  1050. txnBatch.close();
  1051. connection.close();
  1052. }
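// Two transaction batches kept open on the same endpoint commit in interleaved order; also
// checks the ORC side (flush-length) files of the open deltas.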
  1053. @Test
  1054. public void testInterleavedTransactionBatchCommits() throws Exception {
  1055. HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName, tblName,
  1056. partitionVals);
  1057. DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames, ",", endPt);
  1058. StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
  1059. // Acquire 1st Txn Batch
  1060. TransactionBatch txnBatch1 = connection.fetchTransactionBatch(10, writer);
  1061. txnBatch1.beginNextTransaction();
  1062. // Acquire 2nd Txn Batch
  1063. DelimitedInputWriter writer2 = new DelimitedInputWriter(fieldNames, ",", endPt);
  1064. TransactionBatch txnBatch2 = connection.fetchTransactionBatch(10, writer2);
  1065. txnBatch2.beginNextTransaction();
  1066. // Interleaved writes to both batches
  1067. txnBatch1.write("1,Hello streaming".getBytes());
  1068. txnBatch2.write("3,Hello streaming - once again".getBytes());
  1069. checkNothingWritten(partLoc);
  1070. txnBatch2.commit();
  1071. String validationQuery = "select id, msg from " + dbName + "." + tblName + " order by id, msg";
  1072. checkDataWritten2(partLoc, 11, 20, 1,
  1073. validationQuery, true, "3\tHello streaming - once again");
  1074. txnBatch1.commit();
  1075. /*now both batches have committed (but not closed), so for each primary file we expect a side
  1076. file to exist that indicates the true length of the primary file*/
  1077. FileSystem fs = partLoc.getFileSystem(conf);
  1078. AcidUtils.Directory dir = AcidUtils.getAcidState(fs, partLoc, conf, getTransactionContext(conf), null, false, null, false);
  1079. for(AcidUtils.ParsedDelta pd : dir.getCurrentDirectories()) {
  1080. for(FileStatus stat : fs.listStatus(pd.getPath(), AcidUtils.bucketFileFilter)) {
  1081. Path lengthFile = OrcAcidUtils.getSideFile(stat.getPath());
  1082. Assert.assertTrue(lengthFile + " missing", fs.exists(lengthFile));
  1083. long lengthFileSize = fs.getFileStatus(lengthFile).getLen();
  1084. Assert.assertTrue("Expected " + lengthFile + " to be non-empty. length=" +
  1085. lengthFileSize, lengthFileSize > 0);
  1086. long logicalLength = AcidUtils.getLogicalLength(fs, stat);
  1087. long actualLength = stat.getLen();
  1088. Assert.assertTrue("", logicalLength == actualLength);
  1089. }
  1090. }
  1091. checkDataWritten2(partLoc, 1, 20, 2,
  1092. validationQuery, false,"1\tHello streaming", "3\tHello streaming - once again");
  1093. txnBatch1.beginNextTransaction();
  1094. txnBatch1.write("2,Welcome to streaming".getBytes());
  1095. txnBatch2.beginNextTransaction();
  1096. txnBatch2.write("4,Welcome to streaming - once again".getBytes());
  1097. //here each batch has written data and committed (to bucket0 since the table only has 1 bucket),
  1098. //so each of the 2 deltas has 1 bucket0 and 1 bucket0_flush_length. Furthermore, each bucket0
  1099. //has now received more data (logically - it's buffered) but it is not yet committed.
  1100. //let's check that the side files exist, etc.
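  // (Right after a commit the logical length equals the physical file length, as asserted above;
  // once more rows have been written but not yet committed, the side file still points at the last
  // committed offset, so below we only assert logicalLength <= actualLength.)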
  1101. dir = AcidUtils.getAcidState(fs, partLoc, conf, getTransactionContext(conf), null, false, null, false);
  1102. for(AcidUtils.ParsedDelta pd : dir.getCurrentDirectories()) {
  1103. for(FileStatus stat : fs.listStatus(pd.getPath(), AcidUtils.bucketFileFilter)) {
  1104. Path lengthFile = OrcAcidUtils.getSideFile(stat.getPath());
  1105. Assert.assertTrue(lengthFile + " missing", fs.exists(lengthFile));
  1106. long lengthFileSize = fs.getFileStatus(lengthFile).getLen();
  1107. Assert.assertTrue("Expected " + lengthFile + " to be non empty. lengh=" +
  1108. lengthFileSize, lengthFileSize > 0);
  1109. long logicalLength = AcidUtils.getLogicalLength(fs, stat);
  1110. long actualLength = stat.getLen();
  1111. Assert.assertTrue("", logicalLength <= actualLength);
  1112. }
  1113. }
  1114. checkDataWritten2(partLoc, 1, 20, 2,
  1115. validationQuery, true,"1\tHello streaming", "3\tHello streaming - once again");
  1116. txnBatch1.commit();
  1117. checkDataWritten2(partLoc, 1, 20, 2,
  1118. validationQuery, false, "1\tHello streaming",
  1119. "2\tWelcome to streaming",
  1120. "3\tHello streaming - once again");
  1121. txnBatch2.commit();
  1122. checkDataWritten2(partLoc, 1, 20, 2,
  1123. validationQuery, true, "1\tHello streaming",
  1124. "2\tWelcome to streaming",
  1125. "3\tHello streaming - once again",
  1126. "4\tWelcome to streaming - once again");
  1127. Assert.assertEquals(TransactionBatch.TxnState.COMMITTED
  1128. , txnBatch1.getCurrentTransactionState());
  1129. Assert.assertEquals(TransactionBatch.TxnState.COMMITTED
  1130. , txnBatch2.getCurrentTransactionState());
  1131. txnBatch1.close();
  1132. txnBatch2.close();
  1133. connection.close();
  1134. }
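  // Helper thread for testConcurrentTransactionBatchCommits() below: each instance streams its row
  // repeatedly through its own connection/writer pair, and any failure is captured by the
  // uncaught-exception handler so the test can assert on it after join().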
  1135. private static class WriterThd extends Thread {
  1136. private final StreamingConnection conn;
  1137. private final DelimitedInputWriter writer;
  1138. private final String data;
  1139. private Throwable error;
  1140. WriterThd(HiveEndPoint ep, String data) throws Exception {
  1141. super("Writer_" + data);
  1142. writer = new DelimitedInputWriter(fieldNames, ",", ep);
  1143. conn = ep.newConnection(false, "UT_" + Thread.currentThread().getName());
  1144. this.data = data;
  1145. setUncaughtExceptionHandler(new UncaughtExceptionHandler() {
  1146. @Override
  1147. public void uncaughtException(Thread thread, Throwable throwable) {
  1148. error = throwable;
  1149. LOG.error("Thread " + thread.getName() + " died: " + throwable.getMessage(), throwable);
  1150. }
  1151. });
  1152. }
  1153. @Override
  1154. public void run() {
  1155. TransactionBatch txnBatch = null;
  1156. try {
  1157. txnBatch = conn.fetchTransactionBatch(10, writer);
  1158. while (txnBatch.remainingTransactions() > 0) {
  1159. txnBatch.beginNextTransaction();
  1160. txnBatch.write(data.getBytes());
  1161. txnBatch.write(data.getBytes());
  1162. txnBatch.commit();
  1163. } // while
  1164. } catch (Exception e) {
  1165. throw new RuntimeException(e);
  1166. } finally {
  1167. if (txnBatch != null) {
  1168. try {
  1169. txnBatch.close();
  1170. } catch (Exception e) {
  1171. LOG.error("txnBatch.close() failed: " + e.getMessage(), e);
  1172. conn.close();
  1173. }
  1174. }
  1175. try {
  1176. conn.close();
  1177. } catch (Exception e) {
  1178. LOG.error("conn.close() failed: " + e.getMessage(), e);
  1179. }
  1180. }
  1181. }
  1182. }
  1183. /**
  1184. * Make sure that creating an already existing partition is handled gracefully
  1185. * @throws Exception
  1186. */
  1187. @Test
  1188. public void testCreatePartition() throws Exception {
  1189. final HiveEndPoint ep = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals);
  1190. StreamingConnection conn = ep.newConnection(true);
  1191. conn.close();
  1192. conn = ep.newConnection(true);
  1193. conn.close();
  1194. }
  1195. @Test
  1196. public void testConcurrentTransactionBatchCommits() throws Exception {
  1197. final HiveEndPoint ep = new HiveEndPoint(metaStoreURI, dbName, tblName, partitionVals);
  1198. List<WriterThd> writers = new ArrayList<WriterThd>(3);
  1199. writers.add(new WriterThd(ep, "1,Matrix"));
  1200. writers.add(new WriterThd(ep, "2,Gandhi"));
  1201. writers.add(new WriterThd(ep, "3,Silence"));
  1202. for(WriterThd w : writers) {
  1203. w.start();
  1204. }
  1205. for(WriterThd w : writers) {
  1206. w.join();
  1207. }
  1208. for(WriterThd w : writers) {
  1209. if(w.error != null) {
  1210. Assert.assertFalse("Writer thread" + w.getName() + " died: " + w.error.getMessage() +
  1211. " See log file for stack trace", true);
  1212. }
  1213. }
  1214. }
  1215. private ArrayList<SampleRec> dumpBucket(Path orcFile) throws IOException {
  1216. org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.getLocal(new Configuration());
  1217. Reader reader = OrcFile.createReader(orcFile,
  1218. OrcFile.readerOptions(conf).filesystem(fs));
  1219. RecordReader rows = reader.rows();
  1220. StructObjectInspector inspector = (StructObjectInspector) reader
  1221. .getObjectInspector();
  1222. System.out.format("Found Bucket File : %s \n", orcFile.getName());
  1223. ArrayList<SampleRec> result = new ArrayList<SampleRec>();
  1224. while (rows.hasNext()) {
  1225. Object row = rows.next(null);
  1226. SampleRec rec = (SampleRec) deserializeDeltaFileRow(row, inspector)[5];
  1227. result.add(rec);
  1228. }
  1229. return result;
  1230. }
  1231. // Assumes stored data schema = [acid fields],string,int,string
  1232. // return array of 6 fields, where the last field has the actual data
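  // (The five leading ACID columns read below correspond, in this layout, to: the operation code
  // (int), the original transaction/write id (long), the bucket id (int), the row id (long) and the
  // current transaction/write id (long); field 5 is the user row itself.)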
  1233. private static Object[] deserializeDeltaFileRow(Object row, StructObjectInspector inspector) {
  1234. List<? extends StructField> fields = inspector.getAllStructFieldRefs();
  1235. WritableIntObjectInspector f0ins = (WritableIntObjectInspector) fields.get(0).getFieldObjectInspector();
  1236. WritableLongObjectInspector f1ins = (WritableLongObjectInspector) fields.get(1).getFieldObjectInspector();
  1237. WritableIntObjectInspector f2ins = (WritableIntObjectInspector) fields.get(2).getFieldObjectInspector();
  1238. WritableLongObjectInspector f3ins = (WritableLongObjectInspector) fields.get(3).getFieldObjectInspector();
  1239. WritableLongObjectInspector f4ins = (WritableLongObjectInspector) fields.get(4).getFieldObjectInspector();
  1240. StructObjectInspector f5ins = (StructObjectInspector) fields.get(5).getFieldObjectInspector();
  1241. int f0 = f0ins.get(inspector.getStructFieldData(row, fields.get(0)));
  1242. long f1 = f1ins.get(inspector.getStructFieldData(row, fields.get(1)));
  1243. int f2 = f2ins.get(inspector.getStructFieldData(row, fields.get(2)));
  1244. long f3 = f3ins.get(inspector.getStructFieldData(row, fields.get(3)));
  1245. long f4 = f4ins.get(inspector.getStructFieldData(row, fields.get(4)));
  1246. SampleRec f5 = deserializeInner(inspector.getStructFieldData(row, fields.get(5)), f5ins);
  1247. return new Object[] {f0, f1, f2, f3, f4, f5};
  1248. }
  1249. // Assumes row schema => string,int,string
  1250. private static SampleRec deserializeInner(Object row, StructObjectInspector inspector) {
  1251. List<? extends StructField> fields = inspector.getAllStructFieldRefs();
  1252. WritableStringObjectInspector f0ins = (WritableStringObjectInspector) fields.get(0).getFieldObjectInspector();
  1253. WritableIntObjectInspector f1ins = (WritableIntObjectInspector) fields.get(1).getFieldObjectInspector();
  1254. WritableStringObjectInspector f2ins = (WritableStringObjectInspector) fields.get(2).getFieldObjectInspector();
  1255. String f0 = f0ins.getPrimitiveJavaObject(inspector.getStructFieldData(row, fields.get(0)));
  1256. int f1 = f1ins.get(inspector.getStructFieldData(row, fields.get(1)));
  1257. String f2 = f2ins.getPrimitiveJavaObject(inspector.getStructFieldData(row, fields.get(2)));
  1258. return new SampleRec(f0, f1, f2);
  1259. }
  1260. @Test
  1261. public void testBucketing() throws Exception {
  1262. String agentInfo = "UT_" + Thread.currentThread().getName();
  1263. dropDB(msClient, dbName3);
  1264. dropDB(msClient, dbName4);
  1265. // 1) Create two bucketed tables
  1266. String dbLocation = dbFolder.newFolder(dbName3).getCanonicalPath() + ".db";
  1267. dbLocation = dbLocation.replaceAll("\\\\","/"); // for windows paths
  1268. String[] colNames = "key1,key2,data".split(",");
  1269. String[] colTypes = "string,int,string".split(",");
  1270. String[] bucketNames = "key1,key2".split(",");
  1271. int bucketCount = 4;
  1272. createDbAndTable(driver, dbName3, tblName3, null, colNames, colTypes, bucketNames
  1273. , null, dbLocation, bucketCount);
  1274. String dbLocation2 = dbFolder.newFolder(dbName4).getCanonicalPath() + ".db";
  1275. dbLocation2 = dbLocation2.replaceAll("\\\\","/"); // for windows paths
  1276. String[] colNames2 = "key3,key4,data2".split(",");
  1277. String[] colTypes2 = "string,int,string".split(",");
  1278. String[] bucketNames2 = "key3,key4".split(",");
  1279. createDbAndTable(driver, dbName4, tblName4, null, colNames2, colTypes2, bucketNames2
  1280. , null, dbLocation2, bucketCount);
  1281. // 2) Insert data into both tables
  1282. HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName3, tblName3, null);
  1283. StreamingConnection connection = endPt.newConnection(false, agentInfo);
  1284. DelimitedInputWriter writer = new DelimitedInputWriter(colNames,",", endPt, connection);
  1285. TransactionBatch txnBatch = connection.fetchTransactionBatch(2, writer);
  1286. txnBatch.beginNextTransaction();
  1287. txnBatch.write("name0,1,Hello streaming".getBytes());
  1288. txnBatch.write("name2,2,Welcome to streaming".getBytes());
  1289. txnBatch.write("name4,2,more Streaming unlimited".getBytes());
  1290. txnBatch.write("name5,2,even more Streaming unlimited".getBytes());
  1291. txnBatch.commit();
  1292. HiveEndPoint endPt2 = new HiveEndPoint(metaStoreURI, dbName4, tblName4, null);
  1293. StreamingConnection connection2 = endPt2.newConnection(false, agentInfo);
  1294. DelimitedInputWriter writer2 = new DelimitedInputWriter(colNames2,",", endPt2, connection2);
  1295. TransactionBatch txnBatch2 = connection2.fetchTransactionBatch(2, writer2);
  1296. txnBatch2.beginNextTransaction();
  1297. txnBatch2.write("name5,2,fact3".getBytes()); // bucket 0
  1298. txnBatch2.write("name8,2,fact3".getBytes()); // bucket 1
  1299. txnBatch2.write("name0,1,fact1".getBytes()); // bucket 2
  1300. txnBatch2.commit();
  1301. // 3) Check data distribution in buckets
  1302. HashMap<Integer, ArrayList<SampleRec>> actual1 = dumpAllBuckets(dbLocation, tblName3);
  1303. HashMap<Integer, ArrayList<SampleRec>> actual2 = dumpAllBuckets(dbLocation2, tblName4);
  1304. System.err.println("\n Table 1");
  1305. System.err.println(actual1);
  1306. System.err.println("\n Table 2");
  1307. System.err.println(actual2);
  1308. // assert bucket listing is as expected
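  // (Rows are routed to buckets by hashing the bucketing columns (key1, key2) modulo the declared
  // bucket count, and a bucket file is only created in the delta if at least one row hashes to it;
  // that is why only 3 of the 4 buckets are expected here and bucket 0 is absent.)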
  1309. Assert.assertEquals("number of buckets does not match expectation", actual1.values().size(), 3);
  1310. Assert.assertTrue("bucket 0 shouldn't have been created", actual1.get(0) == null);
  1311. Assert.assertEquals("records in bucket does not match expectation", actual1.get(1).size(), 1);
  1312. Assert.assertEquals("records in bucket does not match expectation", actual1.get(2).size(), 2);
  1313. Assert.assertEquals("records in bucket does not match expectation", actual1.get(3).size(), 1);
  1314. }
  1315. private void runCmdOnDriver(String cmd) throws QueryFailedException {
  1316. boolean t = runDDL(driver, cmd);
  1317. Assert.assertTrue(cmd + " failed", t);
  1318. }
  1319. @Test
  1320. public void testFileDump() throws Exception {
  1321. String agentInfo = "UT_" + Thread.currentThread().getName();
  1322. dropDB(msClient, dbName3);
  1323. dropDB(msClient, dbName4);
  1324. // 1) Create two bucketed tables
  1325. String dbLocation = dbFolder.newFolder(dbName3).getCanonicalPath() + ".db";
  1326. dbLocation = dbLocation.replaceAll("\\\\","/"); // for windows paths
  1327. String[] colNames = "key1,key2,data".split(",");
  1328. String[] colTypes = "string,int,string".split(",");
  1329. String[] bucketNames = "key1,key2".split(",");
  1330. int bucketCount = 4;
  1331. createDbAndTable(driver, dbName3, tblName3, null, colNames, colTypes, bucketNames
  1332. , null, dbLocation, bucketCount);
  1333. String dbLocation2 = dbFolder.newFolder(dbName4).getCanonicalPath() + ".db";
  1334. dbLocation2 = dbLocation2.replaceAll("\\\\","/"); // for windows paths
  1335. String[] colNames2 = "key3,key4,data2".split(",");
  1336. String[] colTypes2 = "string,int,string".split(",");
  1337. String[] bucketNames2 = "key3,key4".split(",");
  1338. createDbAndTable(driver, dbName4, tblName4, null, colNames2, colTypes2, bucketNames2
  1339. , null, dbLocation2, bucketCount);
  1340. // 2) Insert data into both tables
  1341. HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName3, tblName3, null);
  1342. StreamingConnection connection = endPt.newConnection(false, agentInfo);
  1343. DelimitedInputWriter writer = new DelimitedInputWriter(colNames,",", endPt, connection);
  1344. TransactionBatch txnBatch = connection.fetchTransactionBatch(2, writer);
  1345. txnBatch.beginNextTransaction();
  1346. txnBatch.write("name0,1,Hello streaming".getBytes());
  1347. txnBatch.write("name2,2,Welcome to streaming".getBytes());
  1348. txnBatch.write("name4,2,more Streaming unlimited".getBytes());
  1349. txnBatch.write("name5,2,even more Streaming unlimited".getBytes());
  1350. txnBatch.commit();
  1351. PrintStream origErr = System.err;
  1352. ByteArrayOutputStream myErr = new ByteArrayOutputStream();
  1353. // replace stderr and run command
  1354. System.setErr(new PrintStream(myErr));
  1355. FileDump.main(new String[]{dbLocation});
  1356. System.err.flush();
  1357. System.setErr(origErr);
  1358. String errDump = new String(myErr.toByteArray());
  1359. Assert.assertEquals(false, errDump.contains("file(s) are corrupted"));
  1360. // since this test runs on the local file system, which does not have an API to tell if files are
  1361. // open or not, we are testing for the negative case even though the bucket files are still open
  1362. // for writes (transaction batch not closed yet)
  1363. Assert.assertEquals(false, errDump.contains("is still open for writes."));
  1364. HiveEndPoint endPt2 = new HiveEndPoint(metaStoreURI, dbName4, tblName4, null);
  1365. DelimitedInputWriter writer2 = new DelimitedInputWriter(colNames2,",", endPt2);
  1366. StreamingConnection connection2 = endPt2.newConnection(false, agentInfo);
  1367. TransactionBatch txnBatch2 = connection2.fetchTransactionBatch(2, writer2);
  1368. txnBatch2.beginNextTransaction();
  1369. txnBatch2.write("name5,2,fact3".getBytes()); // bucket 0
  1370. txnBatch2.write("name8,2,fact3".getBytes()); // bucket 1
  1371. txnBatch2.write("name0,1,fact1".getBytes()); // bucket 2
  1372. // no data for bucket 3 -- expect 0 length bucket file
  1373. txnBatch2.commit();
  1374. origErr = System.err;
  1375. myErr = new ByteArrayOutputStream();
  1376. // replace stderr and run command
  1377. System.setErr(new PrintStream(myErr));
  1378. FileDump.main(new String[]{dbLocation});
  1379. System.out.flush();
  1380. System.err.flush();
  1381. System.setErr(origErr);
  1382. errDump = new String(myErr.toByteArray());
  1383. Assert.assertEquals(false, errDump.contains("Exception"));
  1384. Assert.assertEquals(false, errDump.contains("file(s) are corrupted"));
  1385. Assert.assertEquals(false, errDump.contains("is still open for writes."));
  1386. }
  1387. @Test
  1388. public void testFileDumpCorruptDataFiles() throws Exception {
  1389. dropDB(msClient, dbName3);
  1390. // 1) Create a bucketed table
  1391. String dbLocation = dbFolder.newFolder(dbName3).getCanonicalPath() + ".db";
  1392. dbLocation = dbLocation.replaceAll("\\\\","/"); // for windows paths
  1393. String[] colNames = "key1,key2,data".split(",");
  1394. String[] colTypes = "string,int,string".split(",");
  1395. String[] bucketNames = "key1,key2".split(",");
  1396. int bucketCount = 4;
  1397. createDbAndTable(driver, dbName3, tblName3, null, colNames, colTypes, bucketNames
  1398. , null, dbLocation, bucketCount);
  1399. // 2) Insert data into the table
  1400. HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName3, tblName3, null);
  1401. StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
  1402. DelimitedInputWriter writer = new DelimitedInputWriter(colNames,",", endPt, connection);
  1403. // we need the side file to stay around for this test, so we grab a batch of 2 txns but commit only the first one
  1404. TransactionBatch txnBatch = connection.fetchTransactionBatch(2, writer);
  1405. txnBatch.beginNextTransaction();
  1406. txnBatch.write("name0,1,Hello streaming".getBytes());
  1407. txnBatch.write("name2,2,Welcome to streaming".getBytes());
  1408. txnBatch.write("name4,2,more Streaming unlimited".getBytes());
  1409. txnBatch.write("name5,2,even more Streaming unlimited".getBytes());
  1410. txnBatch.commit();
  1411. // intentionally corrupt some files
  1412. Path path = new Path(dbLocation);
  1413. Collection<String> files = FileDump.getAllFilesInPath(path, conf);
  1414. int readableFooter = -1;
  1415. for (String file : files) {
  1416. if (file.contains("bucket_00000")) {
  1417. // empty out the file
  1418. corruptDataFile(file, conf, Integer.MIN_VALUE);
  1419. } else if (file.contains("bucket_00001")) {
  1420. corruptDataFile(file, conf, -1);
  1421. } else if (file.contains("bucket_00002")) {
  1422. corruptDataFile(file, conf, 100);
  1423. } else if (file.contains("bucket_00003")) {
  1424. corruptDataFile(file, conf, 100);
  1425. }
  1426. }
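  // corruptDataFile() (see the helper further below) produces three kinds of damage: Integer.MIN_VALUE
  // truncates the bucket file to zero bytes, -1 drops its last byte, and +100 pads 100 extra zero
  // bytes past the original end of the file.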
  1427. PrintStream origErr = System.err;
  1428. ByteArrayOutputStream myErr = new ByteArrayOutputStream();
  1429. // replace stderr and run command
  1430. System.setErr(new PrintStream(myErr));
  1431. FileDump.main(new String[]{dbLocation});
  1432. System.err.flush();
  1433. System.setErr(origErr);
  1434. String errDump = new String(myErr.toByteArray());
  1435. Assert.assertEquals(false, errDump.contains("Exception"));
  1436. Assert.assertEquals(true, errDump.contains("3 file(s) are corrupted"));
  1437. Assert.assertEquals(false, errDump.contains("is still open for writes."));
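  // The --recover --skip-dump pass below is expected to rewrite each damaged bucket back to its last
  // readable ORC footer (or to create an empty ORC file when no readable footer is left, as with the
  // zeroed-out bucket_00000), which is what the assertions on its output check.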
  1438. origErr = System.err;
  1439. myErr = new ByteArrayOutputStream();
  1440. // replace stderr and run command
  1441. System.setErr(new PrintStream(myErr));
  1442. FileDump.main(new String[]{dbLocation, "--recover", "--skip-dump"});
  1443. System.err.flush();
  1444. System.setErr(origErr);
  1445. errDump = new String(myErr.toByteArray());
  1446. Assert.assertEquals(true, errDump.contains("bucket_00001 recovered successfully!"));
  1447. Assert.assertEquals(true, errDump.contains("No readable footers found. Creating empty orc file."));
  1448. Assert.assertEquals(true, errDump.contains("bucket_00002 recovered successfully!"));
  1449. Assert.assertEquals(true, errDump.contains("bucket_00003 recovered successfully!"));
  1450. Assert.assertEquals(false, errDump.contains("Exception"));
  1451. Assert.assertEquals(false, errDump.contains("is still open for writes."));
  1452. // test after recovery
  1453. origErr = System.err;
  1454. myErr = new ByteArrayOutputStream();
  1455. // replace stderr and run command
  1456. System.setErr(new PrintStream(myErr));
  1457. FileDump.main(new String[]{dbLocation});
  1458. System.err.flush();
  1459. System.setErr(origErr);
  1460. errDump = new String(myErr.toByteArray());
  1461. Assert.assertEquals(false, errDump.contains("Exception"));
  1462. Assert.assertEquals(false, errDump.contains("file(s) are corrupted"));
  1463. Assert.assertEquals(false, errDump.contains("is still open for writes."));
  1464. // after recovery there shouldn't be any *_flush_length files
  1465. files = FileDump.getAllFilesInPath(path, conf);
  1466. for (String file : files) {
  1467. Assert.assertEquals(false, file.contains("_flush_length"));
  1468. }
  1469. txnBatch.close();
  1470. }
  1471. private void corruptDataFile(final String file, final Configuration conf, final int addRemoveBytes)
  1472. throws Exception {
  1473. Path bPath = new Path(file);
  1474. Path cPath = new Path(bPath.getParent(), bPath.getName() + ".corrupt");
  1475. FileSystem fs = bPath.getFileSystem(conf);
  1476. FileStatus fileStatus = fs.getFileStatus(bPath);
  1477. int len = addRemoveBytes == Integer.MIN_VALUE ? 0 : (int) fileStatus.getLen() + addRemoveBytes;
  1478. byte[] buffer = new byte[len];
  1479. FSDataInputStream fdis = fs.open(bPath);
  1480. fdis.readFully(0, buffer, 0, (int) Math.min(fileStatus.getLen(), buffer.length));
  1481. fdis.close();
  1482. FSDataOutputStream fdos = fs.create(cPath, true);
  1483. fdos.write(buffer, 0, buffer.length);
  1484. fdos.close();
  1485. fs.delete(bPath, false);
  1486. fs.rename(cPath, bPath);
  1487. }
  1488. @Test
  1489. public void testFileDumpCorruptSideFiles() throws Exception {
  1490. dropDB(msClient, dbName3);
  1491. // 1) Create a bucketed table
  1492. String dbLocation = dbFolder.newFolder(dbName3).getCanonicalPath() + ".db";
  1493. dbLocation = dbLocation.replaceAll("\\\\","/"); // for windows paths
  1494. String[] colNames = "key1,key2,data".split(",");
  1495. String[] colTypes = "string,int,string".split(",");
  1496. String[] bucketNames = "key1,key2".split(",");
  1497. int bucketCount = 4;
  1498. createDbAndTable(driver, dbName3, tblName3, null, colNames, colTypes, bucketNames
  1499. , null, dbLocation, bucketCount);
  1500. // 2) Insert data into the table
  1501. HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName3, tblName3, null);
  1502. StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
  1503. DelimitedInputWriter writer = new DelimitedInputWriter(colNames,",", endPt, connection);
  1504. TransactionBatch txnBatch = connection.fetchTransactionBatch(2, writer);
  1505. txnBatch.beginNextTransaction();
  1506. txnBatch.write("name0,1,Hello streaming".getBytes());
  1507. txnBatch.write("name2,2,Welcome to streaming".getBytes());
  1508. txnBatch.write("name4,2,more Streaming unlimited".getBytes());
  1509. txnBatch.write("name5,2,even more Streaming unlimited".getBytes());
  1510. txnBatch.write("name6,3,aHello streaming".getBytes());
  1511. txnBatch.commit();
  1512. Map<String,List<Long>> offsetMap = new HashMap<String,List<Long>>();
  1513. recordOffsets(conf, dbLocation, offsetMap);
  1514. txnBatch.beginNextTransaction();
  1515. txnBatch.write("name01,11,-Hello streaming".getBytes());
  1516. txnBatch.write("name21,21,-Welcome to streaming".getBytes());
  1517. txnBatch.write("name41,21,-more Streaming unlimited".getBytes());
  1518. txnBatch.write("name51,21,-even more Streaming unlimited".getBytes());
  1519. txnBatch.write("name02,12,--Hello streaming".getBytes());
  1520. txnBatch.write("name22,22,--Welcome to streaming".getBytes());
  1521. txnBatch.write("name42,22,--more Streaming unlimited".getBytes());
  1522. txnBatch.write("name52,22,--even more Streaming unlimited".getBytes());
  1523. txnBatch.write("name7,4,aWelcome to streaming".getBytes());
  1524. txnBatch.write("name8,5,amore Streaming unlimited".getBytes());
  1525. txnBatch.write("name9,6,aeven more Streaming unlimited".getBytes());
  1526. txnBatch.write("name10,7,bHello streaming".getBytes());
  1527. txnBatch.write("name11,8,bWelcome to streaming".getBytes());
  1528. txnBatch.write("name12,9,bmore Streaming unlimited".getBytes());
  1529. txnBatch.write("name13,10,beven more Streaming unlimited".getBytes());
  1530. txnBatch.commit();
  1531. recordOffsets(conf, dbLocation, offsetMap);
  1532. // intentionally corrupt some files
  1533. Path path = new Path(dbLocation);
  1534. Collection<String> files = FileDump.getAllFilesInPath(path, conf);
  1535. for (String file : files) {
  1536. if (file.contains("bucket_00000")) {
  1537. corruptSideFile(file, conf, offsetMap, "bucket_00000", -1); // corrupt last entry
  1538. } else if (file.contains("bucket_00001")) {
  1539. corruptSideFile(file, conf, offsetMap, "bucket_00001", 0); // empty out side file
  1540. } else if (file.contains("bucket_00002")) {
  1541. corruptSideFile(file, conf, offsetMap, "bucket_00002", 3); // total 3 entries (2 valid + 1 fake)
  1542. } else if (file.contains("bucket_00003")) {
  1543. corruptSideFile(file, conf, offsetMap, "bucket_00003", 10); // total 10 entries (2 valid + 8 fake)
  1544. }
  1545. }
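  // corruptSideFile() (further below) damages each side file in a different way: a negative count
  // rewrites it with the last 8-byte entry truncated to 3 bytes, 0 leaves it completely empty, and a
  // positive count keeps up to that many recorded offsets and then appends fake ones beyond the data.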
  1546. PrintStream origErr = System.err;
  1547. ByteArrayOutputStream myErr = new ByteArrayOutputStream();
  1548. // replace stderr and run command
  1549. System.setErr(new PrintStream(myErr));
  1550. FileDump.main(new String[]{dbLocation});
  1551. System.err.flush();
  1552. System.setErr(origErr);
  1553. String errDump = new String(myErr.toByteArray());
  1554. Assert.assertEquals(true, errDump.contains("bucket_00000_flush_length [length: 11"));
  1555. Assert.assertEquals(true, errDump.contains("bucket_00001_flush_length [length: 0"));
  1556. Assert.assertEquals(true, errDump.contains("bucket_00002_flush_length [length: 24"));
  1557. Assert.assertEquals(true, errDump.contains("bucket_00003_flush_length [length: 80"));
  1558. Assert.assertEquals(false, errDump.contains("Exception"));
  1559. Assert.assertEquals(true, errDump.contains("4 file(s) are corrupted"));
  1560. Assert.assertEquals(false, errDump.contains("is still open for writes."));
  1561. origErr = System.err;
  1562. myErr = new ByteArrayOutputStream();
  1563. // replace stderr and run command
  1564. System.setErr(new PrintStream(myErr));
  1565. FileDump.main(new String[]{dbLocation, "--recover", "--skip-dump"});
  1566. System.err.flush();
  1567. System.setErr(origErr);
  1568. errDump = new String(myErr.toByteArray());
  1569. Assert.assertEquals(true, errDump.contains("bucket_00000 recovered successfully!"));
  1570. Assert.assertEquals(true, errDump.contains("bucket_00001 recovered successfully!"));
  1571. Assert.assertEquals(true, errDump.contains("bucket_00002 recovered successfully!"));
  1572. Assert.assertEquals(true, errDump.contains("bucket_00003 recovered successfully!"));
  1573. List<Long> offsets = offsetMap.get("bucket_00000");
  1574. Assert.assertEquals(true, errDump.contains("Readable footerOffsets: " + offsets.toString()));
  1575. offsets = offsetMap.get("bucket_00001");
  1576. Assert.assertEquals(true, errDump.contains("Readable footerOffsets: " + offsets.toString()));
  1577. offsets = offsetMap.get("bucket_00002");
  1578. Assert.assertEquals(true, errDump.contains("Readable footerOffsets: " + offsets.toString()));
  1579. offsets = offsetMap.get("bucket_00003");
  1580. Assert.assertEquals(true, errDump.contains("Readable footerOffsets: " + offsets.toString()));
  1581. Assert.assertEquals(false, errDump.contains("Exception"));
  1582. Assert.assertEquals(false, errDump.contains("is still open for writes."));
  1583. // test after recovery
  1584. origErr = System.err;
  1585. myErr = new ByteArrayOutputStream();
  1586. // replace stderr and run command
  1587. System.setErr(new PrintStream(myErr));
  1588. FileDump.main(new String[]{dbLocation});
  1589. System.err.flush();
  1590. System.setErr(origErr);
  1591. errDump = new String(myErr.toByteArray());
  1592. Assert.assertEquals(false, errDump.contains("Exception"));
  1593. Assert.assertEquals(false, errDump.contains("file(s) are corrupted"));
  1594. Assert.assertEquals(false, errDump.contains("is still open for writes."));
  1595. // after recovery there shouldn't be any *_flush_length files
  1596. files = FileDump.getAllFilesInPath(path, conf);
  1597. for (String file : files) {
  1598. Assert.assertEquals(false, file.contains("_flush_length"));
  1599. }
  1600. txnBatch.close();
  1601. }
  1602. private void corruptSideFile(final String file, final HiveConf conf,
  1603. final Map<String, List<Long>> offsetMap, final String key, final int numEntries)
  1604. throws IOException {
  1605. Path dataPath = new Path(file);
  1606. Path sideFilePath = OrcAcidUtils.getSideFile(dataPath);
  1607. Path cPath = new Path(sideFilePath.getParent(), sideFilePath.getName() + ".corrupt");
  1608. FileSystem fs = sideFilePath.getFileSystem(conf);
  1609. List<Long> offsets = offsetMap.get(key);
  1610. long lastOffset = offsets.get(offsets.size() - 1);
  1611. FSDataOutputStream fdos = fs.create(cPath, true);
  1612. // corrupt last entry
  1613. if (numEntries < 0) {
  1614. byte[] lastOffsetBytes = longToBytes(lastOffset);
  1615. for (int i = 0; i < offsets.size() - 1; i++) {
  1616. fdos.writeLong(offsets.get(i));
  1617. }
  1618. fdos.write(lastOffsetBytes, 0, 3);
  1619. } else if (numEntries > 0) {
  1620. int firstRun = Math.min(offsets.size(), numEntries);
  1621. // add original entries
  1622. for (int i=0; i < firstRun; i++) {
  1623. fdos.writeLong(offsets.get(i));
  1624. }
  1625. // add fake entries
  1626. int remaining = numEntries - firstRun;
  1627. for (int i = 0; i < remaining; i++) {
  1628. fdos.writeLong(lastOffset + ((i + 1) * 100));
  1629. }
  1630. }
  1631. fdos.close();
  1632. fs.delete(sideFilePath, false);
  1633. fs.rename(cPath, sideFilePath);
  1634. }
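  // A minimal sketch (not used by the tests above) of how such a side file can be read back,
  // assuming the same layout corruptSideFile() above writes: a plain sequence of 8-byte longs, each
  // recording a flushed length/offset of the matching bucket file.
  private static List<Long> readSideFileLengths(FileSystem fs, Path sideFile) throws IOException {
  List<Long> lengths = new ArrayList<Long>();
  long entries = fs.getFileStatus(sideFile).getLen() / 8; // each complete entry is one long
  FSDataInputStream in = fs.open(sideFile);
  try {
  for (long i = 0; i < entries; i++) {
  lengths.add(in.readLong());
  }
  } finally {
  in.close();
  }
  return lengths;
  }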
  1635. private byte[] longToBytes(long x) {
  1636. ByteBuffer buffer = ByteBuffer.allocate(8);
  1637. buffer.putLong(x);
  1638. return buffer.array();
  1639. }
  1640. private void recordOffsets(final HiveConf conf, final String dbLocation,
  1641. final Map<String, List<Long>> offsetMap) throws IOException {
  1642. Path path = new Path(dbLocation);
  1643. Collection<String> files = FileDump.getAllFilesInPath(path, conf);
  1644. String[] buckets = {"bucket_00000", "bucket_00001", "bucket_00002", "bucket_00003"};
  1645. for (String file: files) {
  1646. Path bPath = new Path(file);
  1647. FileSystem fs = bPath.getFileSystem(conf);
  1648. FileStatus fileStatus = fs.getFileStatus(bPath);
  1649. long len = fileStatus.getLen();
  1650. // record the current length of this file under the bucket whose name it contains
  1651. for (String bucket : buckets) {
  1652. if (file.contains(bucket)) {
  1653. List<Long> offsets = offsetMap.get(bucket);
  1654. if (offsets == null) {
  1655. offsets = new ArrayList<Long>();
  1656. offsetMap.put(bucket, offsets);
  1657. }
  1658. offsets.add(len);
  1659. break;
  1660. }
  1661. }
  1662. }
  1663. }
  1692. @Test
  1693. public void testErrorHandling() throws Exception {
  1694. String agentInfo = "UT_" + Thread.currentThread().getName();
  1695. runCmdOnDriver("create database testErrors");
  1696. runCmdOnDriver("use testErrors");
  1697. runCmdOnDriver("create table T(a int, b int) clustered by (b) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')");
  1698. HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "testErrors", "T", null);
  1699. StreamingConnection connection = endPt.newConnection(false, agentInfo);
  1700. DelimitedInputWriter innerWriter = new DelimitedInputWriter("a,b".split(","),",", endPt, connection);
  1701. FaultyWriter writer = new FaultyWriter(innerWriter);
  1702. TransactionBatch txnBatch = connection.fetchTransactionBatch(2, writer);
  1703. txnBatch.close();
  1704. txnBatch.heartbeat();//this is a no-op on a closed batch
  1705. txnBatch.abort();//ditto
  1706. GetOpenTxnsInfoResponse r = msClient.showTxns();
  1707. Assert.assertEquals("HWM didn't match", 17, r.getTxn_high_water_mark());
  1708. List<TxnInfo> ti = r.getOpen_txns();
  1709. Assert.assertEquals("wrong status ti(0)", TxnState.ABORTED, ti.get(0).getState());
  1710. Assert.assertEquals("wrong status ti(1)", TxnState.ABORTED, ti.get(1).getState());
  1711. Exception expectedEx = null;
  1712. try {
  1713. txnBatch.beginNextTransaction();
  1714. }
  1715. catch(IllegalStateException ex) {
  1716. expectedEx = ex;
  1717. }
  1718. Assert.assertTrue("beginNextTransaction() should have failed",
  1719. expectedEx != null && expectedEx.getMessage().contains("has been closed()"));
  1720. expectedEx = null;
  1721. try {
  1722. txnBatch.write("name0,1,Hello streaming".getBytes());
  1723. }
  1724. catch(IllegalStateException ex) {
  1725. expectedEx = ex;
  1726. }
  1727. Assert.assertTrue("write() should have failed",
  1728. expectedEx != null && expectedEx.getMessage().contains("has been closed()"));
  1729. expectedEx = null;
  1730. try {
  1731. txnBatch.commit();
  1732. }
  1733. catch(IllegalStateException ex) {
  1734. expectedEx = ex;
  1735. }
  1736. Assert.assertTrue("commit() should have failed",
  1737. expectedEx != null && expectedEx.getMessage().contains("has been closed()"));
  1738. txnBatch = connection.fetchTransactionBatch(2, writer);
  1739. txnBatch.beginNextTransaction();
  1740. txnBatch.write("name2,2,Welcome to streaming".getBytes());
  1741. txnBatch.write("name4,2,more Streaming unlimited".getBytes());
  1742. txnBatch.write("name5,2,even more Streaming unlimited".getBytes());
  1743. txnBatch.commit();
  1744. //test toString()
  1745. String s = txnBatch.toString();
  1746. Assert.assertTrue("Actual: " + s, s.contains("LastUsed " + JavaUtils.txnIdToString(txnBatch.getCurrentTxnId())));
  1747. Assert.assertTrue("Actual: " + s, s.contains("TxnStatus[CO]"));
  1748. expectedEx = null;
  1749. txnBatch.beginNextTransaction();
  1750. writer.enableErrors();
  1751. try {
  1752. txnBatch.write("name6,2,Doh!".getBytes());
  1753. }
  1754. catch(StreamingIOFailure ex) {
  1755. expectedEx = ex;
  1756. txnBatch.getCurrentTransactionState();
  1757. txnBatch.getCurrentTxnId();//test it doesn't throw ArrayIndexOutOfBounds...
  1758. }
  1759. Assert.assertTrue("Wrong exception: " + (expectedEx != null ? expectedEx.getMessage() : "?"),
  1760. expectedEx != null && expectedEx.getMessage().contains("Simulated fault occurred"));
  1761. expectedEx = null;
  1762. try {
  1763. txnBatch.commit();
  1764. }
  1765. catch(IllegalStateException ex) {
  1766. expectedEx = ex;
  1767. }
  1768. Assert.assertTrue("commit() should have failed",
  1769. expectedEx != null && expectedEx.getMessage().contains("has been closed()"));
  1770. //test toString()
  1771. s = txnBatch.toString();
  1772. Assert.assertTrue("Actual: " + s, s.contains("LastUsed " + JavaUtils.txnIdToString(txnBatch.getCurrentTxnId())));
  1773. Assert.assertTrue("Actual: " + s, s.contains("TxnStatus[CA]"));
  1774. r = msClient.showTxns();
  1775. Assert.assertEquals("HWM didn't match", 19, r.getTxn_high_water_mark());
  1776. ti = r.getOpen_txns();
  1777. Assert.assertEquals("wrong status ti(0)", TxnState.ABORTED, ti.get(0).getState());
  1778. Assert.assertEquals("wrong status ti(1)", TxnState.ABORTED, ti.get(1).getState());
  1779. //txnid 3 was committed and thus not open
  1780. Assert.assertEquals("wrong status ti(2)", TxnState.ABORTED, ti.get(2).getState());
  1781. writer.disableErrors();
  1782. txnBatch = connection.fetchTransactionBatch(2, writer);
  1783. txnBatch.beginNextTransaction();
  1784. txnBatch.write("name2,2,Welcome to streaming".getBytes());
  1785. writer.enableErrors();
  1786. expectedEx = null;
  1787. try {
  1788. txnBatch.commit();
  1789. }
  1790. catch(StreamingIOFailure ex) {
  1791. expectedEx = ex;
  1792. }
  1793. Assert.assertTrue("Wrong exception: " + (expectedEx != null ? expectedEx.getMessage() : "?"),
  1794. expectedEx != null && expectedEx.getMessage().contains("Simulated fault occurred"));
  1795. r = msClient.showTxns();
  1796. Assert.assertEquals("HWM didn't match", 21, r.getTxn_high_water_mark());
  1797. ti = r.getOpen_txns();
  1798. Assert.assertEquals("wrong status ti(3)", TxnState.ABORTED, ti.get(3).getState());
  1799. Assert.assertEquals("wrong status ti(4)", TxnState.ABORTED, ti.get(4).getState());
  1800. txnBatch.abort();
  1801. }
  1802. // assumes an unpartitioned table
  1803. // returns a map<bucketNum, list<record> >
  1804. private HashMap<Integer, ArrayList<SampleRec>> dumpAllBuckets(String dbLocation, String tableName)
  1805. throws IOException {
  1806. HashMap<Integer, ArrayList<SampleRec>> result = new HashMap<Integer, ArrayList<SampleRec>>();
  1807. for (File deltaDir : new File(dbLocation + "/" + tableName).listFiles()) {
  1808. if(!deltaDir.getName().startsWith("delta")) {
  1809. continue;
  1810. }
  1811. File[] bucketFiles = deltaDir.listFiles(new FileFilter() {
  1812. @Override
  1813. public boolean accept(File pathname) {
  1814. String name = pathname.getName();
  1815. return !name.startsWith("_") && !name.startsWith(".");
  1816. }
  1817. });
  1818. for (File bucketFile : bucketFiles) {
  1819. if(bucketFile.toString().endsWith("length")) {
  1820. continue;
  1821. }
  1822. Integer bucketNum = getBucketNumber(bucketFile);
  1823. ArrayList<SampleRec> recs = dumpBucket(new Path(bucketFile.toString()));
  1824. result.put(bucketNum, recs);
  1825. }
  1826. }
  1827. return result;
  1828. }
  1829. //assumes bucket_NNNNN format of file name
  1830. private Integer getBucketNumber(File bucketFile) {
  1831. String fname = bucketFile.getName();
  1832. int start = fname.indexOf('_');
  1833. String number = fname.substring(start+1, fname.length());
  1834. return Integer.parseInt(number);
  1835. }
  1836. // delete db and all tables in it
  1837. public static void dropDB(IMetaStoreClient client, String databaseName) {
  1838. try {
  1839. for (String table : client.listTableNamesByFilter(databaseName, "", (short)-1)) {
  1840. client.dropTable(databaseName, table, true, true);
  1841. }
  1842. client.dropDatabase(databaseName);
  1843. } catch (TException e) {
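  // best-effort cleanup: the database (or its tables) may not exist yet, so metastore errors are ignored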
  1844. }
  1845. }
  1846. ///////// -------- UTILS ------- /////////
  1847. // returns Path of the partition created (if any) else Path of table
  1848. private static Path createDbAndTable(IDriver driver, String databaseName,
  1849. String tableName, List<String> partVals,
  1850. String[] colNames, String[] colTypes,
  1851. String[] bucketCols,
  1852. String[] partNames, String dbLocation, int bucketCount)
  1853. throws Exception {
  1854. String dbUri = "raw://" + new Path(dbLocation).toUri().toString();
  1855. String tableLoc = dbUri + Path.SEPARATOR + tableName;
  1856. runDDL(driver, "create database IF NOT EXISTS " + databaseName + " location '" + dbUri + "'");
  1857. runDDL(driver, "use " + databaseName);
  1858. String crtTbl = "create table " + tableName +
  1859. " ( " + getTableColumnsStr(colNames,colTypes) + " )" +
  1860. getPartitionStmtStr(partNames) +
  1861. " clustered by ( " + join(bucketCols, ",") + " )" +
  1862. " into " + bucketCount + " buckets " +
  1863. " stored as orc " +
  1864. " location '" + tableLoc + "'" +
  1865. " TBLPROPERTIES ('transactional'='true') ";
  1866. runDDL(driver, crtTbl);
  1867. if(partNames!=null && partNames.length!=0) {
  1868. return addPartition(driver, tableName, partVals, partNames);
  1869. }
  1870. return new Path(tableLoc);
  1871. }
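  // For example, with colNames = {key1,key2,data}, colTypes = {string,int,string},
  // bucketCols = {key1,key2}, bucketCount = 4 and no partition columns, the generated DDL is roughly:
  // create table T ( key1 string,key2 int,data string ) clustered by ( key1,key2 ) into 4 buckets
  // stored as orc location '<tableLoc>' TBLPROPERTIES ('transactional'='true')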
  1872. private static Path addPartition(IDriver driver, String tableName, List<String> partVals, String[] partNames)
  1873. throws Exception {
  1874. String partSpec = getPartsSpec(partNames, partVals);
  1875. String addPart = "alter table " + tableName + " add partition ( " + partSpec + " )";
  1876. runDDL(driver, addPart);
  1877. return getPartitionPath(driver, tableName, partSpec);
  1878. }
  1879. private static Path getPartitionPath(IDriver driver, String tableName, String partSpec) throws Exception {
  1880. ArrayList<String> res = queryTable(driver, "describe extended " + tableName + " PARTITION (" + partSpec + ")");
  1881. String partInfo = res.get(res.size() - 1);
  1882. int start = partInfo.indexOf("location:") + "location:".length();
  1883. int end = partInfo.indexOf(",",start);
  1884. return new Path( partInfo.substring(start,end) );
  1885. }
  1886. private static String getTableColumnsStr(String[] colNames, String[] colTypes) {
  1887. StringBuilder sb = new StringBuilder();
  1888. for (int i=0; i < colNames.length; ++i) {
  1889. sb.append(colNames[i]).append(" ").append(colTypes[i]);
  1890. if (i<colNames.length-1) {
  1891. sb.append(",");
  1892. }
  1893. }
  1894. return sb.toString();
  1895. }
  1896. // converts partNames into "partName1 string, partName2 string"
  1897. private static String getTablePartsStr(String[] partNames) {
  1898. if (partNames==null || partNames.length==0) {
  1899. return "";
  1900. }
  1901. StringBuilder sb = new StringBuilder();
  1902. for (int i=0; i < partNames.length; ++i) {
  1903. sb.append(partNames[i]).append(" string");
  1904. if (i < partNames.length-1) {
  1905. sb.append(",");
  1906. }
  1907. }
  1908. return sb.toString();
  1909. }
  1910. // converts partNames,partVals into "partName1=val1, partName2=val2"
  1911. private static String getPartsSpec(String[] partNames, List<String> partVals) {
  1912. StringBuilder sb = new StringBuilder();
  1913. for (int i=0; i < partVals.size(); ++i) {
  1914. sb.append(partNames[i]).append(" = '").append(partVals.get(i)).append("'");
  1915. if(i < partVals.size()-1) {
  1916. sb.append(",");
  1917. }
  1918. }
  1919. return sb.toString();
  1920. }
  1921. private static String join(String[] values, String delimiter) {
  1922. if(values==null) {
  1923. return null;
  1924. }
  1925. StringBuilder strbuf = new StringBuilder();
  1926. boolean first = true;
  1927. for (Object value : values) {
  1928. if (!first) { strbuf.append(delimiter); } else { first = false; }
  1929. strbuf.append(value.toString());
  1930. }
  1931. return strbuf.toString();
  1932. }
  1933. private static String getPartitionStmtStr(String[] partNames) {
  1934. if ( partNames == null || partNames.length == 0) {
  1935. return "";
  1936. }
  1937. return " partitioned by (" + getTablePartsStr(partNames) + " )";
  1938. }
  1939. private static boolean runDDL(IDriver driver, String sql) throws QueryFailedException {
  1940. LOG.debug(sql);
  1941. System.out.println(sql);
  1942. //LOG.debug("Running Hive Query: "+ sql);
  1943. try {
  1944. driver.run(sql);
  1945. return true;
  1946. } catch (CommandProcessorException e) {
  1947. LOG.error("Statement: " + sql + " failed: " + e);
  1948. return false;
  1949. }
  1950. }
  1951. private static ArrayList<String> queryTable(IDriver driver, String query) throws IOException {
  1952. try {
  1953. driver.run(query);
  1954. } catch (CommandProcessorException e) {
  1955. throw new RuntimeException(query + " failed: " + e);
  1956. }
  1957. ArrayList<String> res = new ArrayList<String>();
  1958. driver.getResults(res);
  1959. return res;
  1960. }
  1961. private static class SampleRec {
  1962. public String field1;
  1963. public int field2;
  1964. public String field3;
  1965. public SampleRec(String field1, int field2, String field3) {
  1966. this.field1 = field1;
  1967. this.field2 = field2;
  1968. this.field3 = field3;
  1969. }
  1970. @Override
  1971. public boolean equals(Object o) {
  1972. if (this == o) {
  1973. return true;
  1974. }
  1975. if (o == null || getClass() != o.getClass()) {
  1976. return false;
  1977. }
  1978. SampleRec that = (SampleRec) o;
  1979. if (field2 != that.field2) {
  1980. return false;
  1981. }
  1982. if (field1 != null ? !field1.equals(that.field1) : that.field1 != null) {
  1983. return false;
  1984. }
  1985. return !(field3 != null ? !field3.equals(that.field3) : that.field3 != null);
  1986. }
  1987. @Override
  1988. public int hashCode() {
  1989. int result = field1 != null ? field1.hashCode() : 0;
  1990. result = 31 * result + field2;
  1991. result = 31 * result + (field3 != null ? field3.hashCode() : 0);
  1992. return result;
  1993. }
  1994. @Override
  1995. public String toString() {
  1996. return " { " +
  1997. "'" + field1 + '\'' +
  1998. "," + field2 +
  1999. ",'" + field3 + '\'' +
  2000. " }";
  2001. }
  2002. }
  2003. /**
  2004. * This is test-only wrapper around the real RecordWriter.
  2005. * It can simulate faults from lower levels to test error handling logic.
  2006. */
  2007. private static final class FaultyWriter implements RecordWriter {
  2008. private final RecordWriter delegate;
  2009. private boolean shouldThrow = false;
  2010. private FaultyWriter(RecordWriter delegate) {
  2011. assert delegate != null;
  2012. this.delegate = delegate;
  2013. }
  2014. @Override
  2015. public void write(long writeId, byte[] record) throws StreamingException {
  2016. delegate.write(writeId, record);
  2017. produceFault();
  2018. }
  2019. @Override
  2020. public void flush() throws StreamingException {
  2021. delegate.flush();
  2022. produceFault();
  2023. }
  2024. @Override
  2025. public void clear() throws StreamingException {
  2026. delegate.clear();
  2027. }
  2028. @Override
  2029. public void newBatch(Long minTxnId, Long maxTxnID) throws StreamingException {
  2030. delegate.newBatch(minTxnId, maxTxnID);
  2031. }
  2032. @Override
  2033. public void closeBatch() throws StreamingException {
  2034. delegate.closeBatch();
  2035. }
  2036. /**
  2037. * allows testing of "unexpected" errors
  2038. * @throws StreamingIOFailure
  2039. */
  2040. private void produceFault() throws StreamingIOFailure {
  2041. if(shouldThrow) {
  2042. throw new StreamingIOFailure("Simulated fault occurred");
  2043. }
  2044. }
  2045. void enableErrors() {
  2046. shouldThrow = true;
  2047. }
  2048. void disableErrors() {
  2049. shouldThrow = false;
  2050. }
  2051. }
  2052. }