PageRenderTime 41ms CodeModel.GetById 10ms RepoModel.GetById 1ms app.codeStats 0ms

/tags/release-0.0.0-rc0/hive/external/odbc/src/cpp/HiveRowSet.cpp

#
C++ | 465 lines | 358 code | 45 blank | 62 comment | 86 complexity | ef0ac1e322352f29e2b9a0f23dd3923d MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause, JSON, CPL-1.0
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
#include <assert.h>
#include <stdlib.h>
#include <string.h>

#include "HiveRowSet.h"
#include "hiveclienthelper.h"
  22. /*************************************************************************************************
  23. * Base HiveRowSet Class Logic
  24. ************************************************************************************************/
  25. HiveRowSet::HiveRowSet() {
  26. m_is_completely_read = false;
  27. m_bytes_read = 0;
  28. m_last_column_fetched = 0;
  29. m_field_buffer[0] = '\0';
  30. }
  31. HiveRowSet::~HiveRowSet() {
  32. }
  33. void HiveRowSet::reset() {
  34. m_is_completely_read = false;
  35. m_bytes_read = 0;
  36. m_last_column_fetched = 0;
  37. m_field_buffer[0] = '\0';
  38. /* Non Virtual Calls Pure Virtual Idiom */
  39. specialized_reset(); /* Call the specialized subclass reset method */
  40. }
  41. void HiveRowSet::initFieldBuffer() {
  42. /* m_field_buffer should always correspond to the field indicated by m_last_column_fetched*/
  43. extractField(m_last_column_fetched);
  44. }
  45. HiveReturn HiveRowSet::getFieldDataLen(size_t column_idx, size_t* col_len, char* err_buf,
  46. size_t err_buf_len) {
  47. RETURN_ON_ASSERT(col_len == NULL, __FUNCTION__,
  48. "Pointer to col_len (output) cannot be NULL.", err_buf, err_buf_len, HIVE_ERROR);
  49. RETURN_ON_ASSERT(getColumnCount() == 0, __FUNCTION__,
  50. "Rowset contains zero columns.", err_buf, err_buf_len, HIVE_ERROR);
  51. RETURN_ON_ASSERT(column_idx >= getColumnCount(), __FUNCTION__,
  52. "Column index out of bounds.", err_buf, err_buf_len, HIVE_ERROR);
  53. *col_len = getFieldLen(column_idx);
  54. return HIVE_SUCCESS;
  55. }
  56. HiveReturn HiveRowSet::getFieldAsCString(size_t column_idx, char* buffer, size_t buffer_len,
  57. size_t* data_byte_size, int* is_null_value, char* err_buf,
  58. size_t err_buf_len) {
  59. RETURN_ON_ASSERT(buffer == NULL, __FUNCTION__,
  60. "Column data output buffer cannot be NULL.", err_buf, err_buf_len, HIVE_ERROR);
  61. RETURN_ON_ASSERT(is_null_value == NULL, __FUNCTION__,
  62. "Column data is_null_value (output) cannot be NULL.", err_buf, err_buf_len,
  63. HIVE_ERROR);
  64. RETURN_ON_ASSERT(getColumnCount() == 0, __FUNCTION__,
  65. "Rowset contains zero columns.", err_buf, err_buf_len, HIVE_ERROR);
  66. RETURN_ON_ASSERT(column_idx >= getColumnCount(), __FUNCTION__,
  67. "Column index out of bounds.", err_buf, err_buf_len, HIVE_ERROR);
  68. RETURN_ON_ASSERT(buffer_len == 0, __FUNCTION__,
  69. "Output buffer cannot have a size of zero.", err_buf, err_buf_len, HIVE_ERROR);
  70. if (m_last_column_fetched != column_idx) {
  71. extractField(column_idx);
  72. m_bytes_read = 0; /* Reset the read offset if different from the last column fetched */
  73. m_last_column_fetched = column_idx;
  74. m_is_completely_read = false;
  75. }
  76. if (m_is_completely_read) {
  77. return HIVE_NO_MORE_DATA; /* This field has already been completely fetched by a previous call*/
  78. }
  79. /* If the column data is the same as the null format spec... */
  80. if (strcmp(getNullFormat(), m_field_buffer) == 0) {
  81. /* This value must be NULL */
  82. *is_null_value = 1;
  83. if (data_byte_size != NULL) {
  84. *data_byte_size = 0;
  85. }
  86. buffer[0] = '\0';
  87. } else {
  88. /* This value has been determined not to be NULL */
  89. *is_null_value = 0;
  90. size_t data_total_len = getFieldLen(column_idx);
  91. /* Cannot read more data then the total number of bytes available */
  92. assert(data_total_len >= m_bytes_read);
  93. size_t bytes_remaining = data_total_len - m_bytes_read; // Excludes null char
  94. if (data_byte_size != NULL) {
  95. /* Save the number of remaining characters to return before this fetch */
  96. *data_byte_size = bytes_remaining;
  97. }
  98. /* Move pointer to the read location */
  99. const char* src_str_ptr = m_field_buffer + m_bytes_read;
  100. /* The total number of bytes to read (+1 null terminator) should be no more than the
  101. * size of the field buffer */
  102. assert(m_bytes_read + bytes_remaining + 1 <= sizeof(m_field_buffer));
  103. /* Copy as many characters as possible from the read location */
  104. size_t bytes_copied = safe_strncpy(buffer, src_str_ptr, min(buffer_len, bytes_remaining + 1)); // +1 for null terminator
  105. /* bytes_copied does not count the null terminator */
  106. m_bytes_read += bytes_copied;
  107. if (m_bytes_read < data_total_len) {
  108. return HIVE_SUCCESS_WITH_MORE_DATA; /* Data truncated; more data to return */
  109. }
  110. }
  111. m_is_completely_read = true;
  112. return HIVE_SUCCESS; /* All data successfully read */
  113. }
  114. HiveReturn HiveRowSet::getFieldAsDouble(size_t column_idx, double* buffer, int* is_null_value,
  115. char* err_buf, size_t err_buf_len) {
  116. RETURN_ON_ASSERT(buffer == NULL, __FUNCTION__,
  117. "Column data output buffer cannot be NULL.", err_buf, err_buf_len, HIVE_ERROR);
  118. RETURN_ON_ASSERT(is_null_value == NULL, __FUNCTION__,
  119. "Column data is_null_value (output) cannot be NULL.", err_buf, err_buf_len,
  120. HIVE_ERROR);
  121. RETURN_ON_ASSERT(getColumnCount() == 0, __FUNCTION__,
  122. "Rowset contains zero columns.", err_buf, err_buf_len, HIVE_ERROR);
  123. RETURN_ON_ASSERT(column_idx >= getColumnCount(), __FUNCTION__,
  124. "Column index out of bounds.", err_buf, err_buf_len, HIVE_ERROR);
  125. if (m_last_column_fetched != column_idx) {
  126. /* Reset if this column was not fetched on the last attempt */
  127. extractField(column_idx);
  128. m_bytes_read = 0; /* Reset the read offset if different from the last column fetched */
  129. m_last_column_fetched = column_idx;
  130. m_is_completely_read = false;
  131. }
  132. if (m_is_completely_read) {
  133. return HIVE_NO_MORE_DATA; /* This column has already been completely fetched */
  134. }
  135. /* If the column data is the same as the nullformat spec... */
  136. if (strcmp(getNullFormat(), m_field_buffer) == 0) {
  137. *is_null_value = 1;
  138. *buffer = 0.0;
  139. } else {
  140. *is_null_value = 0;
  141. *buffer = atof(m_field_buffer);
  142. }
  143. m_is_completely_read = true;
  144. return HIVE_SUCCESS;
  145. }
  146. HiveReturn HiveRowSet::getFieldAsInt(size_t column_idx, int* buffer, int* is_null_value,
  147. char* err_buf, size_t err_buf_len) {
  148. RETURN_ON_ASSERT(buffer == NULL, __FUNCTION__,
  149. "Column data output buffer cannot be NULL.", err_buf, err_buf_len, HIVE_ERROR);
  150. RETURN_ON_ASSERT(is_null_value == NULL, __FUNCTION__,
  151. "Column data is_null_value (output) cannot be NULL.", err_buf, err_buf_len,
  152. HIVE_ERROR);
  153. RETURN_ON_ASSERT(getColumnCount() == 0, __FUNCTION__,
  154. "Rowset contains zero columns.", err_buf, err_buf_len, HIVE_ERROR);
  155. RETURN_ON_ASSERT(column_idx >= getColumnCount(), __FUNCTION__,
  156. "Column index out of bounds.", err_buf, err_buf_len, HIVE_ERROR);
  157. if (m_last_column_fetched != column_idx) {
  158. extractField(column_idx);
  159. m_bytes_read = 0; /* Reset the read offset if different from the last column fetched */
  160. m_last_column_fetched = column_idx;
  161. m_is_completely_read = false;
  162. }
  163. if (m_is_completely_read) {
  164. return HIVE_NO_MORE_DATA; /* This column has already been completely fetched */
  165. }
  166. /* If the column data is the same as the null format spec... */
  167. if (strcmp(getNullFormat(), m_field_buffer) == 0) {
  168. *is_null_value = 1;
  169. *buffer = 0;
  170. } else {
  171. *is_null_value = 0;
  172. *buffer = atoi(m_field_buffer);
  173. }
  174. m_is_completely_read = true;
  175. return HIVE_SUCCESS;
  176. }
  177. HiveReturn HiveRowSet::getFieldAsLong(size_t column_idx, long* buffer, int* is_null_value,
  178. char* err_buf, size_t err_buf_len) {
  179. RETURN_ON_ASSERT(buffer == NULL, __FUNCTION__,
  180. "Column data output buffer cannot be NULL.", err_buf, err_buf_len, HIVE_ERROR);
  181. RETURN_ON_ASSERT(is_null_value == NULL, __FUNCTION__,
  182. "Column data is_null_value (output) cannot be NULL.", err_buf, err_buf_len,
  183. HIVE_ERROR);
  184. RETURN_ON_ASSERT(getColumnCount() == 0, __FUNCTION__,
  185. "Rowset contains zero columns.", err_buf, err_buf_len, HIVE_ERROR);
  186. RETURN_ON_ASSERT(column_idx >= getColumnCount(), __FUNCTION__,
  187. "Column index out of bounds.", err_buf, err_buf_len, HIVE_ERROR);
  188. if (m_last_column_fetched != column_idx) {
  189. extractField(column_idx);
  190. m_bytes_read = 0; /* Reset the read offset if different from the last column fetched */
  191. m_last_column_fetched = column_idx;
  192. m_is_completely_read = false;
  193. }
  194. if (m_is_completely_read) {
  195. return HIVE_NO_MORE_DATA; /* This column has already been completely fetched */
  196. }
  197. /* If the column data is the same as the null format spec... */
  198. if (strcmp(getNullFormat(), m_field_buffer) == 0) {
  199. *is_null_value = 1;
  200. *buffer = 0;
  201. } else {
  202. *is_null_value = 0;
  203. *buffer = atol(m_field_buffer);
  204. }
  205. m_is_completely_read = true;
  206. return HIVE_SUCCESS;
  207. }
  208. HiveReturn HiveRowSet::getFieldAsULong(size_t column_idx, unsigned long* buffer,
  209. int* is_null_value, char* err_buf, size_t err_buf_len) {
  210. RETURN_ON_ASSERT(buffer == NULL, __FUNCTION__,
  211. "Column data output buffer cannot be NULL.", err_buf, err_buf_len, HIVE_ERROR);
  212. RETURN_ON_ASSERT(is_null_value == NULL, __FUNCTION__,
  213. "Column data is_null_value (output) cannot be NULL.", err_buf, err_buf_len,
  214. HIVE_ERROR);
  215. RETURN_ON_ASSERT(getColumnCount() == 0, __FUNCTION__,
  216. "Rowset contains zero columns.", err_buf, err_buf_len, HIVE_ERROR);
  217. RETURN_ON_ASSERT(column_idx >= getColumnCount(), __FUNCTION__,
  218. "Column index out of bounds.", err_buf, err_buf_len, HIVE_ERROR);
  219. if (m_last_column_fetched != column_idx) {
  220. extractField(column_idx);
  221. m_bytes_read = 0; /* Reset the read offset if different from the last column fetched */
  222. m_last_column_fetched = column_idx;
  223. m_is_completely_read = false;
  224. }
  225. if (m_is_completely_read) {
  226. return HIVE_NO_MORE_DATA; /* This column has already been completely fetched */
  227. }
  228. /* If the column data is the same as the null format spec... */
  229. if (strcmp(getNullFormat(), m_field_buffer) == 0) {
  230. *is_null_value = 1;
  231. *buffer = 0;
  232. } else {
  233. *is_null_value = 0;
  234. *buffer = strtoul(m_field_buffer, NULL, 10);
  235. }
  236. m_is_completely_read = true;
  237. return HIVE_SUCCESS;
  238. }
  239. HiveReturn HiveRowSet::getFieldAsI64(size_t column_idx, int64_t* buffer, int* is_null_value,
  240. char* err_buf, size_t err_buf_len) {
  241. RETURN_ON_ASSERT(buffer == NULL, __FUNCTION__,
  242. "Column data output buffer cannot be NULL.", err_buf, err_buf_len, HIVE_ERROR);
  243. RETURN_ON_ASSERT(is_null_value == NULL, __FUNCTION__,
  244. "Column data is_null_value (output) cannot be NULL.", err_buf, err_buf_len,
  245. HIVE_ERROR);
  246. RETURN_ON_ASSERT(getColumnCount() == 0, __FUNCTION__,
  247. "Rowset contains zero columns.", err_buf, err_buf_len, HIVE_ERROR);
  248. RETURN_ON_ASSERT(column_idx >= getColumnCount(), __FUNCTION__,
  249. "Column index out of bounds.", err_buf, err_buf_len, HIVE_ERROR);
  250. if (m_last_column_fetched != column_idx) {
  251. extractField(column_idx);
  252. m_bytes_read = 0; /* Reset the read offset if different from the last column fetched */
  253. m_last_column_fetched = column_idx;
  254. m_is_completely_read = false;
  255. }
  256. if (m_is_completely_read) {
  257. return HIVE_NO_MORE_DATA; /* This column has already been completely fetched */
  258. }
  259. /* If the column data is the same as the null format spec... */
  260. if (strcmp(getNullFormat(), m_field_buffer) == 0) {
  261. *is_null_value = 1;
  262. *buffer = 0;
  263. } else {
  264. *is_null_value = 0;
  265. *buffer = ATOI64(m_field_buffer);
  266. }
  267. m_is_completely_read = true;
  268. return HIVE_SUCCESS;
  269. }
  270. HiveReturn HiveRowSet::getFieldAsI64U(size_t column_idx, uint64_t* buffer, int* is_null_value,
  271. char* err_buf, size_t err_buf_len) {
  272. RETURN_ON_ASSERT(buffer == NULL, __FUNCTION__,
  273. "Column data output buffer cannot be NULL.", err_buf, err_buf_len, HIVE_ERROR);
  274. RETURN_ON_ASSERT(is_null_value == NULL, __FUNCTION__,
  275. "Column data is_null_value (output) cannot be NULL.", err_buf, err_buf_len,
  276. HIVE_ERROR);
  277. RETURN_ON_ASSERT(getColumnCount() == 0, __FUNCTION__,
  278. "Rowset contains zero columns.", err_buf, err_buf_len, HIVE_ERROR);
  279. RETURN_ON_ASSERT(column_idx >= getColumnCount(), __FUNCTION__,
  280. "Column index out of bounds.", err_buf, err_buf_len, HIVE_ERROR);
  281. if (m_last_column_fetched != column_idx) {
  282. extractField(column_idx);
  283. m_bytes_read = 0; /* Reset the read offset if different from the last column fetched */
  284. m_last_column_fetched = column_idx;
  285. m_is_completely_read = false;
  286. }
  287. if (m_is_completely_read) {
  288. return HIVE_NO_MORE_DATA; /* This column has already been completely fetched */
  289. }
  290. /* If the column data is the same as the null format spec... */
  291. if (strcmp(getNullFormat(), m_field_buffer) == 0) {
  292. *is_null_value = 1;
  293. *buffer = 0;
  294. } else {
  295. *is_null_value = 0;
  296. *buffer = ATOI64U(m_field_buffer);
  297. }
  298. m_is_completely_read = true;
  299. return HIVE_SUCCESS;
  300. }
  301. /*************************************************************************************************
  302. * HiveSerializedRowSet Subclass Definition
  303. ************************************************************************************************/
  304. HiveSerializedRowSet::HiveSerializedRowSet() {
  305. m_row_weak_ptr = NULL;
  306. m_null_format_weak_ptr = NULL;
  307. }
  308. HiveSerializedRowSet::~HiveSerializedRowSet() {
  309. /* Nothing to deallocate */
  310. }
  311. void HiveSerializedRowSet::initialize(Apache::Hadoop::Hive::Schema& schema, string& serialized_row) {
  312. m_row_weak_ptr = &serialized_row;
  313. /* Allocate sufficient space to prevent further resizing */
  314. m_field_offsets.reserve(schema.fieldSchemas.size());
  315. initializeOffsets(schema, serialized_row); // Initialize m_field_offsets
  316. assert(m_field_offsets.size() == schema.fieldSchemas.size());
  317. assert(schema.properties[SERIALIZATION_NULL_FORMAT].length() > 0);
  318. m_null_format_weak_ptr = &(schema.properties[SERIALIZATION_NULL_FORMAT]);
  319. /* Synchronize m_field_buffer and m_last_column_fetched now that extractField() works */
  320. initFieldBuffer();
  321. }
  322. /* This method should never be called outside of the inherited HiveRowSet::reset() */
  323. void HiveSerializedRowSet::specialized_reset() {
  324. m_row_weak_ptr = NULL;
  325. m_field_offsets.clear();
  326. m_null_format_weak_ptr = NULL;
  327. }
  328. void HiveSerializedRowSet::initializeOffsets(Apache::Hadoop::Hive::Schema& schema, string& serialized_row) {
  329. m_field_offsets.push_back(0); // There will always be at least one column
  330. // Keep a temporary field_delim reference so we don't have to keep using the map
  331. string& field_delim(schema.properties[FIELD_DELIM]);
  332. assert(field_delim.length() > 0);
  333. // Assumes that field delimiters will only be one character
  334. size_t idx = serialized_row.find_first_of(field_delim);
  335. while (idx != string::npos) {
  336. // Set the field offset to the start of the following field
  337. m_field_offsets.push_back(idx + 1);
  338. idx = serialized_row.find_first_of(field_delim, idx + 1);
  339. }
  340. }
  341. size_t HiveSerializedRowSet::getColumnCount() {
  342. return m_field_offsets.size();
  343. }
  344. const char* HiveSerializedRowSet::getNullFormat() {
  345. assert(m_null_format_weak_ptr != NULL);
  346. return m_null_format_weak_ptr->c_str();
  347. }
  348. size_t HiveSerializedRowSet::getFieldLen(size_t column_idx) {
  349. assert(column_idx < getColumnCount());
  350. assert(m_row_weak_ptr != NULL);
  351. size_t len;
  352. // If this is the last column...
  353. if (column_idx == getColumnCount() - 1) {
  354. assert(m_row_weak_ptr->length() >= m_field_offsets[column_idx]);
  355. len = m_row_weak_ptr->length() - m_field_offsets[column_idx];
  356. } else {
  357. assert(m_field_offsets[column_idx + 1] > m_field_offsets[column_idx]);
  358. len = m_field_offsets[column_idx + 1] - m_field_offsets[column_idx] - 1;
  359. }
  360. /* Enforce the constraint that no data exceed MAX_BYTE_LENGTH */
  361. len = min(len, (size_t) MAX_BYTE_LENGTH);
  362. return len;
  363. }
  364. void HiveSerializedRowSet::extractField(size_t column_idx) {
  365. assert(column_idx < getColumnCount());
  366. assert(m_row_weak_ptr != NULL);
  367. /* The field buffer should always be large enough to hold the field */
  368. assert(getFieldLen(column_idx) < sizeof(m_field_buffer));
  369. /* Just safety precaution to prevent buffer overflow */
  370. /* Reduce buffer size by one to save space for null terminator */
  371. size_t extract_len = min(getFieldLen(column_idx), sizeof(m_field_buffer) - 1);
  372. size_t copied = m_row_weak_ptr->copy(m_field_buffer, extract_len, m_field_offsets[column_idx]);
  373. assert(copied == extract_len);
  374. /* Make sure the buffer is null terminated */
  375. m_field_buffer[extract_len] = '\0';
  376. }
  377. /*************************************************************************************************
  378. * HiveStringVectorRowSet Subclass Definition
  379. ************************************************************************************************/
  380. HiveStringVectorRowSet::HiveStringVectorRowSet() {
  381. m_fields_weak_ptr = NULL;
  382. m_null_format_weak_ptr = NULL;
  383. }
  384. HiveStringVectorRowSet::~HiveStringVectorRowSet() {
  385. /* Nothing to deallocate */
  386. }
  387. void HiveStringVectorRowSet::initialize(Apache::Hadoop::Hive::Schema& schema, vector<string>* fields) {
  388. assert(fields != NULL);
  389. m_fields_weak_ptr = fields;
  390. assert(schema.properties[SERIALIZATION_NULL_FORMAT].length() > 0);
  391. m_null_format_weak_ptr = &(schema.properties[SERIALIZATION_NULL_FORMAT]);
  392. /* Synchronize m_field_buffer and m_last_column_fetched now that extractField() works */
  393. initFieldBuffer();
  394. }
  395. /* This method should never be called outside of the inherited HiveRowSet::reset() */
  396. void HiveStringVectorRowSet::specialized_reset() {
  397. m_fields_weak_ptr = NULL;
  398. m_null_format_weak_ptr = NULL;
  399. }
  400. size_t HiveStringVectorRowSet::getColumnCount() {
  401. assert(m_fields_weak_ptr != NULL);
  402. return m_fields_weak_ptr->size();
  403. }
  404. const char* HiveStringVectorRowSet::getNullFormat() {
  405. assert(m_null_format_weak_ptr != NULL);
  406. return m_null_format_weak_ptr->c_str();
  407. }
  408. size_t HiveStringVectorRowSet::getFieldLen(size_t column_idx) {
  409. assert(column_idx < getColumnCount());
  410. assert(m_fields_weak_ptr != NULL);
  411. size_t len = m_fields_weak_ptr->at(column_idx).length();
  412. /* Enforce the constraint that no data exceed MAX_BYTE_LENGTH */
  413. len = min(len, (size_t) MAX_BYTE_LENGTH);
  414. return len;
  415. }
  416. void HiveStringVectorRowSet::extractField(size_t column_idx) {
  417. assert(column_idx < getColumnCount());
  418. assert(m_fields_weak_ptr != NULL);
  419. safe_strncpy(m_field_buffer, m_fields_weak_ptr->at(column_idx).c_str(), sizeof(m_field_buffer));
  420. }