/tags/release-0.0.0-rc0/hive/external/odbc/src/cpp/HiveRowSet.cpp
C++ | 465 lines | 358 code | 45 blank | 62 comment | 86 complexity | ef0ac1e322352f29e2b9a0f23dd3923d MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause, JSON, CPL-1.0
- /**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
#include <assert.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "HiveRowSet.h"
#include "hiveclienthelper.h"
- /*************************************************************************************************
- * Base HiveRowSet Class Logic
- ************************************************************************************************/
- HiveRowSet::HiveRowSet() {
- m_is_completely_read = false;
- m_bytes_read = 0;
- m_last_column_fetched = 0;
- m_field_buffer[0] = '\0';
- }
- HiveRowSet::~HiveRowSet() {
- }
- void HiveRowSet::reset() {
- m_is_completely_read = false;
- m_bytes_read = 0;
- m_last_column_fetched = 0;
- m_field_buffer[0] = '\0';
- /* Non Virtual Calls Pure Virtual Idiom */
- specialized_reset(); /* Call the specialized subclass reset method */
- }
- void HiveRowSet::initFieldBuffer() {
- /* m_field_buffer should always correspond to the field indicated by m_last_column_fetched*/
- extractField(m_last_column_fetched);
- }
- HiveReturn HiveRowSet::getFieldDataLen(size_t column_idx, size_t* col_len, char* err_buf,
- size_t err_buf_len) {
- RETURN_ON_ASSERT(col_len == NULL, __FUNCTION__,
- "Pointer to col_len (output) cannot be NULL.", err_buf, err_buf_len, HIVE_ERROR);
- RETURN_ON_ASSERT(getColumnCount() == 0, __FUNCTION__,
- "Rowset contains zero columns.", err_buf, err_buf_len, HIVE_ERROR);
- RETURN_ON_ASSERT(column_idx >= getColumnCount(), __FUNCTION__,
- "Column index out of bounds.", err_buf, err_buf_len, HIVE_ERROR);
- *col_len = getFieldLen(column_idx);
- return HIVE_SUCCESS;
- }
- HiveReturn HiveRowSet::getFieldAsCString(size_t column_idx, char* buffer, size_t buffer_len,
- size_t* data_byte_size, int* is_null_value, char* err_buf,
- size_t err_buf_len) {
- RETURN_ON_ASSERT(buffer == NULL, __FUNCTION__,
- "Column data output buffer cannot be NULL.", err_buf, err_buf_len, HIVE_ERROR);
- RETURN_ON_ASSERT(is_null_value == NULL, __FUNCTION__,
- "Column data is_null_value (output) cannot be NULL.", err_buf, err_buf_len,
- HIVE_ERROR);
- RETURN_ON_ASSERT(getColumnCount() == 0, __FUNCTION__,
- "Rowset contains zero columns.", err_buf, err_buf_len, HIVE_ERROR);
- RETURN_ON_ASSERT(column_idx >= getColumnCount(), __FUNCTION__,
- "Column index out of bounds.", err_buf, err_buf_len, HIVE_ERROR);
- RETURN_ON_ASSERT(buffer_len == 0, __FUNCTION__,
- "Output buffer cannot have a size of zero.", err_buf, err_buf_len, HIVE_ERROR);
- if (m_last_column_fetched != column_idx) {
- extractField(column_idx);
- m_bytes_read = 0; /* Reset the read offset if different from the last column fetched */
- m_last_column_fetched = column_idx;
- m_is_completely_read = false;
- }
- if (m_is_completely_read) {
- return HIVE_NO_MORE_DATA; /* This field has already been completely fetched by a previous call*/
- }
- /* If the column data is the same as the null format spec... */
- if (strcmp(getNullFormat(), m_field_buffer) == 0) {
- /* This value must be NULL */
- *is_null_value = 1;
- if (data_byte_size != NULL) {
- *data_byte_size = 0;
- }
- buffer[0] = '\0';
- } else {
- /* This value has been determined not to be NULL */
- *is_null_value = 0;
- size_t data_total_len = getFieldLen(column_idx);
- /* Cannot read more data then the total number of bytes available */
- assert(data_total_len >= m_bytes_read);
- size_t bytes_remaining = data_total_len - m_bytes_read; // Excludes null char
- if (data_byte_size != NULL) {
- /* Save the number of remaining characters to return before this fetch */
- *data_byte_size = bytes_remaining;
- }
- /* Move pointer to the read location */
- const char* src_str_ptr = m_field_buffer + m_bytes_read;
- /* The total number of bytes to read (+1 null terminator) should be no more than the
- * size of the field buffer */
- assert(m_bytes_read + bytes_remaining + 1 <= sizeof(m_field_buffer));
- /* Copy as many characters as possible from the read location */
- size_t bytes_copied = safe_strncpy(buffer, src_str_ptr, min(buffer_len, bytes_remaining + 1)); // +1 for null terminator
- /* bytes_copied does not count the null terminator */
- m_bytes_read += bytes_copied;
- if (m_bytes_read < data_total_len) {
- return HIVE_SUCCESS_WITH_MORE_DATA; /* Data truncated; more data to return */
- }
- }
- m_is_completely_read = true;
- return HIVE_SUCCESS; /* All data successfully read */
- }
- HiveReturn HiveRowSet::getFieldAsDouble(size_t column_idx, double* buffer, int* is_null_value,
- char* err_buf, size_t err_buf_len) {
- RETURN_ON_ASSERT(buffer == NULL, __FUNCTION__,
- "Column data output buffer cannot be NULL.", err_buf, err_buf_len, HIVE_ERROR);
- RETURN_ON_ASSERT(is_null_value == NULL, __FUNCTION__,
- "Column data is_null_value (output) cannot be NULL.", err_buf, err_buf_len,
- HIVE_ERROR);
- RETURN_ON_ASSERT(getColumnCount() == 0, __FUNCTION__,
- "Rowset contains zero columns.", err_buf, err_buf_len, HIVE_ERROR);
- RETURN_ON_ASSERT(column_idx >= getColumnCount(), __FUNCTION__,
- "Column index out of bounds.", err_buf, err_buf_len, HIVE_ERROR);
- if (m_last_column_fetched != column_idx) {
- /* Reset if this column was not fetched on the last attempt */
- extractField(column_idx);
- m_bytes_read = 0; /* Reset the read offset if different from the last column fetched */
- m_last_column_fetched = column_idx;
- m_is_completely_read = false;
- }
- if (m_is_completely_read) {
- return HIVE_NO_MORE_DATA; /* This column has already been completely fetched */
- }
- /* If the column data is the same as the nullformat spec... */
- if (strcmp(getNullFormat(), m_field_buffer) == 0) {
- *is_null_value = 1;
- *buffer = 0.0;
- } else {
- *is_null_value = 0;
- *buffer = atof(m_field_buffer);
- }
- m_is_completely_read = true;
- return HIVE_SUCCESS;
- }
- HiveReturn HiveRowSet::getFieldAsInt(size_t column_idx, int* buffer, int* is_null_value,
- char* err_buf, size_t err_buf_len) {
- RETURN_ON_ASSERT(buffer == NULL, __FUNCTION__,
- "Column data output buffer cannot be NULL.", err_buf, err_buf_len, HIVE_ERROR);
- RETURN_ON_ASSERT(is_null_value == NULL, __FUNCTION__,
- "Column data is_null_value (output) cannot be NULL.", err_buf, err_buf_len,
- HIVE_ERROR);
- RETURN_ON_ASSERT(getColumnCount() == 0, __FUNCTION__,
- "Rowset contains zero columns.", err_buf, err_buf_len, HIVE_ERROR);
- RETURN_ON_ASSERT(column_idx >= getColumnCount(), __FUNCTION__,
- "Column index out of bounds.", err_buf, err_buf_len, HIVE_ERROR);
- if (m_last_column_fetched != column_idx) {
- extractField(column_idx);
- m_bytes_read = 0; /* Reset the read offset if different from the last column fetched */
- m_last_column_fetched = column_idx;
- m_is_completely_read = false;
- }
- if (m_is_completely_read) {
- return HIVE_NO_MORE_DATA; /* This column has already been completely fetched */
- }
- /* If the column data is the same as the null format spec... */
- if (strcmp(getNullFormat(), m_field_buffer) == 0) {
- *is_null_value = 1;
- *buffer = 0;
- } else {
- *is_null_value = 0;
- *buffer = atoi(m_field_buffer);
- }
- m_is_completely_read = true;
- return HIVE_SUCCESS;
- }
- HiveReturn HiveRowSet::getFieldAsLong(size_t column_idx, long* buffer, int* is_null_value,
- char* err_buf, size_t err_buf_len) {
- RETURN_ON_ASSERT(buffer == NULL, __FUNCTION__,
- "Column data output buffer cannot be NULL.", err_buf, err_buf_len, HIVE_ERROR);
- RETURN_ON_ASSERT(is_null_value == NULL, __FUNCTION__,
- "Column data is_null_value (output) cannot be NULL.", err_buf, err_buf_len,
- HIVE_ERROR);
- RETURN_ON_ASSERT(getColumnCount() == 0, __FUNCTION__,
- "Rowset contains zero columns.", err_buf, err_buf_len, HIVE_ERROR);
- RETURN_ON_ASSERT(column_idx >= getColumnCount(), __FUNCTION__,
- "Column index out of bounds.", err_buf, err_buf_len, HIVE_ERROR);
- if (m_last_column_fetched != column_idx) {
- extractField(column_idx);
- m_bytes_read = 0; /* Reset the read offset if different from the last column fetched */
- m_last_column_fetched = column_idx;
- m_is_completely_read = false;
- }
- if (m_is_completely_read) {
- return HIVE_NO_MORE_DATA; /* This column has already been completely fetched */
- }
- /* If the column data is the same as the null format spec... */
- if (strcmp(getNullFormat(), m_field_buffer) == 0) {
- *is_null_value = 1;
- *buffer = 0;
- } else {
- *is_null_value = 0;
- *buffer = atol(m_field_buffer);
- }
- m_is_completely_read = true;
- return HIVE_SUCCESS;
- }
- HiveReturn HiveRowSet::getFieldAsULong(size_t column_idx, unsigned long* buffer,
- int* is_null_value, char* err_buf, size_t err_buf_len) {
- RETURN_ON_ASSERT(buffer == NULL, __FUNCTION__,
- "Column data output buffer cannot be NULL.", err_buf, err_buf_len, HIVE_ERROR);
- RETURN_ON_ASSERT(is_null_value == NULL, __FUNCTION__,
- "Column data is_null_value (output) cannot be NULL.", err_buf, err_buf_len,
- HIVE_ERROR);
- RETURN_ON_ASSERT(getColumnCount() == 0, __FUNCTION__,
- "Rowset contains zero columns.", err_buf, err_buf_len, HIVE_ERROR);
- RETURN_ON_ASSERT(column_idx >= getColumnCount(), __FUNCTION__,
- "Column index out of bounds.", err_buf, err_buf_len, HIVE_ERROR);
- if (m_last_column_fetched != column_idx) {
- extractField(column_idx);
- m_bytes_read = 0; /* Reset the read offset if different from the last column fetched */
- m_last_column_fetched = column_idx;
- m_is_completely_read = false;
- }
- if (m_is_completely_read) {
- return HIVE_NO_MORE_DATA; /* This column has already been completely fetched */
- }
- /* If the column data is the same as the null format spec... */
- if (strcmp(getNullFormat(), m_field_buffer) == 0) {
- *is_null_value = 1;
- *buffer = 0;
- } else {
- *is_null_value = 0;
- *buffer = strtoul(m_field_buffer, NULL, 10);
- }
- m_is_completely_read = true;
- return HIVE_SUCCESS;
- }
- HiveReturn HiveRowSet::getFieldAsI64(size_t column_idx, int64_t* buffer, int* is_null_value,
- char* err_buf, size_t err_buf_len) {
- RETURN_ON_ASSERT(buffer == NULL, __FUNCTION__,
- "Column data output buffer cannot be NULL.", err_buf, err_buf_len, HIVE_ERROR);
- RETURN_ON_ASSERT(is_null_value == NULL, __FUNCTION__,
- "Column data is_null_value (output) cannot be NULL.", err_buf, err_buf_len,
- HIVE_ERROR);
- RETURN_ON_ASSERT(getColumnCount() == 0, __FUNCTION__,
- "Rowset contains zero columns.", err_buf, err_buf_len, HIVE_ERROR);
- RETURN_ON_ASSERT(column_idx >= getColumnCount(), __FUNCTION__,
- "Column index out of bounds.", err_buf, err_buf_len, HIVE_ERROR);
- if (m_last_column_fetched != column_idx) {
- extractField(column_idx);
- m_bytes_read = 0; /* Reset the read offset if different from the last column fetched */
- m_last_column_fetched = column_idx;
- m_is_completely_read = false;
- }
- if (m_is_completely_read) {
- return HIVE_NO_MORE_DATA; /* This column has already been completely fetched */
- }
- /* If the column data is the same as the null format spec... */
- if (strcmp(getNullFormat(), m_field_buffer) == 0) {
- *is_null_value = 1;
- *buffer = 0;
- } else {
- *is_null_value = 0;
- *buffer = ATOI64(m_field_buffer);
- }
- m_is_completely_read = true;
- return HIVE_SUCCESS;
- }
- HiveReturn HiveRowSet::getFieldAsI64U(size_t column_idx, uint64_t* buffer, int* is_null_value,
- char* err_buf, size_t err_buf_len) {
- RETURN_ON_ASSERT(buffer == NULL, __FUNCTION__,
- "Column data output buffer cannot be NULL.", err_buf, err_buf_len, HIVE_ERROR);
- RETURN_ON_ASSERT(is_null_value == NULL, __FUNCTION__,
- "Column data is_null_value (output) cannot be NULL.", err_buf, err_buf_len,
- HIVE_ERROR);
- RETURN_ON_ASSERT(getColumnCount() == 0, __FUNCTION__,
- "Rowset contains zero columns.", err_buf, err_buf_len, HIVE_ERROR);
- RETURN_ON_ASSERT(column_idx >= getColumnCount(), __FUNCTION__,
- "Column index out of bounds.", err_buf, err_buf_len, HIVE_ERROR);
- if (m_last_column_fetched != column_idx) {
- extractField(column_idx);
- m_bytes_read = 0; /* Reset the read offset if different from the last column fetched */
- m_last_column_fetched = column_idx;
- m_is_completely_read = false;
- }
- if (m_is_completely_read) {
- return HIVE_NO_MORE_DATA; /* This column has already been completely fetched */
- }
- /* If the column data is the same as the null format spec... */
- if (strcmp(getNullFormat(), m_field_buffer) == 0) {
- *is_null_value = 1;
- *buffer = 0;
- } else {
- *is_null_value = 0;
- *buffer = ATOI64U(m_field_buffer);
- }
- m_is_completely_read = true;
- return HIVE_SUCCESS;
- }
- /*************************************************************************************************
- * HiveSerializedRowSet Subclass Definition
- ************************************************************************************************/
- HiveSerializedRowSet::HiveSerializedRowSet() {
- m_row_weak_ptr = NULL;
- m_null_format_weak_ptr = NULL;
- }
- HiveSerializedRowSet::~HiveSerializedRowSet() {
- /* Nothing to deallocate */
- }
- void HiveSerializedRowSet::initialize(Apache::Hadoop::Hive::Schema& schema, string& serialized_row) {
- m_row_weak_ptr = &serialized_row;
- /* Allocate sufficient space to prevent further resizing */
- m_field_offsets.reserve(schema.fieldSchemas.size());
- initializeOffsets(schema, serialized_row); // Initialize m_field_offsets
- assert(m_field_offsets.size() == schema.fieldSchemas.size());
- assert(schema.properties[SERIALIZATION_NULL_FORMAT].length() > 0);
- m_null_format_weak_ptr = &(schema.properties[SERIALIZATION_NULL_FORMAT]);
- /* Synchronize m_field_buffer and m_last_column_fetched now that extractField() works */
- initFieldBuffer();
- }
- /* This method should never be called outside of the inherited HiveRowSet::reset() */
- void HiveSerializedRowSet::specialized_reset() {
- m_row_weak_ptr = NULL;
- m_field_offsets.clear();
- m_null_format_weak_ptr = NULL;
- }
- void HiveSerializedRowSet::initializeOffsets(Apache::Hadoop::Hive::Schema& schema, string& serialized_row) {
- m_field_offsets.push_back(0); // There will always be at least one column
- // Keep a temporary field_delim reference so we don't have to keep using the map
- string& field_delim(schema.properties[FIELD_DELIM]);
- assert(field_delim.length() > 0);
- // Assumes that field delimiters will only be one character
- size_t idx = serialized_row.find_first_of(field_delim);
- while (idx != string::npos) {
- // Set the field offset to the start of the following field
- m_field_offsets.push_back(idx + 1);
- idx = serialized_row.find_first_of(field_delim, idx + 1);
- }
- }
- size_t HiveSerializedRowSet::getColumnCount() {
- return m_field_offsets.size();
- }
- const char* HiveSerializedRowSet::getNullFormat() {
- assert(m_null_format_weak_ptr != NULL);
- return m_null_format_weak_ptr->c_str();
- }
- size_t HiveSerializedRowSet::getFieldLen(size_t column_idx) {
- assert(column_idx < getColumnCount());
- assert(m_row_weak_ptr != NULL);
- size_t len;
- // If this is the last column...
- if (column_idx == getColumnCount() - 1) {
- assert(m_row_weak_ptr->length() >= m_field_offsets[column_idx]);
- len = m_row_weak_ptr->length() - m_field_offsets[column_idx];
- } else {
- assert(m_field_offsets[column_idx + 1] > m_field_offsets[column_idx]);
- len = m_field_offsets[column_idx + 1] - m_field_offsets[column_idx] - 1;
- }
- /* Enforce the constraint that no data exceed MAX_BYTE_LENGTH */
- len = min(len, (size_t) MAX_BYTE_LENGTH);
- return len;
- }
- void HiveSerializedRowSet::extractField(size_t column_idx) {
- assert(column_idx < getColumnCount());
- assert(m_row_weak_ptr != NULL);
- /* The field buffer should always be large enough to hold the field */
- assert(getFieldLen(column_idx) < sizeof(m_field_buffer));
- /* Just safety precaution to prevent buffer overflow */
- /* Reduce buffer size by one to save space for null terminator */
- size_t extract_len = min(getFieldLen(column_idx), sizeof(m_field_buffer) - 1);
- size_t copied = m_row_weak_ptr->copy(m_field_buffer, extract_len, m_field_offsets[column_idx]);
- assert(copied == extract_len);
- /* Make sure the buffer is null terminated */
- m_field_buffer[extract_len] = '\0';
- }
- /*************************************************************************************************
- * HiveStringVectorRowSet Subclass Definition
- ************************************************************************************************/
- HiveStringVectorRowSet::HiveStringVectorRowSet() {
- m_fields_weak_ptr = NULL;
- m_null_format_weak_ptr = NULL;
- }
- HiveStringVectorRowSet::~HiveStringVectorRowSet() {
- /* Nothing to deallocate */
- }
- void HiveStringVectorRowSet::initialize(Apache::Hadoop::Hive::Schema& schema, vector<string>* fields) {
- assert(fields != NULL);
- m_fields_weak_ptr = fields;
- assert(schema.properties[SERIALIZATION_NULL_FORMAT].length() > 0);
- m_null_format_weak_ptr = &(schema.properties[SERIALIZATION_NULL_FORMAT]);
- /* Synchronize m_field_buffer and m_last_column_fetched now that extractField() works */
- initFieldBuffer();
- }
- /* This method should never be called outside of the inherited HiveRowSet::reset() */
- void HiveStringVectorRowSet::specialized_reset() {
- m_fields_weak_ptr = NULL;
- m_null_format_weak_ptr = NULL;
- }
- size_t HiveStringVectorRowSet::getColumnCount() {
- assert(m_fields_weak_ptr != NULL);
- return m_fields_weak_ptr->size();
- }
- const char* HiveStringVectorRowSet::getNullFormat() {
- assert(m_null_format_weak_ptr != NULL);
- return m_null_format_weak_ptr->c_str();
- }
- size_t HiveStringVectorRowSet::getFieldLen(size_t column_idx) {
- assert(column_idx < getColumnCount());
- assert(m_fields_weak_ptr != NULL);
- size_t len = m_fields_weak_ptr->at(column_idx).length();
- /* Enforce the constraint that no data exceed MAX_BYTE_LENGTH */
- len = min(len, (size_t) MAX_BYTE_LENGTH);
- return len;
- }
- void HiveStringVectorRowSet::extractField(size_t column_idx) {
- assert(column_idx < getColumnCount());
- assert(m_fields_weak_ptr != NULL);
- safe_strncpy(m_field_buffer, m_fields_weak_ptr->at(column_idx).c_str(), sizeof(m_field_buffer));
- }