/src/lib/storage/SimpleStoreMerger.cpp

https://github.com/gekkonerd/hyrise · C++ · 185 lines · 126 code · 37 blank · 22 comment · 18 complexity · 7de21dc087011c6a1e688b4c20606e88 MD5 · raw file

  1. // Copyright (c) 2012 Hasso-Plattner-Institut fuer Softwaresystemtechnik GmbH. All rights reserved.
  2. #include "SimpleStoreMerger.h"
  3. #include <memory>
  4. #include <set>
  5. #include "OrderPreservingDictionary.h"
  6. #include "meta_storage.h"
  7. #include "RawTable.h"
  8. #include "SimpleStore.h"
  9. namespace hyrise {
  10. namespace storage {
  11. /**
  12. * This class implements the part for merging the dictionaries between
  13. * the uncompressed delta and the compressed order preserving
  14. * dictionary. This is done using the following simple
  15. * algorithm. First a new std::set is created to store all distinct
  16. * values in a sorted order. Now a new dictionary is created based on
  17. * this set and a mapping table is build that maps the old values from
  18. * the old dictionary to the new dictionary.
  19. */
  20. struct MergeDictFunctor {
  21. struct result {
  22. std::vector<value_id_t> mapping;
  23. std::shared_ptr<AbstractDictionary> dict;
  24. };
  25. // Result type value definition
  26. typedef result value_type;
  27. c_atable_ptr_t _main;
  28. std::shared_ptr<const SimpleStore::delta_table_t> _delta;
  29. field_t _column;
  30. void prepare(c_atable_ptr_t m, std::shared_ptr<const SimpleStore::delta_table_t> d, field_t c) {
  31. _main = m;
  32. _delta = d;
  33. _column = c;
  34. };
  35. template <typename R>
  36. result operator()() {
  37. auto dict = std::dynamic_pointer_cast<OrderPreservingDictionary<R>>(_main->dictionaryAt(_column));
  38. std::set<R> data;
  39. // Build unified dictionary
  40. size_t deltaSize = _delta->size();
  41. for (size_t i = 0; i < deltaSize; ++i) {
  42. data.insert(_delta->getValue<R>(_column, i));
  43. }
  44. size_t dictSize = dict->size();
  45. for (size_t i = 0; i < dictSize; ++i)
  46. data.insert(dict->getValueForValueId(i));
  47. // Build mapping table for old dictionary
  48. auto start = data.cbegin();
  49. auto end = data.cend();
  50. size_t mapped = 0;
  51. std::vector<value_id_t> mapping;
  52. for (size_t i = 0; i < dictSize; ++i) {
  53. auto val = dict->getValueForValueId(i);
  54. // Skip until we are equal
  55. while (start != end && *start != val) {
  56. ++mapped;
  57. ++start;
  58. }
  59. if (start != end)
  60. ++start;
  61. mapping.push_back(mapped++);
  62. }
  63. auto resultDict = std::make_shared<OrderPreservingDictionary<R>>(data.size());
  64. for (auto e : data)
  65. resultDict->addValue(e);
  66. result r = {std::move(mapping), std::move(resultDict)};
  67. return r;
  68. }
  69. };
  70. /**
  71. * This class performs the mapping of old uncompressed delta values to
  72. * new valueIds in the compressed data store.
  73. */
  74. struct MapValueForValueId {
  75. typedef void value_type;
  76. atable_ptr_t _main;
  77. std::shared_ptr<AbstractDictionary> _dict;
  78. std::shared_ptr<const SimpleStore::delta_table_t> _delta;
  79. field_t _col;
  80. field_t _dstCol;
  81. void prepare(atable_ptr_t m,
  82. field_t dst,
  83. std::shared_ptr<AbstractDictionary> d,
  84. size_t col,
  85. std::shared_ptr<const SimpleStore::delta_table_t> de) {
  86. _main = m;
  87. _dstCol = dst;
  88. _dict = d;
  89. _delta = de;
  90. _col = col;
  91. }
  92. template <typename R>
  93. value_type operator()() {
  94. auto d = std::dynamic_pointer_cast<OrderPreservingDictionary<R>>(_dict);
  95. size_t tabSize = _main->size();
  96. size_t start = _main->size() - _delta->size();
  97. for (size_t row = start; row < tabSize; ++row) {
  98. _main->setValueId(_dstCol, row, ValueId{d->getValueIdForValue(_delta->getValue<R>(_col, row - start)), 0});
  99. }
  100. }
  101. };
  102. void SimpleStoreMerger::mergeValues(const std::vector<c_atable_ptr_t>& input_tables,
  103. atable_ptr_t merged_table,
  104. const column_mapping_t& column_mapping,
  105. const uint64_t newSize,
  106. bool useValid,
  107. const std::vector<bool>& valid) {
  108. if (useValid)
  109. throw std::runtime_error("SimpleStoreMerger does not support valid vectors");
  110. if (input_tables.size() != 2)
  111. throw std::runtime_error("SimpleStoreMerger does not support more than two tables");
  112. auto delta = std::dynamic_pointer_cast<const RawTable>(input_tables[1]);
  113. auto main = input_tables[0];
  114. // Prepare type handling
  115. MergeDictFunctor fun;
  116. type_switch<hyrise_basic_types> ts;
  117. std::vector<MergeDictFunctor::result> mergedDictionaries(column_mapping.size());
  118. // Extract unique values for delta
  119. for (const auto& kv : column_mapping) {
  120. const auto& col = kv.first;
  121. const auto& dst = kv.second;
  122. fun.prepare(main, delta, col);
  123. auto result = ts(main->typeOfColumn(col), fun);
  124. merged_table->setDictionaryAt(result.dict, dst);
  125. mergedDictionaries[col] = result;
  126. }
  127. // Update the values of the new Table
  128. merged_table->resize(newSize);
  129. size_t tabSize = main->size();
  130. for (size_t row = 0; row < tabSize; ++row) {
  131. for (const auto& kv : column_mapping) {
  132. const auto& col = kv.first;
  133. const auto& dst = kv.second;
  134. merged_table->setValueId(
  135. dst, row, ValueId{mergedDictionaries[col].mapping[main->getValueId(col, row).valueId], 0});
  136. }
  137. }
  138. // Map the values for the values in the uncompressed delta
  139. MapValueForValueId map;
  140. for (const auto& kv : column_mapping) {
  141. const auto& col = kv.first;
  142. const auto& dst = kv.second;
  143. map.prepare(merged_table, dst, mergedDictionaries[col].dict, col, delta);
  144. ts(merged_table->typeOfColumn(dst), map);
  145. }
  146. }
  147. }
  148. }