TPIE

2362a60
btree_builder.h
1 // -*- mode: c++; tab-width: 4; indent-tabs-mode: t; eval: (progn (c-set-style "stroustrup") (c-set-offset 'innamespace 0)); -*-
2 // vi:set ts=4 sts=4 sw=4 noet :
3 // Copyright 2015 The TPIE development team
4 //
5 // This file is part of TPIE.
6 //
7 // TPIE is free software: you can redistribute it and/or modify it under
8 // the terms of the GNU Lesser General Public License as published by the
9 // Free Software Foundation, either version 3 of the License, or (at your
10 // option) any later version.
11 //
12 // TPIE is distributed in the hope that it will be useful, but WITHOUT ANY
13 // WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 // License for more details.
16 //
17 // You should have received a copy of the GNU Lesser General Public License
18 // along with TPIE. If not, see <http://www.gnu.org/licenses/>
19 
20 #ifndef _TPIE_BTREE_BTREE_BUILDER_H_
21 #define _TPIE_BTREE_BTREE_BUILDER_H_
22 
23 #include <tpie/portability.h>
24 #include <tpie/btree/base.h>
25 #include <tpie/btree/node.h>
26 #include <tpie/memory.h>
27 
28 #include <cstddef>
29 #include <deque>
30 #include <vector>
31 
32 namespace tpie {
33 namespace bbits {
34 
35 template <typename T, bool b>
36 struct block_size_getter {
37  static constexpr size_t v() {return 0;}
38 };
39 
40 template <typename T>
41 struct block_size_getter<T, true> {
42  static constexpr size_t v() {return T::block_size();}
43 };
44 
45 template <typename T, typename O>
46 class builder {
47 private:
48  typedef bbits::tree<T, O> tree_type;
49  typedef bbits::tree_state<T, O> state_type;
50 
51  typedef typename state_type::key_type key_type;
52 
53  static const bool is_internal = state_type::is_internal;
54 
55  static const bool is_static = state_type::is_static;
56 
57  static const bool is_serialized = state_type::is_serialized;
58 
59  static_assert(!is_serialized || is_static, "The builder currently only supports static serialized trees");
60 
61  typedef T value_type;
62 
63  typedef typename O::C comp_type;
64 
65  typedef typename O::A augmenter_type;
66 
67  typedef typename tree_type::augment_type augment_type;
68 
69  typedef typename tree_type::size_type size_type;
70 
71  typedef typename state_type::combined_augment combined_augment;
72 
73  typedef typename state_type::store_type store_type;
74 
75  typedef typename state_type::store_type S;
76 
77  typedef typename S::leaf_type leaf_type;
78 
79  typedef typename S::internal_type internal_type;
80 
81  typedef btree_node<state_type> node_type;
82 
83  // Keeps the same information that the parent of a leaf keeps
84  struct leaf_summary {
85  leaf_type leaf;
86  combined_augment augment;
87  };
88 
89  // Keeps the same information that the parent of a node keeps
90  struct internal_summary {
91  internal_type internal;
92  combined_augment augment;
93  };
94 
95  // Construct a leaf from m
96  void construct_leaf(size_t size) {
97  tp_assert(size != 0, "we should not construct an empty leaf");
98  tp_assert(size <= m_items.size(), "we should not construct a leaf with more items then we have");
99  tp_assert(size <= S::max_leaf_size(), "we should not construct a leaf with more items then the max leaf size");
100  leaf_summary leaf;
101  leaf.leaf = m_state.store().create_leaf();
102 
103  m_state.store().set_count(leaf.leaf, size);
104 
105  for(size_t i = 0; i < size; ++i) {
106  m_state.store().set(leaf.leaf, i, m_items.front());
107  m_items.pop_front();
108  }
109 
110  leaf.augment = m_state.m_augmenter(node_type(&m_state, leaf.leaf));
111  m_state.store().flush();
112  m_leaves.push_back(leaf);
113 
114  if (is_serialized) {
115  tp_assert(m_items.empty(), "we should only construct complete leafs when serializing");
116  m_serialized_size = 0;
117  }
118  }
119 
120  void construct_internal_from_leaves(size_t size) {
121  internal_summary internal;
122  internal.internal = m_state.store().create_internal();
123 
124  m_state.store().set_count(internal.internal, size);
125 
126  for(size_t i = 0; i < size; ++i) {
127  leaf_summary child = m_leaves.front();
128  m_state.store().set(internal.internal, i, child.leaf);
129  m_state.store().set_augment(child.leaf, internal.internal, child.augment);
130  m_leaves.pop_front();
131  }
132 
133  internal.augment = m_state.m_augmenter(node_type(&m_state, internal.internal));
134  m_state.store().flush();
135 
136  // push the internal node to the deque of nodes
137  if(m_internal_nodes.size() < 1) m_internal_nodes.push_back(std::deque<internal_summary>());
138  m_internal_nodes[0].push_back(internal);
139  }
140 
141  void construct_internal_from_internal(size_t size, size_t level) {
142  // take nodes from internal_nodes[level] and construct a new node in internal_nodes[level+1]
143  internal_summary internal;
144  internal.internal = m_state.store().create_internal();
145  m_state.store().set_count(internal.internal, size);
146  for(size_t i = 0; i < size; ++i) {
147  internal_summary child = m_internal_nodes[level].front();
148  m_state.store().set(internal.internal, i, child.internal);
149  m_state.store().set_augment(child.internal, internal.internal, child.augment);
150  m_internal_nodes[level].pop_front();
151  }
152  internal.augment = m_state.m_augmenter(node_type(&m_state, internal.internal));
153  m_state.store().flush();
154 
155  // push the internal node to the deque of nodes
156  if(m_internal_nodes.size() < level+2) m_internal_nodes.push_back(std::deque<internal_summary>());
157  m_internal_nodes[level+1].push_back(internal);
158  }
159 
163  static constexpr size_t desired_leaf_size() noexcept {
164  return is_static
165  ? S::max_leaf_size()
166  : ((S::min_leaf_size() + S::max_leaf_size()) / 2);
167  }
168 
172  static constexpr size_t leaf_tipping_point() noexcept {
173  return desired_leaf_size() + S::min_leaf_size();
174  }
175 
179  static constexpr size_t desired_internal_size() noexcept {
180  return is_static
181  ? S::max_internal_size()
182  : ((S::min_internal_size() + S::max_internal_size()) / 2);
183  }
184 
188  static constexpr size_t internal_tipping_point() noexcept {
189  return desired_internal_size() + S::min_internal_size();
190  }
191 
195  void extract_nodes() {
196  construct_leaf(is_serialized?m_items.size() : desired_leaf_size());
197 
198  if(m_leaves.size() < internal_tipping_point()) return;
199  construct_internal_from_leaves(desired_internal_size());
200 
201  // Traverse the levels of the tree and try to construct internal nodes from other internal nodes
202  for(size_t i = 0; i < m_internal_nodes.size(); ++i) {
203  // if it is not possible to construct a node at this level, it is not possible at higher levels either
204  if(m_internal_nodes[i].size() < internal_tipping_point()) return;
205  // we have enough nodes to construct a new node.
206  construct_internal_from_internal(desired_internal_size(), i);
207  }
208  }
209 public:
213  template <typename X=enab>
214  explicit builder(std::string path, comp_type comp=comp_type(), augmenter_type augmenter=augmenter_type(), enable<X, !is_internal> =enab() )
215  : m_state(store_type(path, true), std::move(augmenter), typename state_type::keyextract_type())
216  , m_comp(comp)
217  , m_serialized_size(0)
218  , m_size(0)
219  {}
220 
221  template <typename X=enab>
222  explicit builder(comp_type comp=comp_type(), augmenter_type augmenter=augmenter_type(),
223  memory_bucket_ref bucket=memory_bucket_ref(), enable<X, is_internal> =enab() )
224  : m_state(store_type(bucket), std::move(augmenter), typename state_type::keyextract_type())
225  , m_comp(comp)
226  , m_serialized_size(0)
227  , m_size(0)
228  {}
229 
230 
235  void push(value_type v) {
236  ++m_size;
237  if (is_serialized) {
238  size_t s = serialized_size(v);
239  if (m_items.size() == S::max_leaf_size() ||
240  (s + m_serialized_size > block_size_getter<S, is_serialized>::v() && m_serialized_size))
241  extract_nodes();
242  m_items.push_back(v);
243  m_serialized_size += s;
244  } else {
245  m_items.push_back(v);
246 
247  // try to construct nodes from items if possible
248  if(m_items.size() < leaf_tipping_point()) return;
249  extract_nodes();
250  }
251  }
252 
256  tree_type build(const std::string & metadata = std::string()) {
257  m_state.store().set_size(m_size);
258 
259  // finish building the tree by traversing all levels and constructing leaves/nodes
260  // construct one or two leaves if neccesary
261  if(m_items.size() > 0) {
262  if(!is_serialized && m_items.size() > S::max_leaf_size()) // construct two leaves if necessary
263  construct_leaf(m_items.size()/2);
264  construct_leaf(m_items.size()); // construct a leaf with the remaining items
265  }
266 
267  // if there already exists internal nodes and there are leaves left: construct a new internal node(since there is guaranteed to be atleast S::min_internal_size leaves)
268  // if there do not exist internal nodes, then only construct an internal node if there is more than one leaf
269  if((m_internal_nodes.size() == 0 && m_leaves.size() > 1) || (m_internal_nodes.size() > 0 && m_leaves.size() > 0)) {
270  if(m_leaves.size() > 2*S::max_internal_size() ) // construct two nodes if necessary
271  construct_internal_from_leaves(m_leaves.size()/3);
272  if(m_leaves.size() > S::max_internal_size()) // construct two nodes if necessary
273  construct_internal_from_leaves(m_leaves.size()/2);
274  construct_internal_from_leaves(m_leaves.size()); // construct a node from the remaining leaves
275  }
276 
277  for(size_t i = 0; i < m_internal_nodes.size(); ++i) {
278  // if there already exists internal nodes at a higher level and there are nodes at this level left: construct a new internal node at the higher level.
279  // if there do not exist internal nodes at a higher level, then only construct an internal nodes at that level if there are more than one node at this level.
280  if((m_internal_nodes.size() == i+1 && m_internal_nodes[i].size() > 1)
281  || (m_internal_nodes.size() > i+1 && m_internal_nodes[i].size() > 0)) {
282  if(m_internal_nodes[i].size() > 2*S::max_internal_size())
283  construct_internal_from_internal(m_internal_nodes[i].size()/3, i);
284  if(m_internal_nodes[i].size() > S::max_internal_size())
285  construct_internal_from_internal(m_internal_nodes[i].size()/2, i);
286  construct_internal_from_internal(m_internal_nodes[i].size(), i);
287  }
288  }
289 
290  // find the root and set it as such
291 
292  if(m_internal_nodes.size() == 0 && m_leaves.size() == 0) // no items were pushed
293  m_state.store().set_height(0);
294  else {
295  m_state.store().set_height(m_internal_nodes.size() + 1);
296  if(m_leaves.size() == 1)
297  m_state.store().set_root(m_leaves.front().leaf);
298  else
299  m_state.store().set_root(m_internal_nodes.back().front().internal);
300  }
301  if (metadata.size()) {
302  m_state.store().flush();
303  m_state.store().set_metadata(metadata);
304  }
305  m_state.store().finalize_build();
306  return tree_type(std::move(m_state), std::move(m_comp));
307  }
308 
309 private:
310  std::deque<value_type> m_items;
311  std::deque<leaf_summary> m_leaves;
312  std::vector<std::deque<internal_summary>> m_internal_nodes;
313 
314  state_type m_state;
315  comp_type m_comp;
316  size_t m_serialized_size;
317  stream_size_type m_size;
318 };
319 
320 } //namespace bbits
321 
322 template <typename T, typename ... Opts>
323 using btree_builder = bbits::builder<T, typename bbits::OptComp<Opts...>::type>;
324 
325 } //namespace tpie
326 #endif /*_TPIE_BTREE_BTREE_BUILDER_H_*/
size_t serialized_size(const T &v)
Given a serializable, serialize it and measure its serialized size.
Memory management subsystem.
Augmented btree.
Definition: base.h:195
This file contains a few deprecated definitions for legacy code.
void push(value_type v)
Push a value to the builder.
Class storring a reference to a memory bucket.
Definition: memory.h:366
builder(std::string path, comp_type comp=comp_type(), augmenter_type augmenter=augmenter_type(), enable< X,!is_internal >=enab())
Construct a btree builder with the given storage.
Augmented btree builder.
Definition: base.h:205
tree_type build(const std::string &metadata=std::string())
Constructs and returns a btree from the value that was pushed to the builder.
Type that is useful for navigating a btree.
Definition: base.h:178
#define tp_assert(condition, message)
Definition: tpie_assert.h:48
store_type::size_type size_type
Type of the size.
Definition: btree.h:75