table.h (58554B)
1 // Copyright (c) 2018 Kenton Varda and contributors 2 // Licensed under the MIT License: 3 // 4 // Permission is hereby granted, free of charge, to any person obtaining a copy 5 // of this software and associated documentation files (the "Software"), to deal 6 // in the Software without restriction, including without limitation the rights 7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 // copies of the Software, and to permit persons to whom the Software is 9 // furnished to do so, subject to the following conditions: 10 // 11 // The above copyright notice and this permission notice shall be included in 12 // all copies or substantial portions of the Software. 13 // 14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 // THE SOFTWARE. 21 22 #pragma once 23 24 #include "common.h" 25 #include "tuple.h" 26 #include "vector.h" 27 #include "function.h" 28 29 #if _MSC_VER 30 // Need _ReadWriteBarrier 31 #if _MSC_VER < 1910 32 #include <intrin.h> 33 #else 34 #include <intrin0.h> 35 #endif 36 #endif 37 38 #if KJ_DEBUG_TABLE_IMPL 39 #include "debug.h" 40 #define KJ_TABLE_IREQUIRE KJ_REQUIRE 41 #define KJ_TABLE_IASSERT KJ_ASSERT 42 #else 43 #define KJ_TABLE_IREQUIRE KJ_IREQUIRE 44 #define KJ_TABLE_IASSERT KJ_IASSERT 45 #endif 46 47 KJ_BEGIN_HEADER 48 49 namespace kj { 50 51 class String; 52 53 namespace _ { // private 54 55 template <typename Row> 56 class TableMapping; 57 template <typename Row, typename Inner> 58 using TableIterable = MappedIterable<Inner, TableMapping<Row>>; 59 template <typename Row, typename Inner> 60 using TableIterator = MappedIterator<Inner, TableMapping<Row>>; 61 62 } // namespace _ (private) 63 64 template <typename Row, typename... Indexes> 65 class Table { 66 // A table with one or more indexes. This is the KJ alternative to map, set, unordered_map, and 67 // unordered_set. 68 // 69 // Unlike a traditional map, which explicitly stores key/value pairs, a Table simply stores 70 // "rows" of arbitrary type, and then lets the application specify how these should be indexed. 71 // Rows could be indexed on a specific struct field, or they could be indexed based on a computed 72 // property. An index could be hash-based or tree-based. Multiple indexes are supported, making 73 // it easy to construct a "bimap". 74 // 75 // The table has deterministic iteration order based on the sequence of insertions and deletions. 76 // In the case of only insertions, the iteration order is the order of insertion. If deletions 77 // occur, then the current last row is moved to occupy the deleted slot. This determinism is 78 // intended to be reliable for the purpose of testing, etc. 79 // 80 // Each index is a class that looks like: 81 // 82 // class Index { 83 // public: 84 // void reserve(size_t size); 85 // // Called when Table::reserve() is called. 86 // 87 // SearchParam& keyForRow(const Row& row) const; 88 // // Given a row, return a value appropriate to pass as SearchParams to the other functions. 89 // 90 // // In all function calls below, `SearchPrams` refers to whatever parameters the index 91 // // supports for looking up a row in the table. 92 // 93 // template <typename... SearchParams> 94 // kj::Maybe<size_t> insert(kj::ArrayPtr<const Row> table, size_t pos, SearchParams&&...); 95 // // Called to indicate that we're about to insert a new row which will match the given 96 // // search parameters, and will be located at the given position. If this index disallows 97 // // duplicates and some other matching row already exists, then insert() returns the index 98 // // of that row without modifying the index. If the row does not exist, then insert() 99 // // updates the index to note that the new row is located at `pos`. Note that `table[pos]` 100 // // may not be valid yet at the time of this call; the index must go on the search params 101 // // alone. 102 // // 103 // // Insert may throw an exception, in which case the table will roll back insertion. 104 // 105 // template <typename... SearchParams> 106 // void erase(kj::ArrayPtr<const Row> table, size_t pos, SearchParams&&...); 107 // // Called to indicate that the index must remove references to row number `pos`. The 108 // // index must not attempt to access table[pos] directly -- in fact, `pos` may be equal to 109 // // `table.size()`, i.e., may be out-of-bounds (this happens when rolling back a failed 110 // // insertion). Instead, the index can use the search params to search for the row -- they 111 // // will either be the same as the params passed to insert(), or will be a single value of 112 // // type `Row&`. 113 // // 114 // // erase() called immediately after a successful insert() must not throw an exception, as 115 // // it may be called during unwind. 116 // 117 // template <typename... SearchParams> 118 // void move(kj::ArrayPtr<const Row> table, size_t oldPos, size_t newPos, SearchParams&&...); 119 // // Called when a row is about to be moved from `oldPos` to `newPos` in the table. The 120 // // index should update it to the new location. Neither `table[oldPos]` nor `table[newPos]` 121 // // is valid during the call -- use the search params to find the row. Before this call 122 // // `oldPos` is indexed and `newPos` is not -- after the call, the opposite is true. 123 // // 124 // // This should never throw; if it does the table may be corrupted. 125 // 126 // class Iterator; // Behaves like a C++ iterator over size_t values. 127 // class Iterable; // Has begin() and end() methods returning iterators. 128 // 129 // template <typename... SearchParams> 130 // Maybe<size_t> find(kj::ArrayPtr<const Row> table, SearchParams&&...) const; 131 // // Optional. Implements Table::find<Index>(...). 132 // 133 // template <typename... SearchParams> 134 // Iterator seek(kj::ArrayPtr<const Row> table, SearchParams&&...) const; 135 // // Optional. Implements Table::seek<Index>() and Table::range<Index>(...). 136 // 137 // Iterator begin() const; 138 // Iterator end() const; 139 // // Optional. Implements Table::ordered<Index>(). 140 // }; 141 142 public: 143 Table(); 144 Table(Indexes&&... indexes); 145 146 void reserve(size_t size); 147 // Pre-allocates space for a table of the given size. Normally a Table grows by re-allocating 148 // its backing array whenever more space is needed. Reserving in advance avoids redundantly 149 // re-allocating as the table grows. 150 151 size_t size() const; 152 size_t capacity() const; 153 154 void clear(); 155 156 Row* begin(); 157 Row* end(); 158 const Row* begin() const; 159 const Row* end() const; 160 161 Row& insert(Row&& row); 162 Row& insert(const Row& row); 163 // Inserts a new row. Throws an exception if this would violate the uniqueness constraints of any 164 // of the indexes. 165 166 template <typename Collection> 167 void insertAll(Collection&& collection); 168 template <typename Collection> 169 void insertAll(Collection& collection); 170 // Given an iterable collection of Rows, inserts all of them into this table. If the input is 171 // an rvalue, the rows will be moved rather than copied. 172 // 173 // If an insertion throws (e.g. because it violates a uniqueness constraint of some index), 174 // subsequent insertions do not occur, but previous insertions remain inserted. 175 176 template <typename UpdateFunc> 177 Row& upsert(Row&& row, UpdateFunc&& update); 178 template <typename UpdateFunc> 179 Row& upsert(const Row& row, UpdateFunc&& update); 180 // Tries to insert a new row. However, if a duplicate already exists (according to some index), 181 // then update(Row& existingRow, Row&& newRow) is called to modify the existing row. 182 183 template <typename Index, typename... Params> 184 kj::Maybe<Row&> find(Params&&... params); 185 template <typename Index, typename... Params> 186 kj::Maybe<const Row&> find(Params&&... params) const; 187 // Using the given index, search for a matching row. What parameters are accepted depends on the 188 // index. Not all indexes support this method -- "multimap" indexes may support only range(). 189 190 template <typename Index, typename... Params, typename Func> 191 Row& findOrCreate(Params&&... params, Func&& createFunc); 192 // Like find(), but if the row doesn't exist, call a function to create it. createFunc() must 193 // return `Row` or something that implicitly converts to `Row`. 194 // 195 // NOTE: C++ doesn't actually properly support inferring types of a parameter pack at the 196 // beginning of an argument list, but we define a hack to support it below. Don't worry about 197 // it. 198 199 template <typename Index, typename BeginKey, typename EndKey> 200 auto range(BeginKey&& begin, EndKey&& end); 201 template <typename Index, typename BeginKey, typename EndKey> 202 auto range(BeginKey&& begin, EndKey&& end) const; 203 // Using the given index, look up a range of values, returning an iterable. What parameters are 204 // accepted depends on the index. Not all indexes support this method (in particular, unordered 205 // indexes normally don't). 206 207 template <typename Index> 208 _::TableIterable<Row, Index&> ordered(); 209 template <typename Index> 210 _::TableIterable<const Row, const Index&> ordered() const; 211 // Returns an iterable over the whole table ordered using the given index. Not all indexes 212 // support this method. 213 214 template <typename Index, typename... Params> 215 auto seek(Params&&... params); 216 template <typename Index, typename... Params> 217 auto seek(Params&&... params) const; 218 // Takes same parameters as find(), but returns an iterator at the position where the search 219 // key should go. That is, this returns an iterator that points to the matching entry or, if 220 // there is no matching entry, points at the next entry after the key, in order. Or, if there 221 // is no such entry, the returned iterator is the same as ordered().end(). 222 // 223 // seek() is only supported by indexes that support ordered(). It returns the same kind of 224 // iterator that ordered() uses. 225 226 template <typename Index, typename... Params> 227 bool eraseMatch(Params&&... params); 228 // Erase the row that would be matched by `find<Index>(params)`. Returns true if there was a 229 // match. 230 231 template <typename Index, typename BeginKey, typename EndKey> 232 size_t eraseRange(BeginKey&& begin, EndKey&& end); 233 // Erase the row that would be matched by `range<Index>(params)`. Returns the number of 234 // elements erased. 235 236 void erase(Row& row); 237 // Erase the given row. 238 // 239 // WARNING: This invalidates all iterators, so you can't iterate over rows and erase them this 240 // way. Use `eraseAll()` for that. 241 242 Row release(Row& row); 243 // Remove the given row from the table and return it in one operation. 244 // 245 // WARNING: This invalidates all iterators, so you can't iterate over rows and release them this 246 // way. 247 248 template <typename Predicate, typename = decltype(instance<Predicate>()(instance<Row&>()))> 249 size_t eraseAll(Predicate&& predicate); 250 // Erase all rows for which predicate(row) returns true. This scans over the entire table. 251 252 template <typename Collection, typename = decltype(instance<Collection>().begin()), bool = true> 253 size_t eraseAll(Collection&& collection); 254 // Erase all rows in the given iterable collection of rows. This carefully marks rows for 255 // deletion in a first pass then deletes them in a second. 256 257 template <size_t index = 0, typename... Params> 258 kj::Maybe<Row&> find(Params&&... params); 259 template <size_t index = 0, typename... Params> 260 kj::Maybe<const Row&> find(Params&&... params) const; 261 template <size_t index = 0, typename... Params, typename Func> 262 Row& findOrCreate(Params&&... params, Func&& createFunc); 263 template <size_t index = 0, typename BeginKey, typename EndKey> 264 auto range(BeginKey&& begin, EndKey&& end); 265 template <size_t index = 0, typename BeginKey, typename EndKey> 266 auto range(BeginKey&& begin, EndKey&& end) const; 267 template <size_t index = 0> 268 _::TableIterable<Row, TypeOfIndex<index, Tuple<Indexes...>>&> ordered(); 269 template <size_t index = 0> 270 _::TableIterable<const Row, const TypeOfIndex<index, Tuple<Indexes...>>&> ordered() const; 271 template <size_t index = 0, typename... Params> 272 auto seek(Params&&... params); 273 template <size_t index = 0, typename... Params> 274 auto seek(Params&&... params) const; 275 template <size_t index = 0, typename... Params> 276 bool eraseMatch(Params&&... params); 277 template <size_t index = 0, typename BeginKey, typename EndKey> 278 size_t eraseRange(BeginKey&& begin, EndKey&& end); 279 // Methods which take an index type as a template parameter can also take an index number. This 280 // is useful particularly when you have multiple indexes of the same type but different runtime 281 // properties. Additionally, you can omit the template parameter altogether to use the first 282 // index. 283 284 template <size_t index = 0> 285 void verify(); 286 // Checks the integrity of indexes, throwing an exception if there are any problems. This is 287 // intended to be called within the unit test for an index. 288 289 template <typename Index, typename First, typename... Rest> 290 Row& findOrCreate(First&& first, Rest&&... rest); 291 template <size_t index = 0, typename First, typename... Rest> 292 Row& findOrCreate(First&& first, Rest&&... rest); 293 // HACK: A parameter pack can only be inferred if it lives at the end of the argument list, so 294 // the findOrCreate() definitions from earlier won't actually work. These ones will, but we 295 // have to do some annoying things inside to regroup the arguments. 296 297 private: 298 Vector<Row> rows; 299 Tuple<Indexes...> indexes; 300 301 template <size_t index = 0, bool final = (index >= sizeof...(Indexes))> 302 class Impl; 303 template <typename Func, typename... Params> 304 class FindOrCreateImpl; 305 306 template <typename ParamsTuple, typename... Params> 307 struct FindOrCreateHack; 308 309 void eraseImpl(size_t pos); 310 template <typename Collection> 311 size_t eraseAllImpl(Collection&& collection); 312 }; 313 314 template <typename Callbacks> 315 class HashIndex; 316 // A Table index based on a hash table. 317 // 318 // This implementation: 319 // * Is based on linear probing, not chaining. It is important to use a high-quality hash function. 320 // Use the KJ hashing library if possible. 321 // * Is limited to tables of 2^30 rows or less, mainly to allow for tighter packing with 32-bit 322 // integers instead of 64-bit. 323 // * Caches hash codes so that each table row need only be hashed once, and never checks equality 324 // unless hash codes have already been determined to be equal. 325 // 326 // The `Callbacks` type defines how to compute hash codes and equality. It should be defined like: 327 // 328 // class Callbacks { 329 // public: 330 // // In this interface, `SearchParams...` means whatever parameters you want to support in 331 // // a call to table.find(...). By overloading the calls to support various inputs, you can 332 // // affect what table.find(...) accepts. 333 // 334 // SearchParam& keyForRow(const Row& row); 335 // // Given a row of the table, return the SearchParams that might be passed to the other 336 // // methods to match this row. 337 // 338 // bool matches(const Row&, SearchParams&&...) const; 339 // // Returns true if the row on the left matches thes search params on the right. 340 // 341 // uint hashCode(SearchParams&&...) const; 342 // // Computes the hash code of the given search params. Matching rows (as determined by 343 // // matches()) must have the same hash code. Non-matching rows should have different hash 344 // // codes, to the maximum extent possible. Non-matching rows with the same hash code hurt 345 // // performance. 346 // }; 347 // 348 // If your `Callbacks` type has dynamic state, you may pass its constructor parameters as the 349 // constructor parameters to `HashIndex`. 350 351 template <typename Callbacks> 352 class TreeIndex; 353 // A Table index based on a B-tree. 354 // 355 // This allows sorted iteration over rows. 356 // 357 // The `Callbacks` type defines how to compare rows. It should be defined like: 358 // 359 // class Callbacks { 360 // public: 361 // // In this interface, `SearchParams...` means whatever parameters you want to support in 362 // // a call to table.find(...). By overloading the calls to support various inputs, you can 363 // // affect what table.find(...) accepts. 364 // 365 // SearchParam& keyForRow(const Row& row); 366 // // Given a row of the table, return the SearchParams that might be passed to the other 367 // // methods to match this row. 368 // 369 // bool isBefore(const Row&, SearchParams&&...) const; 370 // // Returns true if the row on the left comes before the search params on the right. 371 // 372 // bool matches(const Row&, SearchParams&&...) const; 373 // // Returns true if the row "matches" the search params. 374 // }; 375 376 // ======================================================================================= 377 // inline implementation details 378 379 namespace _ { // private 380 381 KJ_NORETURN(void throwDuplicateTableRow()); 382 383 template <typename Dst, typename Src, typename = decltype(instance<Src>().size())> 384 inline void tryReserveSize(Dst& dst, Src&& src) { dst.reserve(dst.size() + src.size()); } 385 template <typename... Params> 386 inline void tryReserveSize(Params&&...) {} 387 // If `src` has a `.size()` method, call dst.reserve(dst.size() + src.size()). 388 // Otherwise, do nothing. 389 390 template <typename Row> 391 class TableMapping { 392 public: 393 TableMapping(Row* table): table(table) {} 394 Row& map(size_t i) const { return table[i]; } 395 396 private: 397 Row* table; 398 }; 399 400 template <typename Row> 401 class TableUnmapping { 402 public: 403 TableUnmapping(Row* table): table(table) {} 404 size_t map(Row& row) const { return &row - table; } 405 size_t map(Row* row) const { return row - table; } 406 407 private: 408 Row* table; 409 }; 410 411 template <typename Iterator> 412 class IterRange { 413 public: 414 inline IterRange(Iterator b, Iterator e): b(b), e(e) {} 415 416 inline Iterator begin() const { return b; } 417 inline Iterator end() const { return e; } 418 private: 419 Iterator b; 420 Iterator e; 421 }; 422 423 template <typename Iterator> 424 inline IterRange<Decay<Iterator>> iterRange(Iterator b, Iterator e) { 425 return { b, e }; 426 } 427 428 } // namespace _ (private) 429 430 template <typename Row, typename... Indexes> 431 template <size_t index> 432 class Table<Row, Indexes...>::Impl<index, false> { 433 public: 434 static void reserve(Table<Row, Indexes...>& table, size_t size) { 435 get<index>(table.indexes).reserve(size); 436 Impl<index + 1>::reserve(table, size); 437 } 438 439 static void clear(Table<Row, Indexes...>& table) { 440 get<index>(table.indexes).clear(); 441 Impl<index + 1>::clear(table); 442 } 443 444 static kj::Maybe<size_t> insert(Table<Row, Indexes...>& table, size_t pos, Row& row, uint skip) { 445 if (skip == index) { 446 return Impl<index + 1>::insert(table, pos, row, skip); 447 } 448 auto& indexObj = get<index>(table.indexes); 449 KJ_IF_MAYBE(existing, indexObj.insert(table.rows.asPtr(), pos, indexObj.keyForRow(row))) { 450 return *existing; 451 } 452 453 bool success = false; 454 KJ_DEFER(if (!success) { 455 indexObj.erase(table.rows.asPtr(), pos, indexObj.keyForRow(row)); 456 }); 457 auto result = Impl<index + 1>::insert(table, pos, row, skip); 458 success = result == nullptr; 459 return result; 460 } 461 462 static void erase(Table<Row, Indexes...>& table, size_t pos, Row& row) { 463 auto& indexObj = get<index>(table.indexes); 464 indexObj.erase(table.rows.asPtr(), pos, indexObj.keyForRow(row)); 465 Impl<index + 1>::erase(table, pos, row); 466 } 467 468 static void move(Table<Row, Indexes...>& table, size_t oldPos, size_t newPos, Row& row) { 469 auto& indexObj = get<index>(table.indexes); 470 indexObj.move(table.rows.asPtr(), oldPos, newPos, indexObj.keyForRow(row)); 471 Impl<index + 1>::move(table, oldPos, newPos, row); 472 } 473 }; 474 475 template <typename Row, typename... Indexes> 476 template <size_t index> 477 class Table<Row, Indexes...>::Impl<index, true> { 478 public: 479 static void reserve(Table<Row, Indexes...>& table, size_t size) {} 480 static void clear(Table<Row, Indexes...>& table) {} 481 static kj::Maybe<size_t> insert(Table<Row, Indexes...>& table, size_t pos, Row& row, uint skip) { 482 return nullptr; 483 } 484 static void erase(Table<Row, Indexes...>& table, size_t pos, Row& row) {} 485 static void move(Table<Row, Indexes...>& table, size_t oldPos, size_t newPos, Row& row) {} 486 }; 487 488 template <typename Row, typename... Indexes> 489 Table<Row, Indexes...>::Table() {} 490 491 template <typename Row, typename... Indexes> 492 Table<Row, Indexes...>::Table(Indexes&&... indexes) 493 : indexes(tuple(kj::fwd<Indexes&&>(indexes)...)) {} 494 495 template <typename Row, typename... Indexes> 496 void Table<Row, Indexes...>::reserve(size_t size) { 497 rows.reserve(size); 498 Impl<>::reserve(*this, size); 499 } 500 501 template <typename Row, typename... Indexes> 502 size_t Table<Row, Indexes...>::size() const { 503 return rows.size(); 504 } 505 template <typename Row, typename... Indexes> 506 void Table<Row, Indexes...>::clear() { 507 Impl<>::clear(*this); 508 rows.clear(); 509 } 510 template <typename Row, typename... Indexes> 511 size_t Table<Row, Indexes...>::capacity() const { 512 return rows.capacity(); 513 } 514 515 template <typename Row, typename... Indexes> 516 Row* Table<Row, Indexes...>::begin() { 517 return rows.begin(); 518 } 519 template <typename Row, typename... Indexes> 520 Row* Table<Row, Indexes...>::end() { 521 return rows.end(); 522 } 523 template <typename Row, typename... Indexes> 524 const Row* Table<Row, Indexes...>::begin() const { 525 return rows.begin(); 526 } 527 template <typename Row, typename... Indexes> 528 const Row* Table<Row, Indexes...>::end() const { 529 return rows.end(); 530 } 531 532 template <typename Row, typename... Indexes> 533 Row& Table<Row, Indexes...>::insert(Row&& row) { 534 KJ_IF_MAYBE(existing, Impl<>::insert(*this, rows.size(), row, kj::maxValue)) { 535 _::throwDuplicateTableRow(); 536 } else { 537 return rows.add(kj::mv(row)); 538 } 539 } 540 template <typename Row, typename... Indexes> 541 Row& Table<Row, Indexes...>::insert(const Row& row) { 542 return insert(kj::cp(row)); 543 } 544 545 template <typename Row, typename... Indexes> 546 template <typename Collection> 547 void Table<Row, Indexes...>::insertAll(Collection&& collection) { 548 _::tryReserveSize(*this, collection); 549 for (auto& row: collection) { 550 insert(kj::mv(row)); 551 } 552 } 553 554 template <typename Row, typename... Indexes> 555 template <typename Collection> 556 void Table<Row, Indexes...>::insertAll(Collection& collection) { 557 _::tryReserveSize(*this, collection); 558 for (auto& row: collection) { 559 insert(row); 560 } 561 } 562 563 template <typename Row, typename... Indexes> 564 template <typename UpdateFunc> 565 Row& Table<Row, Indexes...>::upsert(Row&& row, UpdateFunc&& update) { 566 KJ_IF_MAYBE(existing, Impl<>::insert(*this, rows.size(), row, kj::maxValue)) { 567 update(rows[*existing], kj::mv(row)); 568 return rows[*existing]; 569 } else { 570 return rows.add(kj::mv(row)); 571 } 572 } 573 template <typename Row, typename... Indexes> 574 template <typename UpdateFunc> 575 Row& Table<Row, Indexes...>::upsert(const Row& row, UpdateFunc&& update) { 576 return upsert(kj::cp(row), kj::fwd<UpdateFunc>(update)); 577 } 578 579 template <typename Row, typename... Indexes> 580 template <typename Index, typename... Params> 581 kj::Maybe<Row&> Table<Row, Indexes...>::find(Params&&... params) { 582 return find<indexOfType<Index, Tuple<Indexes...>>()>(kj::fwd<Params>(params)...); 583 } 584 template <typename Row, typename... Indexes> 585 template <size_t index, typename... Params> 586 kj::Maybe<Row&> Table<Row, Indexes...>::find(Params&&... params) { 587 KJ_IF_MAYBE(pos, get<index>(indexes).find(rows.asPtr(), kj::fwd<Params>(params)...)) { 588 return rows[*pos]; 589 } else { 590 return nullptr; 591 } 592 } 593 template <typename Row, typename... Indexes> 594 template <typename Index, typename... Params> 595 kj::Maybe<const Row&> Table<Row, Indexes...>::find(Params&&... params) const { 596 return find<indexOfType<Index, Tuple<Indexes...>>()>(kj::fwd<Params>(params)...); 597 } 598 template <typename Row, typename... Indexes> 599 template <size_t index, typename... Params> 600 kj::Maybe<const Row&> Table<Row, Indexes...>::find(Params&&... params) const { 601 KJ_IF_MAYBE(pos, get<index>(indexes).find(rows.asPtr(), kj::fwd<Params>(params)...)) { 602 return rows[*pos]; 603 } else { 604 return nullptr; 605 } 606 } 607 608 template <typename Row, typename... Indexes> 609 template <typename Func, typename... Params> 610 class Table<Row, Indexes...>::FindOrCreateImpl { 611 public: 612 template <size_t index> 613 static Row& apply(Table<Row, Indexes...>& table, Params&&... params, Func&& createFunc) { 614 auto pos = table.rows.size(); 615 KJ_IF_MAYBE(existing, get<index>(table.indexes).insert(table.rows.asPtr(), pos, params...)) { 616 return table.rows[*existing]; 617 } else { 618 bool success = false; 619 KJ_DEFER({ 620 if (!success) { 621 get<index>(table.indexes).erase(table.rows.asPtr(), pos, params...); 622 } 623 }); 624 auto& newRow = table.rows.add(createFunc()); 625 KJ_DEFER({ 626 if (!success) { 627 table.rows.removeLast(); 628 } 629 }); 630 if (Table<Row, Indexes...>::template Impl<>::insert(table, pos, newRow, index) == nullptr) { 631 success = true; 632 } else { 633 _::throwDuplicateTableRow(); 634 } 635 return newRow; 636 } 637 } 638 }; 639 640 template <typename Row, typename... Indexes> 641 template <typename... T, typename U, typename V, typename... W> 642 struct Table<Row, Indexes...>::FindOrCreateHack<_::Tuple<T...>, U, V, W...> 643 : public FindOrCreateHack<_::Tuple<T..., U>, V, W...> {}; 644 template <typename Row, typename... Indexes> 645 template <typename... T, typename U> 646 struct Table<Row, Indexes...>::FindOrCreateHack<_::Tuple<T...>, U> 647 : public FindOrCreateImpl<U, T...> {}; 648 // This awful hack works around C++'s lack of support for parameter packs anywhere other than at 649 // the end of an argument list. We accumulate all of the types except for the last one into a 650 // Tuple, then forward to FindOrCreateImpl with the last parameter as the Func. 651 652 template <typename Row, typename... Indexes> 653 template <typename Index, typename First, typename... Rest> 654 Row& Table<Row, Indexes...>::findOrCreate(First&& first, Rest&&... rest) { 655 return findOrCreate<indexOfType<Index, Tuple<Indexes...>>()>( 656 kj::fwd<First>(first), kj::fwd<Rest>(rest)...); 657 } 658 template <typename Row, typename... Indexes> 659 template <size_t index, typename First, typename... Rest> 660 Row& Table<Row, Indexes...>::findOrCreate(First&& first, Rest&&... rest) { 661 return FindOrCreateHack<_::Tuple<>, First, Rest...>::template apply<index>( 662 *this, kj::fwd<First>(first), kj::fwd<Rest>(rest)...); 663 } 664 665 template <typename Row, typename... Indexes> 666 template <typename Index, typename BeginKey, typename EndKey> 667 auto Table<Row, Indexes...>::range(BeginKey&& begin, EndKey&& end) { 668 return range<indexOfType<Index, Tuple<Indexes...>>()>( 669 kj::fwd<BeginKey>(begin), kj::fwd<EndKey>(end)); 670 } 671 template <typename Row, typename... Indexes> 672 template <size_t index, typename BeginKey, typename EndKey> 673 auto Table<Row, Indexes...>::range(BeginKey&& begin, EndKey&& end) { 674 auto inner = _::iterRange(get<index>(indexes).seek(rows.asPtr(), kj::fwd<BeginKey>(begin)), 675 get<index>(indexes).seek(rows.asPtr(), kj::fwd<EndKey>(end))); 676 return _::TableIterable<Row, decltype(inner)>(kj::mv(inner), rows.begin()); 677 } 678 template <typename Row, typename... Indexes> 679 template <typename Index, typename BeginKey, typename EndKey> 680 auto Table<Row, Indexes...>::range(BeginKey&& begin, EndKey&& end) const { 681 return range<indexOfType<Index, Tuple<Indexes...>>()>( 682 kj::fwd<BeginKey>(begin), kj::fwd<EndKey>(end)); 683 } 684 template <typename Row, typename... Indexes> 685 template <size_t index, typename BeginKey, typename EndKey> 686 auto Table<Row, Indexes...>::range(BeginKey&& begin, EndKey&& end) const { 687 auto inner = _::iterRange(get<index>(indexes).seek(rows.asPtr(), kj::fwd<BeginKey>(begin)), 688 get<index>(indexes).seek(rows.asPtr(), kj::fwd<EndKey>(end))); 689 return _::TableIterable<const Row, decltype(inner)>(kj::mv(inner), rows.begin()); 690 } 691 692 template <typename Row, typename... Indexes> 693 template <typename Index> 694 _::TableIterable<Row, Index&> Table<Row, Indexes...>::ordered() { 695 return ordered<indexOfType<Index, Tuple<Indexes...>>()>(); 696 } 697 template <typename Row, typename... Indexes> 698 template <size_t index> 699 _::TableIterable<Row, TypeOfIndex<index, Tuple<Indexes...>>&> Table<Row, Indexes...>::ordered() { 700 return { get<index>(indexes), rows.begin() }; 701 } 702 template <typename Row, typename... Indexes> 703 template <typename Index> 704 _::TableIterable<const Row, const Index&> Table<Row, Indexes...>::ordered() const { 705 return ordered<indexOfType<Index, Tuple<Indexes...>>()>(); 706 } 707 template <typename Row, typename... Indexes> 708 template <size_t index> 709 _::TableIterable<const Row, const TypeOfIndex<index, Tuple<Indexes...>>&> 710 Table<Row, Indexes...>::ordered() const { 711 return { get<index>(indexes), rows.begin() }; 712 } 713 714 template <typename Row, typename... Indexes> 715 template <typename Index, typename... Params> 716 auto Table<Row, Indexes...>::seek(Params&&... params) { 717 return seek<indexOfType<Index, Tuple<Indexes...>>()>(kj::fwd<Params>(params)...); 718 } 719 template <typename Row, typename... Indexes> 720 template <size_t index, typename... Params> 721 auto Table<Row, Indexes...>::seek(Params&&... params) { 722 auto inner = get<index>(indexes).seek(rows.asPtr(), kj::fwd<Params>(params)...); 723 return _::TableIterator<Row, decltype(inner)>(kj::mv(inner), rows.begin()); 724 } 725 template <typename Row, typename... Indexes> 726 template <typename Index, typename... Params> 727 auto Table<Row, Indexes...>::seek(Params&&... params) const { 728 return seek<indexOfType<Index, Tuple<Indexes...>>()>(kj::fwd<Params>(params)...); 729 } 730 template <typename Row, typename... Indexes> 731 template <size_t index, typename... Params> 732 auto Table<Row, Indexes...>::seek(Params&&... params) const { 733 auto inner = get<index>(indexes).seek(rows.asPtr(), kj::fwd<Params>(params)...); 734 return _::TableIterator<Row, decltype(inner)>(kj::mv(inner), rows.begin()); 735 } 736 737 template <typename Row, typename... Indexes> 738 template <typename Index, typename... Params> 739 bool Table<Row, Indexes...>::eraseMatch(Params&&... params) { 740 return eraseMatch<indexOfType<Index, Tuple<Indexes...>>()>(kj::fwd<Params>(params)...); 741 } 742 template <typename Row, typename... Indexes> 743 template <size_t index, typename... Params> 744 bool Table<Row, Indexes...>::eraseMatch(Params&&... params) { 745 KJ_IF_MAYBE(pos, get<index>(indexes).find(rows.asPtr(), kj::fwd<Params>(params)...)) { 746 eraseImpl(*pos); 747 return true; 748 } else { 749 return false; 750 } 751 } 752 753 template <typename Row, typename... Indexes> 754 template <typename Index, typename BeginKey, typename EndKey> 755 size_t Table<Row, Indexes...>::eraseRange(BeginKey&& begin, EndKey&& end) { 756 return eraseRange<indexOfType<Index, Tuple<Indexes...>>()>( 757 kj::fwd<BeginKey>(begin), kj::fwd<EndKey>(end)); 758 } 759 template <typename Row, typename... Indexes> 760 template <size_t index, typename BeginKey, typename EndKey> 761 size_t Table<Row, Indexes...>::eraseRange(BeginKey&& begin, EndKey&& end) { 762 auto inner = _::iterRange(get<index>(indexes).seek(rows.asPtr(), kj::fwd<BeginKey>(begin)), 763 get<index>(indexes).seek(rows.asPtr(), kj::fwd<EndKey>(end))); 764 return eraseAllImpl(inner); 765 } 766 767 template <typename Row, typename... Indexes> 768 template <size_t index> 769 void Table<Row, Indexes...>::verify() { 770 get<index>(indexes).verify(rows.asPtr()); 771 } 772 773 template <typename Row, typename... Indexes> 774 void Table<Row, Indexes...>::erase(Row& row) { 775 KJ_TABLE_IREQUIRE(&row >= rows.begin() && &row < rows.end(), "row is not a member of this table"); 776 eraseImpl(&row - rows.begin()); 777 } 778 template <typename Row, typename... Indexes> 779 void Table<Row, Indexes...>::eraseImpl(size_t pos) { 780 Impl<>::erase(*this, pos, rows[pos]); 781 size_t back = rows.size() - 1; 782 if (pos != back) { 783 Impl<>::move(*this, back, pos, rows[back]); 784 rows[pos] = kj::mv(rows[back]); 785 } 786 rows.removeLast(); 787 } 788 789 template <typename Row, typename... Indexes> 790 Row Table<Row, Indexes...>::release(Row& row) { 791 KJ_TABLE_IREQUIRE(&row >= rows.begin() && &row < rows.end(), "row is not a member of this table"); 792 size_t pos = &row - rows.begin(); 793 Impl<>::erase(*this, pos, row); 794 Row result = kj::mv(row); 795 size_t back = rows.size() - 1; 796 if (pos != back) { 797 Impl<>::move(*this, back, pos, rows[back]); 798 row = kj::mv(rows[back]); 799 } 800 rows.removeLast(); 801 return result; 802 } 803 804 template <typename Row, typename... Indexes> 805 template <typename Predicate, typename> 806 size_t Table<Row, Indexes...>::eraseAll(Predicate&& predicate) { 807 size_t count = 0; 808 for (size_t i = 0; i < rows.size();) { 809 if (predicate(rows[i])) { 810 eraseImpl(i); 811 ++count; 812 // eraseImpl() replaces the erased row with the last row, so don't increment i here; repeat 813 // with the same i. 814 } else { 815 ++i; 816 } 817 } 818 return count; 819 } 820 821 template <typename Row, typename... Indexes> 822 template <typename Collection, typename, bool> 823 size_t Table<Row, Indexes...>::eraseAll(Collection&& collection) { 824 return eraseAllImpl(MappedIterable<Collection&, _::TableUnmapping<Row>>( 825 collection, rows.begin())); 826 } 827 828 template <typename Row, typename... Indexes> 829 template <typename Collection> 830 size_t Table<Row, Indexes...>::eraseAllImpl(Collection&& collection) { 831 // We need to transform the collection of row numbers into a sequence of erasures, accounting 832 // for the fact that each erasure re-positions the last row into its slot. 833 Vector<size_t> erased; 834 _::tryReserveSize(erased, collection); 835 for (size_t pos: collection) { 836 while (pos >= rows.size() - erased.size()) { 837 // Oops, the next item to be erased is already scheduled to be moved to a different location 838 // due to a previous erasure. Figure out where it will be at this point. 839 size_t erasureNumber = rows.size() - pos - 1; 840 pos = erased[erasureNumber]; 841 } 842 erased.add(pos); 843 } 844 845 // Now we can execute the sequence of erasures. 846 for (size_t pos: erased) { 847 eraseImpl(pos); 848 } 849 850 return erased.size(); 851 } 852 853 // ----------------------------------------------------------------------------- 854 // Hash table index 855 856 namespace _ { // private 857 858 void logHashTableInconsistency(); 859 860 struct HashBucket { 861 uint hash; 862 uint value; 863 864 HashBucket() = default; 865 HashBucket(uint hash, uint pos) 866 : hash(hash), value(pos + 2) {} 867 868 inline bool isEmpty() const { return value == 0; } 869 inline bool isErased() const { return value == 1; } 870 inline bool isOccupied() const { return value >= 2; } 871 template <typename Row> 872 inline Row& getRow(ArrayPtr<Row> table) const { return table[getPos()]; } 873 template <typename Row> 874 inline const Row& getRow(ArrayPtr<const Row> table) const { return table[getPos()]; } 875 inline bool isPos(uint pos) const { return pos + 2 == value; } 876 inline uint getPos() const { 877 KJ_TABLE_IASSERT(value >= 2); 878 return value - 2; 879 } 880 inline void setEmpty() { value = 0; } 881 inline void setErased() { value = 1; } 882 inline void setPos(uint pos) { value = pos + 2; } 883 }; 884 885 inline size_t probeHash(const kj::Array<HashBucket>& buckets, size_t i) { 886 // TODO(perf): Is linear probing OK or should we do something fancier? 887 if (++i == buckets.size()) { 888 return 0; 889 } else { 890 return i; 891 } 892 } 893 894 kj::Array<HashBucket> rehash(kj::ArrayPtr<const HashBucket> oldBuckets, size_t targetSize); 895 896 uint chooseBucket(uint hash, uint count); 897 898 } // namespace _ (private) 899 900 template <typename Callbacks> 901 class HashIndex { 902 public: 903 HashIndex() = default; 904 template <typename... Params> 905 HashIndex(Params&&... params): cb(kj::fwd<Params>(params)...) {} 906 907 size_t capacity() { 908 // This method is for testing. 909 return buckets.size(); 910 } 911 912 void reserve(size_t size) { 913 if (buckets.size() < size * 2) { 914 rehash(size); 915 } 916 } 917 918 void clear() { 919 erasedCount = 0; 920 memset(buckets.begin(), 0, buckets.asBytes().size()); 921 } 922 923 template <typename Row> 924 decltype(auto) keyForRow(Row&& row) const { 925 return cb.keyForRow(kj::fwd<Row>(row)); 926 } 927 928 template <typename Row, typename... Params> 929 kj::Maybe<size_t> insert(kj::ArrayPtr<Row> table, size_t pos, Params&&... params) { 930 if (buckets.size() * 2 < (table.size() + 1 + erasedCount) * 3) { 931 // Load factor is more than 2/3, let's rehash so that it's 1/3, i.e. double the buckets. 932 // Note that rehashing also cleans up erased entries, so we may not actually be doubling if 933 // there are a lot of erasures. Nevertheless, this gives us amortized constant time -- it 934 // would take at least O(table.size()) more insertions (whether or not erasures occur) 935 // before another rehash is needed. 936 rehash((table.size() + 1) * 3); 937 } 938 939 uint hashCode = cb.hashCode(params...); 940 Maybe<_::HashBucket&> erasedSlot; 941 for (uint i = _::chooseBucket(hashCode, buckets.size());; i = _::probeHash(buckets, i)) { 942 auto& bucket = buckets[i]; 943 if (bucket.isEmpty()) { 944 // no duplicates found 945 KJ_IF_MAYBE(s, erasedSlot) { 946 --erasedCount; 947 *s = { hashCode, uint(pos) }; 948 } else { 949 bucket = { hashCode, uint(pos) }; 950 } 951 return nullptr; 952 } else if (bucket.isErased()) { 953 // We can fill in the erased slot. However, we have to keep searching to make sure there 954 // are no duplicates before we do that. 955 if (erasedSlot == nullptr) { 956 erasedSlot = bucket; 957 } 958 } else if (bucket.hash == hashCode && 959 cb.matches(bucket.getRow(table), params...)) { 960 // duplicate row 961 return size_t(bucket.getPos()); 962 } 963 } 964 } 965 966 template <typename Row, typename... Params> 967 void erase(kj::ArrayPtr<Row> table, size_t pos, Params&&... params) { 968 uint hashCode = cb.hashCode(params...); 969 for (uint i = _::chooseBucket(hashCode, buckets.size());; i = _::probeHash(buckets, i)) { 970 auto& bucket = buckets[i]; 971 if (bucket.isPos(pos)) { 972 // found it 973 ++erasedCount; 974 bucket.setErased(); 975 return; 976 } else if (bucket.isEmpty()) { 977 // can't find the bucket, something is very wrong 978 _::logHashTableInconsistency(); 979 return; 980 } 981 } 982 } 983 984 template <typename Row, typename... Params> 985 void move(kj::ArrayPtr<Row> table, size_t oldPos, size_t newPos, Params&&... params) { 986 uint hashCode = cb.hashCode(params...); 987 for (uint i = _::chooseBucket(hashCode, buckets.size());; i = _::probeHash(buckets, i)) { 988 auto& bucket = buckets[i]; 989 if (bucket.isPos(oldPos)) { 990 // found it 991 bucket.setPos(newPos); 992 return; 993 } else if (bucket.isEmpty()) { 994 // can't find the bucket, something is very wrong 995 _::logHashTableInconsistency(); 996 return; 997 } 998 } 999 } 1000 1001 template <typename Row, typename... Params> 1002 Maybe<size_t> find(kj::ArrayPtr<Row> table, Params&&... params) const { 1003 if (buckets.size() == 0) return nullptr; 1004 1005 uint hashCode = cb.hashCode(params...); 1006 for (uint i = _::chooseBucket(hashCode, buckets.size());; i = _::probeHash(buckets, i)) { 1007 auto& bucket = buckets[i]; 1008 if (bucket.isEmpty()) { 1009 // not found. 1010 return nullptr; 1011 } else if (bucket.isErased()) { 1012 // skip, keep searching 1013 } else if (bucket.hash == hashCode && 1014 cb.matches(bucket.getRow(table), params...)) { 1015 // found 1016 return size_t(bucket.getPos()); 1017 } 1018 } 1019 } 1020 1021 // No begin() nor end() because hash tables are not usefully ordered. 1022 1023 private: 1024 Callbacks cb; 1025 size_t erasedCount = 0; 1026 Array<_::HashBucket> buckets; 1027 1028 void rehash(size_t targetSize) { 1029 buckets = _::rehash(buckets, targetSize); 1030 erasedCount = 0; 1031 } 1032 }; 1033 1034 // ----------------------------------------------------------------------------- 1035 // BTree index 1036 1037 namespace _ { // private 1038 1039 KJ_ALWAYS_INLINE(void compilerBarrier()); 1040 void compilerBarrier() { 1041 // Make sure that reads occurring before this call cannot be re-ordered to happen after 1042 // writes that occur after this call. We need this in a couple places below to prevent C++ 1043 // strict aliasing rules from breaking things. 1044 #if _MSC_VER 1045 _ReadWriteBarrier(); 1046 #else 1047 __asm__ __volatile__("": : :"memory"); 1048 #endif 1049 } 1050 1051 template <typename T> 1052 inline void acopy(T* to, T* from, size_t size) { memcpy(to, from, size * sizeof(T)); } 1053 template <typename T> 1054 inline void amove(T* to, T* from, size_t size) { memmove(to, from, size * sizeof(T)); } 1055 template <typename T> 1056 inline void azero(T* ptr, size_t size) { memset(ptr, 0, size * sizeof(T)); } 1057 // memcpy/memmove/memset variants that count size in elements, not bytes. 1058 // 1059 // TODO(cleanup): These are generally useful, put them somewhere. 1060 1061 class BTreeImpl { 1062 public: 1063 class Iterator; 1064 class MaybeUint; 1065 struct NodeUnion; 1066 struct Leaf; 1067 struct Parent; 1068 struct Freelisted; 1069 1070 class SearchKey { 1071 // Passed to methods that need to search the tree. This class allows most of the B-tree 1072 // implementation to be kept out of templates, avoiding code bloat, at the cost of some 1073 // performance trade-off. In order to lessen the performance cost of virtual calls, we design 1074 // this interface so that it only needs to be called once per tree node, rather than once per 1075 // comparison. 1076 1077 public: 1078 virtual uint search(const Parent& parent) const = 0; 1079 virtual uint search(const Leaf& leaf) const = 0; 1080 // Binary search for the first key/row in the parent/leaf that is equal to or comes after the 1081 // search key. 1082 1083 virtual bool isAfter(uint rowIndex) const = 0; 1084 // Returns true if the key comes after the value in the given row. 1085 }; 1086 1087 BTreeImpl(); 1088 ~BTreeImpl() noexcept(false); 1089 1090 KJ_DISALLOW_COPY(BTreeImpl); 1091 BTreeImpl(BTreeImpl&& other); 1092 BTreeImpl& operator=(BTreeImpl&& other); 1093 1094 void logInconsistency() const; 1095 1096 void reserve(size_t size); 1097 1098 void clear(); 1099 1100 Iterator begin() const; 1101 Iterator end() const; 1102 1103 Iterator search(const SearchKey& searchKey) const; 1104 // Find the "first" row (in sorted order) for which searchKey.isAfter(rowNumber) returns true. 1105 1106 Iterator insert(const SearchKey& searchKey); 1107 // Like search() but ensures that there is room in the leaf node to insert a new row. 1108 1109 void erase(uint row, const SearchKey& searchKey); 1110 // Erase the given row number from the tree. searchKey.isAfter() returns true for the given row 1111 // and all rows after it. 1112 1113 void renumber(uint oldRow, uint newRow, const SearchKey& searchKey); 1114 // Renumber the given row from oldRow to newRow. searchKey.isAfter() returns true for oldRow and 1115 // all rows after it. (It will not be called on newRow.) 1116 1117 void verify(size_t size, FunctionParam<bool(uint, uint)>); 1118 1119 private: 1120 NodeUnion* tree; // allocated with aligned_alloc aligned to cache lines 1121 uint treeCapacity; 1122 uint height; // height of *parent* tree -- does not include the leaf level 1123 uint freelistHead; 1124 uint freelistSize; 1125 uint beginLeaf; 1126 uint endLeaf; 1127 void growTree(uint minCapacity = 0); 1128 1129 template <typename T> 1130 struct AllocResult; 1131 1132 template <typename T> 1133 inline AllocResult<T> alloc(); 1134 inline void free(uint pos); 1135 1136 inline uint split(Parent& src, uint srcPos, Parent& dst, uint dstPos); 1137 inline uint split(Leaf& dst, uint dstPos, Leaf& src, uint srcPos); 1138 inline void merge(Parent& dst, uint dstPos, uint pivot, Parent& src); 1139 inline void merge(Leaf& dst, uint dstPos, uint pivot, Leaf& src); 1140 inline void move(Parent& dst, uint dstPos, Parent& src); 1141 inline void move(Leaf& dst, uint dstPos, Leaf& src); 1142 inline void rotateLeft( 1143 Parent& left, Parent& right, Parent& parent, uint indexInParent, MaybeUint*& fixup); 1144 inline void rotateLeft( 1145 Leaf& left, Leaf& right, Parent& parent, uint indexInParent, MaybeUint*& fixup); 1146 inline void rotateRight(Parent& left, Parent& right, Parent& parent, uint indexInParent); 1147 inline void rotateRight(Leaf& left, Leaf& right, Parent& parent, uint indexInParent); 1148 1149 template <typename Node> 1150 inline Node& insertHelper(const SearchKey& searchKey, 1151 Node& node, Parent* parent, uint indexInParent, uint pos); 1152 1153 template <typename Node> 1154 inline Node& eraseHelper( 1155 Node& node, Parent* parent, uint indexInParent, uint pos, MaybeUint*& fixup); 1156 1157 size_t verifyNode(size_t size, FunctionParam<bool(uint, uint)>&, 1158 uint pos, uint height, MaybeUint maxRow); 1159 1160 static const NodeUnion EMPTY_NODE; 1161 }; 1162 1163 class BTreeImpl::MaybeUint { 1164 // A nullable uint, using the value zero to mean null and shifting all other values up by 1. 1165 public: 1166 MaybeUint() = default; 1167 inline MaybeUint(uint i): i(i - 1) {} 1168 inline MaybeUint(decltype(nullptr)): i(0) {} 1169 1170 inline bool operator==(decltype(nullptr)) const { return i == 0; } 1171 inline bool operator==(uint j) const { return i == j + 1; } 1172 inline bool operator==(const MaybeUint& other) const { return i == other.i; } 1173 inline bool operator!=(decltype(nullptr)) const { return i != 0; } 1174 inline bool operator!=(uint j) const { return i != j + 1; } 1175 inline bool operator!=(const MaybeUint& other) const { return i != other.i; } 1176 1177 inline MaybeUint& operator=(decltype(nullptr)) { i = 0; return *this; } 1178 inline MaybeUint& operator=(uint j) { i = j + 1; return *this; } 1179 1180 inline uint operator*() const { KJ_TABLE_IREQUIRE(i != 0); return i - 1; } 1181 1182 template <typename Func> 1183 inline bool check(Func& func) const { return i != 0 && func(i - 1); } 1184 // Equivalent to *this != nullptr && func(**this) 1185 1186 kj::String toString() const; 1187 1188 private: 1189 uint i; 1190 }; 1191 1192 struct BTreeImpl::Leaf { 1193 uint next; 1194 uint prev; 1195 // Pointers to next and previous nodes at the same level, used for fast iteration. 1196 1197 static constexpr size_t NROWS = 14; 1198 MaybeUint rows[NROWS]; 1199 // Pointers to table rows, offset by 1 so that 0 is an empty value. 1200 1201 inline bool isFull() const; 1202 inline bool isMostlyFull() const; 1203 inline bool isHalfFull() const; 1204 1205 inline void insert(uint i, uint newRow) { 1206 KJ_TABLE_IREQUIRE(rows[Leaf::NROWS - 1] == nullptr); // check not full 1207 1208 amove(rows + i + 1, rows + i, Leaf::NROWS - (i + 1)); 1209 rows[i] = newRow; 1210 } 1211 1212 inline void erase(uint i) { 1213 KJ_TABLE_IREQUIRE(rows[0] != nullptr); // check not empty 1214 1215 amove(rows + i, rows + i + 1, Leaf::NROWS - (i + 1)); 1216 rows[Leaf::NROWS - 1] = nullptr; 1217 } 1218 1219 inline uint size() const { 1220 static_assert(Leaf::NROWS == 14, "logic here needs updating"); 1221 1222 // Binary search for first empty element in `rows`, or return 14 if no empty elements. We do 1223 // this in a branch-free manner. Since there are 15 possible results (0 through 14, inclusive), 1224 // this isn't a perfectly balanced binary search. We carefully choose the split points so that 1225 // there's no way we'll try to dereference row[14] or later (which would be a buffer overflow). 1226 uint i = (rows[6] != nullptr) * 7; 1227 i += (rows[i + 3] != nullptr) * 4; 1228 i += (rows[i + 1] != nullptr) * 2; 1229 i += (rows[i ] != nullptr); 1230 return i; 1231 } 1232 1233 template <typename Func> 1234 inline uint binarySearch(Func& predicate) const { 1235 // Binary search to find first row for which predicate(row) is false. 1236 1237 static_assert(Leaf::NROWS == 14, "logic here needs updating"); 1238 1239 // See comments in size(). 1240 uint i = (rows[6].check(predicate)) * 7; 1241 i += (rows[i + 3].check(predicate)) * 4; 1242 i += (rows[i + 1].check(predicate)) * 2; 1243 if (i != 6) { // don't redundantly check row 6 1244 i += (rows[i ].check(predicate)); 1245 } 1246 return i; 1247 } 1248 }; 1249 1250 struct BTreeImpl::Parent { 1251 uint unused; 1252 // Not used. May be arbitrarily non-zero due to overlap with Freelisted::nextOffset. 1253 1254 static constexpr size_t NKEYS = 7; 1255 MaybeUint keys[NKEYS]; 1256 // Pointers to table rows, offset by 1 so that 0 is an empty value. 1257 // 1258 // Each keys[i] specifies the table row which is the "last" row found under children[i]. 1259 // 1260 // Note that `keys` has size 7 but `children` has size 8. `children[8]`'s "last row" is not 1261 // recorded here, because the Parent's Parent records it instead. (Or maybe the Parent's Parent's 1262 // Parent, if this Parent is `children[8]` of its own Parent. And so on.) 1263 1264 static constexpr size_t NCHILDREN = NKEYS + 1; 1265 uint children[NCHILDREN]; 1266 // Pointers to children. Not offset because the root is always at position 0, and a pointer 1267 // to the root would be nonsensical. 1268 1269 inline bool isFull() const; 1270 inline bool isMostlyFull() const; 1271 inline bool isHalfFull() const; 1272 inline void initRoot(uint key, uint leftChild, uint rightChild); 1273 inline void insertAfter(uint i, uint splitKey, uint child); 1274 inline void eraseAfter(uint i); 1275 1276 inline uint keyCount() const { 1277 static_assert(Parent::NKEYS == 7, "logic here needs updating"); 1278 1279 // Binary search for first empty element in `keys`, or return 7 if no empty elements. We do 1280 // this in a branch-free manner. Since there are 8 possible results (0 through 7, inclusive), 1281 // this is a perfectly balanced binary search. 1282 uint i = (keys[3] != nullptr) * 4; 1283 i += (keys[i + 1] != nullptr) * 2; 1284 i += (keys[i ] != nullptr); 1285 return i; 1286 } 1287 1288 template <typename Func> 1289 inline uint binarySearch(Func& predicate) const { 1290 // Binary search to find first key for which predicate(key) is false. 1291 1292 static_assert(Parent::NKEYS == 7, "logic here needs updating"); 1293 1294 // See comments in size(). 1295 uint i = (keys[3].check(predicate)) * 4; 1296 i += (keys[i + 1].check(predicate)) * 2; 1297 i += (keys[i ].check(predicate)); 1298 return i; 1299 } 1300 }; 1301 1302 struct BTreeImpl::Freelisted { 1303 int nextOffset; 1304 // The next node in the freelist is at: this + 1 + nextOffset 1305 // 1306 // Hence, newly-allocated space can initialize this to zero. 1307 1308 uint zero[15]; 1309 // Freelisted entries are always zero'd. 1310 }; 1311 1312 struct BTreeImpl::NodeUnion { 1313 union { 1314 Freelisted freelist; 1315 // If this node is in the freelist. 1316 1317 Leaf leaf; 1318 // If this node is a leaf. 1319 1320 Parent parent; 1321 // If this node is not a leaf. 1322 }; 1323 1324 inline operator Leaf&() { return leaf; } 1325 inline operator Parent&() { return parent; } 1326 inline operator const Leaf&() const { return leaf; } 1327 inline operator const Parent&() const { return parent; } 1328 }; 1329 1330 static_assert(sizeof(BTreeImpl::Parent) == 64, 1331 "BTreeImpl::Parent should be optimized to fit a cache line"); 1332 static_assert(sizeof(BTreeImpl::Leaf) == 64, 1333 "BTreeImpl::Leaf should be optimized to fit a cache line"); 1334 static_assert(sizeof(BTreeImpl::Freelisted) == 64, 1335 "BTreeImpl::Freelisted should be optimized to fit a cache line"); 1336 static_assert(sizeof(BTreeImpl::NodeUnion) == 64, 1337 "BTreeImpl::NodeUnion should be optimized to fit a cache line"); 1338 1339 bool BTreeImpl::Leaf::isFull() const { 1340 return rows[Leaf::NROWS - 1] != nullptr; 1341 } 1342 bool BTreeImpl::Leaf::isMostlyFull() const { 1343 return rows[Leaf::NROWS / 2] != nullptr; 1344 } 1345 bool BTreeImpl::Leaf::isHalfFull() const { 1346 KJ_TABLE_IASSERT(rows[Leaf::NROWS / 2 - 1] != nullptr); 1347 return rows[Leaf::NROWS / 2] == nullptr; 1348 } 1349 1350 bool BTreeImpl::Parent::isFull() const { 1351 return keys[Parent::NKEYS - 1] != nullptr; 1352 } 1353 bool BTreeImpl::Parent::isMostlyFull() const { 1354 return keys[Parent::NKEYS / 2] != nullptr; 1355 } 1356 bool BTreeImpl::Parent::isHalfFull() const { 1357 KJ_TABLE_IASSERT(keys[Parent::NKEYS / 2 - 1] != nullptr); 1358 return keys[Parent::NKEYS / 2] == nullptr; 1359 } 1360 1361 class BTreeImpl::Iterator { 1362 public: 1363 Iterator(const NodeUnion* tree, const Leaf* leaf, uint row) 1364 : tree(tree), leaf(leaf), row(row) {} 1365 1366 size_t operator*() const { 1367 KJ_TABLE_IREQUIRE(row < Leaf::NROWS && leaf->rows[row] != nullptr, 1368 "tried to dereference end() iterator"); 1369 return *leaf->rows[row]; 1370 } 1371 1372 inline Iterator& operator++() { 1373 KJ_TABLE_IREQUIRE(leaf->rows[row] != nullptr, "B-tree iterator overflow"); 1374 ++row; 1375 if (row >= Leaf::NROWS || leaf->rows[row] == nullptr) { 1376 if (leaf->next == 0) { 1377 // at end; stay on current leaf 1378 } else { 1379 leaf = &tree[leaf->next].leaf; 1380 row = 0; 1381 } 1382 } 1383 return *this; 1384 } 1385 inline Iterator operator++(int) { 1386 Iterator other = *this; 1387 ++*this; 1388 return other; 1389 } 1390 1391 inline Iterator& operator--() { 1392 if (row == 0) { 1393 KJ_TABLE_IREQUIRE(leaf->prev != 0, "B-tree iterator underflow"); 1394 leaf = &tree[leaf->prev].leaf; 1395 row = leaf->size() - 1; 1396 } else { 1397 --row; 1398 } 1399 return *this; 1400 } 1401 inline Iterator operator--(int) { 1402 Iterator other = *this; 1403 --*this; 1404 return other; 1405 } 1406 1407 inline bool operator==(const Iterator& other) const { 1408 return leaf == other.leaf && row == other.row; 1409 } 1410 inline bool operator!=(const Iterator& other) const { 1411 return leaf != other.leaf || row != other.row; 1412 } 1413 1414 bool isEnd() { 1415 return row == Leaf::NROWS || leaf->rows[row] == nullptr; 1416 } 1417 1418 void insert(BTreeImpl& impl, uint newRow) { 1419 KJ_TABLE_IASSERT(impl.tree == tree); 1420 const_cast<Leaf*>(leaf)->insert(row, newRow); 1421 } 1422 1423 void erase(BTreeImpl& impl) { 1424 KJ_TABLE_IASSERT(impl.tree == tree); 1425 const_cast<Leaf*>(leaf)->erase(row); 1426 } 1427 1428 void replace(BTreeImpl& impl, uint newRow) { 1429 KJ_TABLE_IASSERT(impl.tree == tree); 1430 const_cast<Leaf*>(leaf)->rows[row] = newRow; 1431 } 1432 1433 private: 1434 const NodeUnion* tree; 1435 const Leaf* leaf; 1436 uint row; 1437 }; 1438 1439 inline BTreeImpl::Iterator BTreeImpl::begin() const { 1440 return { tree, &tree[beginLeaf].leaf, 0 }; 1441 } 1442 inline BTreeImpl::Iterator BTreeImpl::end() const { 1443 auto& leaf = tree[endLeaf].leaf; 1444 return { tree, &leaf, leaf.size() }; 1445 } 1446 1447 } // namespace _ (private) 1448 1449 template <typename Callbacks> 1450 class TreeIndex { 1451 public: 1452 TreeIndex() = default; 1453 template <typename... Params> 1454 TreeIndex(Params&&... params): cb(kj::fwd<Params>(params)...) {} 1455 1456 template <typename Row> 1457 void verify(kj::ArrayPtr<Row> table) { 1458 impl.verify(table.size(), [&](uint i, uint j) { 1459 return cb.isBefore(table[i], table[j]); 1460 }); 1461 } 1462 1463 inline void reserve(size_t size) { impl.reserve(size); } 1464 inline void clear() { impl.clear(); } 1465 inline auto begin() const { return impl.begin(); } 1466 inline auto end() const { return impl.end(); } 1467 1468 template <typename Row> 1469 decltype(auto) keyForRow(Row&& row) const { 1470 return cb.keyForRow(kj::fwd<Row>(row)); 1471 } 1472 1473 template <typename Row, typename... Params> 1474 kj::Maybe<size_t> insert(kj::ArrayPtr<Row> table, size_t pos, Params&&... params) { 1475 auto iter = impl.insert(searchKey(table, params...)); 1476 1477 if (!iter.isEnd() && cb.matches(table[*iter], params...)) { 1478 return *iter; 1479 } else { 1480 iter.insert(impl, pos); 1481 return nullptr; 1482 } 1483 } 1484 1485 template <typename Row, typename... Params> 1486 void erase(kj::ArrayPtr<Row> table, size_t pos, Params&&... params) { 1487 impl.erase(pos, searchKey(table, params...)); 1488 } 1489 1490 template <typename Row, typename... Params> 1491 void move(kj::ArrayPtr<Row> table, size_t oldPos, size_t newPos, Params&&... params) { 1492 impl.renumber(oldPos, newPos, searchKey(table, params...)); 1493 } 1494 1495 template <typename Row, typename... Params> 1496 Maybe<size_t> find(kj::ArrayPtr<Row> table, Params&&... params) const { 1497 auto iter = impl.search(searchKey(table, params...)); 1498 1499 if (!iter.isEnd() && cb.matches(table[*iter], params...)) { 1500 return size_t(*iter); 1501 } else { 1502 return nullptr; 1503 } 1504 } 1505 1506 template <typename Row, typename... Params> 1507 _::BTreeImpl::Iterator seek(kj::ArrayPtr<Row> table, Params&&... params) const { 1508 return impl.search(searchKey(table, params...)); 1509 } 1510 1511 private: 1512 Callbacks cb; 1513 _::BTreeImpl impl; 1514 1515 template <typename Predicate> 1516 class SearchKeyImpl: public _::BTreeImpl::SearchKey { 1517 public: 1518 SearchKeyImpl(Predicate&& predicate) 1519 : predicate(kj::mv(predicate)) {} 1520 1521 uint search(const _::BTreeImpl::Parent& parent) const override { 1522 return parent.binarySearch(predicate); 1523 } 1524 uint search(const _::BTreeImpl::Leaf& leaf) const override { 1525 return leaf.binarySearch(predicate); 1526 } 1527 bool isAfter(uint rowIndex) const override { 1528 return predicate(rowIndex); 1529 } 1530 1531 private: 1532 Predicate predicate; 1533 }; 1534 1535 template <typename Row, typename... Params> 1536 inline auto searchKey(kj::ArrayPtr<Row>& table, Params&... params) const { 1537 auto predicate = [&](uint i) { return cb.isBefore(table[i], params...); }; 1538 return SearchKeyImpl<decltype(predicate)>(kj::mv(predicate)); 1539 } 1540 }; 1541 1542 // ----------------------------------------------------------------------------- 1543 // Insertion order index 1544 1545 class InsertionOrderIndex { 1546 // Table index which allows iterating over elements in order of insertion. This index cannot 1547 // be used for Table::find(), but can be used for Table::ordered(). 1548 1549 struct Link; 1550 public: 1551 InsertionOrderIndex(); 1552 InsertionOrderIndex(const InsertionOrderIndex&) = delete; 1553 InsertionOrderIndex& operator=(const InsertionOrderIndex&) = delete; 1554 InsertionOrderIndex(InsertionOrderIndex&& other); 1555 InsertionOrderIndex& operator=(InsertionOrderIndex&& other); 1556 ~InsertionOrderIndex() noexcept(false); 1557 1558 class Iterator { 1559 public: 1560 Iterator(const Link* links, uint pos) 1561 : links(links), pos(pos) {} 1562 1563 inline size_t operator*() const { 1564 KJ_TABLE_IREQUIRE(pos != 0, "can't derefrence end() iterator"); 1565 return pos - 1; 1566 }; 1567 1568 inline Iterator& operator++() { 1569 pos = links[pos].next; 1570 return *this; 1571 } 1572 inline Iterator operator++(int) { 1573 Iterator result = *this; 1574 ++*this; 1575 return result; 1576 } 1577 inline Iterator& operator--() { 1578 pos = links[pos].prev; 1579 return *this; 1580 } 1581 inline Iterator operator--(int) { 1582 Iterator result = *this; 1583 --*this; 1584 return result; 1585 } 1586 1587 inline bool operator==(const Iterator& other) const { 1588 return pos == other.pos; 1589 } 1590 inline bool operator!=(const Iterator& other) const { 1591 return pos != other.pos; 1592 } 1593 1594 private: 1595 const Link* links; 1596 uint pos; 1597 }; 1598 1599 template <typename Row> 1600 Row& keyForRow(Row& row) const { return row; } 1601 1602 void reserve(size_t size); 1603 void clear(); 1604 inline Iterator begin() const { return Iterator(links, links[0].next); } 1605 inline Iterator end() const { return Iterator(links, 0); } 1606 1607 template <typename Row> 1608 kj::Maybe<size_t> insert(kj::ArrayPtr<Row> table, size_t pos, const Row& row) { 1609 return insertImpl(pos); 1610 } 1611 1612 template <typename Row> 1613 void erase(kj::ArrayPtr<Row> table, size_t pos, const Row& row) { 1614 eraseImpl(pos); 1615 } 1616 1617 template <typename Row> 1618 void move(kj::ArrayPtr<Row> table, size_t oldPos, size_t newPos, const Row& row) { 1619 return moveImpl(oldPos, newPos); 1620 } 1621 1622 private: 1623 struct Link { 1624 uint next; 1625 uint prev; 1626 }; 1627 1628 uint capacity; 1629 Link* links; 1630 // links[0] is special: links[0].next points to the first link, links[0].prev points to the last. 1631 // links[n+1] corresponds to row n. 1632 1633 kj::Maybe<size_t> insertImpl(size_t pos); 1634 void eraseImpl(size_t pos); 1635 void moveImpl(size_t oldPos, size_t newPos); 1636 1637 static const Link EMPTY_LINK; 1638 }; 1639 1640 } // namespace kj 1641 1642 KJ_END_HEADER