btree.gno
38.39 Kb · 1114 lines
1//////////
2//
3// Copyright 2014 Google Inc.
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7//
8// Copyright 2024 New Tendermint
9//
10// This Gno port of the original Go BTree is substantially rewritten/reimplemented
11// from the original, primarily for clarity of code, clarity of documentation,
12// and for compatibility with Gno.
13//
14// Authors:
15// Original version authors -- https://github.com/google/btree/graphs/contributors
16// Kirk Haines <wyhaines@gmail.com>
17//
18//////////
19
20// Package btree implements in-memory B-Trees of arbitrary degree.
21//
22// It has a flatter structure than an equivalent red-black or other binary tree,
23// which may yield better memory usage and/or performance.
24package btree
25
26import "sort"
27
28//////////
29//
30// Types
31//
32//////////
33
34// BTreeOption is a function interface for setting options on a btree with `New()`.
35type BTreeOption func(*BTree)
36
37// BTree is an implementation of a B-Tree.
38//
39// BTree stores Record instances in an ordered structure, allowing easy insertion,
40// removal, and iteration.
41type BTree struct {
42 degree int
43 length int
44 root *node
45 cowCtx *copyOnWriteContext
46}
47
48// Any type that implements this interface can be stored in the BTree. This allows considerable
49//
50// flexiblity in storage within the BTree.
51type Record interface {
52 // Less compares self to `than`, returning true if self is less than `than`
53 Less(than Record) bool
54}
55
56// records is the storage within a node. It is expressed as a slice of Record, where a Record
57// is any struct that implements the Record interface.
58type records []Record
59
60// node is an internal node in a tree.
61//
62// It must at all times maintain on of the two conditions:
63// - len(children) == 0, len(records) unconstrained
64// - len(children) == len(records) + 1
65type node struct {
66 records records
67 children children
68 cowCtx *copyOnWriteContext
69}
70
71// children is the list of child nodes below the current node. It is a slice of nodes.
72type children []*node
73
74// FreeNodeList represents a slice of nodes which are available for reuse. The default
75// behavior of New() is for each BTree instance to have its own FreeNodeList. However,
76// it is possible for multiple instances of BTree to share the same tree. If one uses
77// New(WithFreeNodeList()) to create a tree, one may pass an existing FreeNodeList, allowing
78// multiple trees to use a single list. In an application with multiple trees, it might
79// be more efficient to allocate a single FreeNodeList with a significant initial capacity,
80// and then have all of the trees use that same large FreeNodeList.
81type FreeNodeList struct {
82 nodes []*node
83}
84
85// copyOnWriteContext manages node ownership and ensures that cloned trees
86// maintain isolation from each other when a node is changed.
87//
88// Ownership Rules:
89// - Each node is associated with a specific copyOnWriteContext.
90// - A tree can modify a node directly only if the tree's context matches the node's context.
91// - If a tree attempts to modify a node with a different context, it must create a
92// new, writable copy of that node (i.e., perform a clone) before making changes.
93//
94// Write Operation Invariant:
95// - During any write operation, the current node being modified must have the same
96// context as the tree requesting the write.
97// - To maintain this invariant, before descending into a child node, the system checks
98// if the child’s context matches the tree's context.
99// - If the contexts match, the node can be modified in place.
100// - If the contexts do not match, a mutable copy of the child node is created with the
101// correct context before proceeding.
102//
103// Practical Implications:
104// - The node currently being modified inherits the requesting tree's context, allowing
105// in-place modifications.
106// - Child nodes may initially have different contexts. Before any modification, these
107// children are copied to ensure they share the correct context, enabling safe and
108// isolated updates without affecting other trees that might be referencing the original nodes.
109//
110// Example Usage:
111// When a tree performs a write operation (e.g., inserting or deleting a node), it uses
112// its copyOnWriteContext to determine whether it can modify nodes directly or needs to
113// create copies. This mechanism ensures that trees can share nodes efficiently while
114// maintaining data integrity.
115type copyOnWriteContext struct {
116 nodes *FreeNodeList
117}
118
119// Record implements an interface with a single function, Less. Any type that implements
120// RecordIterator allows callers of all of the iteration functions for the BTree
121// to evaluate an element of the tree as it is traversed. The function will receive
122// a stored element from the tree. The function must return either a true or a false value.
123// True indicates that iteration should continue, while false indicates that it should halt.
124type RecordIterator func(i Record) bool
125
126//////////
127//
128// Functions
129//
130//////////
131
132// NewFreeNodeList creates a new free list.
133// size is the maximum size of the returned free list.
134func NewFreeNodeList(size int) *FreeNodeList {
135 return &FreeNodeList{nodes: make([]*node, 0, size)}
136}
137
138func (freeList *FreeNodeList) newNode() (nodeInstance *node) {
139 index := len(freeList.nodes) - 1
140 if index < 0 {
141 return new(node)
142 }
143 nodeInstance = freeList.nodes[index]
144 freeList.nodes[index] = nil
145 freeList.nodes = freeList.nodes[:index]
146
147 return nodeInstance
148}
149
150// freeNode adds the given node to the list, returning true if it was added
151// and false if it was discarded.
152
153func (freeList *FreeNodeList) freeNode(nodeInstance *node) (nodeWasAdded bool) {
154 if len(freeList.nodes) < cap(freeList.nodes) {
155 freeList.nodes = append(freeList.nodes, nodeInstance)
156 nodeWasAdded = true
157 }
158 return
159}
160
161// A default size for the free node list. We might want to run some benchmarks to see if
162// there are any pros or cons to this size versus other sizes. This seems to be a reasonable
163// compromise to reduce GC pressure by reusing nodes where possible, without stacking up too
164// much baggage in a given tree.
165const DefaultFreeNodeListSize = 32
166
167// WithDegree sets the degree of the B-Tree.
168func WithDegree(degree int) BTreeOption {
169 return func(bt *BTree) {
170 if degree <= 1 {
171 panic("Degrees less than 1 do not make any sense for a BTree. Please provide a degree of 1 or greater.")
172 }
173 bt.degree = degree
174 }
175}
176
177// WithFreeNodeList sets a custom free node list for the B-Tree.
178func WithFreeNodeList(freeList *FreeNodeList) BTreeOption {
179 return func(bt *BTree) {
180 bt.cowCtx = ©OnWriteContext{nodes: freeList}
181 }
182}
183
184// New creates a new B-Tree with optional configurations. If configuration is not provided,
185// it will default to 16 element nodes. Degree may not be less than 1 (which effectively
186// makes the tree into a binary tree).
187//
188// `New(WithDegree(2))`, for example, will create a 2-3-4 tree (each node contains 1-3 records
189// and 2-4 children).
190//
191// `New(WithFreeNodeList(NewFreeNodeList(64)))` will create a tree with a degree of 16, and
192// with a free node list with a size of 64.
193func New(options ...BTreeOption) *BTree {
194 btree := &BTree{
195 degree: 16, // default degree
196 cowCtx: ©OnWriteContext{nodes: NewFreeNodeList(DefaultFreeNodeListSize)},
197 }
198 for _, opt := range options {
199 opt(btree)
200 }
201 return btree
202}
203
204// insertAt inserts a value into the given index, pushing all subsequent values
205// forward.
206func (recordsSlice *records) insertAt(index int, newRecord Record) {
207 originalLength := len(*recordsSlice)
208
209 // Extend the slice by one element
210 *recordsSlice = append(*recordsSlice, nil)
211
212 // Move elements from the end to avoid overwriting during the copy
213 // TODO: Make this work with slice appends, instead. It should be faster?
214 if index < originalLength {
215 for position := originalLength; position > index; position-- {
216 (*recordsSlice)[position] = (*recordsSlice)[position-1]
217 }
218 }
219
220 // Insert the new record
221 (*recordsSlice)[index] = newRecord
222}
223
224// removeAt removes a Record from the records slice at the specified index.
225// It shifts subsequent records to fill the gap and returns the removed Record.
226func (recordSlicePointer *records) removeAt(index int) Record {
227 recordSlice := *recordSlicePointer
228 removedRecord := recordSlice[index]
229 copy(recordSlice[index:], recordSlice[index+1:])
230 recordSlice[len(recordSlice)-1] = nil
231 *recordSlicePointer = recordSlice[:len(recordSlice)-1]
232
233 return removedRecord
234}
235
236// Pop removes and returns the last Record from the records slice.
237// It also clears the reference to the removed Record to aid garbage collection.
238func (r *records) pop() Record {
239 recordSlice := *r
240 lastIndex := len(recordSlice) - 1
241 removedRecord := recordSlice[lastIndex]
242 recordSlice[lastIndex] = nil
243 *r = recordSlice[:lastIndex]
244 return removedRecord
245}
246
247// This slice is intended only as a supply of records for the truncate function
248// that follows, and it should not be changed or altered.
249var emptyRecords = make(records, 32)
250
251// truncate reduces the length of the slice to the specified index,
252// and clears the elements beyond that index to prevent memory leaks.
253// The index must be less than or equal to the current length of the slice.
254func (originalSlice *records) truncate(index int) {
255 // Split the slice into the part to keep and the part to clear.
256 recordsToKeep := (*originalSlice)[:index]
257 recordsToClear := (*originalSlice)[index:]
258
259 // Update the original slice to only contain the records to keep.
260 *originalSlice = recordsToKeep
261
262 // Clear the memory of the part that was truncated.
263 for len(recordsToClear) > 0 {
264 // Copy empty values from `emptyRecords` to the recordsToClear slice.
265 // This effectively "clears" the memory by overwriting elements.
266 numCleared := copy(recordsToClear, emptyRecords)
267 recordsToClear = recordsToClear[numCleared:]
268 }
269}
270
271// Find determines the appropriate index at which a given Record should be inserted
272// into the sorted records slice. If the Record already exists in the slice,
273// the method returns its index and sets found to true.
274//
275// Parameters:
276// - record: The Record to search for within the records slice.
277//
278// Returns:
279// - insertIndex: The index at which the Record should be inserted.
280// - found: A boolean indicating whether the Record already exists in the slice.
281func (recordsSlice records) find(record Record) (insertIndex int, found bool) {
282 totalRecords := len(recordsSlice)
283
284 // Perform a binary search to find the insertion point for the record
285 insertionPoint := sort.Search(totalRecords, func(currentIndex int) bool {
286 return record.Less(recordsSlice[currentIndex])
287 })
288
289 if insertionPoint > 0 {
290 previousRecord := recordsSlice[insertionPoint-1]
291
292 if !previousRecord.Less(record) {
293 return insertionPoint - 1, true
294 }
295 }
296
297 return insertionPoint, false
298}
299
300// insertAt inserts a value into the given index, pushing all subsequent values
301// forward.
302func (childSlice *children) insertAt(index int, n *node) {
303 originalLength := len(*childSlice)
304
305 // Extend the slice by one element
306 *childSlice = append(*childSlice, nil)
307
308 // Move elements from the end to avoid overwriting during the copy
309 if index < originalLength {
310 for i := originalLength; i > index; i-- {
311 (*childSlice)[i] = (*childSlice)[i-1]
312 }
313 }
314
315 // Insert the new record
316 (*childSlice)[index] = n
317}
318
319// removeAt removes a Record from the records slice at the specified index.
320// It shifts subsequent records to fill the gap and returns the removed Record.
321func (childSlicePointer *children) removeAt(index int) *node {
322 childSlice := *childSlicePointer
323 removedChild := childSlice[index]
324 copy(childSlice[index:], childSlice[index+1:])
325 childSlice[len(childSlice)-1] = nil
326 *childSlicePointer = childSlice[:len(childSlice)-1]
327
328 return removedChild
329}
330
331// Pop removes and returns the last Record from the records slice.
332// It also clears the reference to the removed Record to aid garbage collection.
333func (childSlicePointer *children) pop() *node {
334 childSlice := *childSlicePointer
335 lastIndex := len(childSlice) - 1
336 removedChild := childSlice[lastIndex]
337 childSlice[lastIndex] = nil
338 *childSlicePointer = childSlice[:lastIndex]
339 return removedChild
340}
341
342// This slice is intended only as a supply of records for the truncate function
343// that follows, and it should not be changed or altered.
344var emptyChildren = make(children, 32)
345
346// truncate reduces the length of the slice to the specified index,
347// and clears the elements beyond that index to prevent memory leaks.
348// The index must be less than or equal to the current length of the slice.
349func (originalSlice *children) truncate(index int) {
350 // Split the slice into the part to keep and the part to clear.
351 childrenToKeep := (*originalSlice)[:index]
352 childrenToClear := (*originalSlice)[index:]
353
354 // Update the original slice to only contain the records to keep.
355 *originalSlice = childrenToKeep
356
357 // Clear the memory of the part that was truncated.
358 for len(childrenToClear) > 0 {
359 // Copy empty values from `emptyChildren` to the recordsToClear slice.
360 // This effectively "clears" the memory by overwriting elements.
361 numCleared := copy(childrenToClear, emptyChildren)
362
363 // Slice recordsToClear to exclude the elements that were just cleared.
364 childrenToClear = childrenToClear[numCleared:]
365 }
366}
367
368// mutableFor creates a mutable copy of the node if the current node does not
369// already belong to the provided copy-on-write context (COW). If the node is
370// already associated with the given COW context, it returns the current node.
371//
372// Parameters:
373// - cowCtx: The copy-on-write context that should own the returned node.
374//
375// Returns:
376// - A pointer to the mutable node associated with the given COW context.
377//
378// If the current node belongs to a different COW context, this function:
379// - Allocates a new node using the provided context.
380// - Copies the node’s records and children slices into the newly allocated node.
381// - Returns the new node which is now owned by the given COW context.
382func (n *node) mutableFor(cowCtx *copyOnWriteContext) *node {
383 // If the current node is already owned by the provided context, return it as-is.
384 if n.cowCtx == cowCtx {
385 return n
386 }
387
388 // Create a new node in the provided context.
389 newNode := cowCtx.newNode()
390
391 // Copy the records from the current node into the new node.
392 newNode.records = append(newNode.records[:0], n.records...)
393
394 // Copy the children from the current node into the new node.
395 newNode.children = append(newNode.children[:0], n.children...)
396
397 return newNode
398}
399
400// mutableChild ensures that the child node at the given index is mutable and
401// associated with the same COW context as the parent node. If the child node
402// belongs to a different context, a copy of the child is created and stored in the
403// parent node.
404//
405// Parameters:
406// - i: The index of the child node to be made mutable.
407//
408// Returns:
409// - A pointer to the mutable child node.
410func (n *node) mutableChild(i int) *node {
411 // Ensure that the child at index `i` is mutable and belongs to the same context as the parent.
412 mutableChildNode := n.children[i].mutableFor(n.cowCtx)
413 // Update the child node reference in the current node to the mutable version.
414 n.children[i] = mutableChildNode
415 return mutableChildNode
416}
417
418// split splits the given node at the given index. The current node shrinks,
419// and this function returns the record that existed at that index and a new node
420// containing all records/children after it.
421func (n *node) split(i int) (Record, *node) {
422 record := n.records[i]
423 next := n.cowCtx.newNode()
424 next.records = append(next.records, n.records[i+1:]...)
425 n.records.truncate(i)
426 if len(n.children) > 0 {
427 next.children = append(next.children, n.children[i+1:]...)
428 n.children.truncate(i + 1)
429 }
430 return record, next
431}
432
433// maybeSplitChild checks if a child should be split, and if so splits it.
434// Returns whether or not a split occurred.
435func (n *node) maybeSplitChild(i, maxRecords int) bool {
436 if len(n.children[i].records) < maxRecords {
437 return false
438 }
439 first := n.mutableChild(i)
440 record, second := first.split(maxRecords / 2)
441 n.records.insertAt(i, record)
442 n.children.insertAt(i+1, second)
443 return true
444}
445
446// insert adds a record to the subtree rooted at the current node, ensuring that no node in the subtree
447// exceeds the maximum number of allowed records (`maxRecords`). If an equivalent record is already present,
448// it replaces the existing one and returns it; otherwise, it returns nil.
449//
450// Parameters:
451// - record: The record to be inserted.
452// - maxRecords: The maximum number of records allowed per node.
453//
454// Returns:
455// - The record that was replaced if an equivalent record already existed, otherwise nil.
456func (n *node) insert(record Record, maxRecords int) Record {
457 // Find the position where the new record should be inserted and check if an equivalent record already exists.
458 insertionIndex, recordExists := n.records.find(record)
459
460 if recordExists {
461 // If an equivalent record is found, replace it and return the old record.
462 existingRecord := n.records[insertionIndex]
463 n.records[insertionIndex] = record
464 return existingRecord
465 }
466
467 // If the current node is a leaf (has no children), insert the new record at the calculated index.
468 if len(n.children) == 0 {
469 n.records.insertAt(insertionIndex, record)
470 return nil
471 }
472
473 // Check if the child node at the insertion index needs to be split due to exceeding maxRecords.
474 if n.maybeSplitChild(insertionIndex, maxRecords) {
475 // If a split occurred, compare the new record with the record moved up to the current node.
476 splitRecord := n.records[insertionIndex]
477 switch {
478 case record.Less(splitRecord):
479 // The new record belongs to the first (left) split node; no change to insertion index.
480 case splitRecord.Less(record):
481 // The new record belongs to the second (right) split node; move the insertion index to the next position.
482 insertionIndex++
483 default:
484 // If the record is equivalent to the split record, replace it and return the old record.
485 existingRecord := n.records[insertionIndex]
486 n.records[insertionIndex] = record
487 return existingRecord
488 }
489 }
490
491 // Recursively insert the record into the appropriate child node, now guaranteed to have space.
492 return n.mutableChild(insertionIndex).insert(record, maxRecords)
493}
494
495// get finds the given key in the subtree and returns it.
496func (n *node) get(key Record) Record {
497 i, found := n.records.find(key)
498 if found {
499 return n.records[i]
500 } else if len(n.children) > 0 {
501 return n.children[i].get(key)
502 }
503 return nil
504}
505
506// min returns the first record in the subtree.
507func min(n *node) Record {
508 if n == nil {
509 return nil
510 }
511 for len(n.children) > 0 {
512 n = n.children[0]
513 }
514 if len(n.records) == 0 {
515 return nil
516 }
517 return n.records[0]
518}
519
520// max returns the last record in the subtree.
521func max(n *node) Record {
522 if n == nil {
523 return nil
524 }
525 for len(n.children) > 0 {
526 n = n.children[len(n.children)-1]
527 }
528 if len(n.records) == 0 {
529 return nil
530 }
531 return n.records[len(n.records)-1]
532}
533
534// toRemove details what record to remove in a node.remove call.
535type toRemove int
536
537const (
538 removeRecord toRemove = iota // removes the given record
539 removeMin // removes smallest record in the subtree
540 removeMax // removes largest record in the subtree
541)
542
543// remove removes a record from the subtree rooted at the current node.
544//
545// Parameters:
546// - record: The record to be removed (can be nil when the removal type indicates min or max).
547// - minRecords: The minimum number of records a node should have after removal.
548// - typ: The type of removal operation to perform (removeMin, removeMax, or removeRecord).
549//
550// Returns:
551// - The record that was removed, or nil if no such record was found.
552func (n *node) remove(record Record, minRecords int, removalType toRemove) Record {
553 var targetIndex int
554 var recordFound bool
555
556 // Determine the index of the record to remove based on the removal type.
557 switch removalType {
558 case removeMax:
559 // If this node is a leaf, remove and return the last record.
560 if len(n.children) == 0 {
561 return n.records.pop()
562 }
563 targetIndex = len(n.records) // The last record index for removing max.
564
565 case removeMin:
566 // If this node is a leaf, remove and return the first record.
567 if len(n.children) == 0 {
568 return n.records.removeAt(0)
569 }
570 targetIndex = 0 // The first record index for removing min.
571
572 case removeRecord:
573 // Locate the index of the record to be removed.
574 targetIndex, recordFound = n.records.find(record)
575 if len(n.children) == 0 {
576 if recordFound {
577 return n.records.removeAt(targetIndex)
578 }
579 return nil // The record was not found in the leaf node.
580 }
581
582 default:
583 panic("invalid removal type")
584 }
585
586 // If the current node has children, handle the removal recursively.
587 if len(n.children[targetIndex].records) <= minRecords {
588 // If the target child node has too few records, grow it before proceeding with removal.
589 return n.growChildAndRemove(targetIndex, record, minRecords, removalType)
590 }
591
592 // Get a mutable reference to the child node at the target index.
593 targetChild := n.mutableChild(targetIndex)
594
595 // If the record to be removed was found in the current node:
596 if recordFound {
597 // Replace the current record with its predecessor from the child node, and return the removed record.
598 replacedRecord := n.records[targetIndex]
599 n.records[targetIndex] = targetChild.remove(nil, minRecords, removeMax)
600 return replacedRecord
601 }
602
603 // Recursively remove the record from the child node.
604 return targetChild.remove(record, minRecords, removalType)
605}
606
607// growChildAndRemove grows child 'i' to make sure it's possible to remove an
608// record from it while keeping it at minRecords, then calls remove to actually
609// remove it.
610//
611// Most documentation says we have to do two sets of special casing:
612// 1. record is in this node
613// 2. record is in child
614//
615// In both cases, we need to handle the two subcases:
616//
617// A) node has enough values that it can spare one
618// B) node doesn't have enough values
619//
620// For the latter, we have to check:
621//
622// a) left sibling has node to spare
623// b) right sibling has node to spare
624// c) we must merge
625//
626// To simplify our code here, we handle cases #1 and #2 the same:
627// If a node doesn't have enough records, we make sure it does (using a,b,c).
628// We then simply redo our remove call, and the second time (regardless of
629// whether we're in case 1 or 2), we'll have enough records and can guarantee
630// that we hit case A.
631func (n *node) growChildAndRemove(i int, record Record, minRecords int, typ toRemove) Record {
632 if i > 0 && len(n.children[i-1].records) > minRecords {
633 // Steal from left child
634 child := n.mutableChild(i)
635 stealFrom := n.mutableChild(i - 1)
636 stolenRecord := stealFrom.records.pop()
637 child.records.insertAt(0, n.records[i-1])
638 n.records[i-1] = stolenRecord
639 if len(stealFrom.children) > 0 {
640 child.children.insertAt(0, stealFrom.children.pop())
641 }
642 } else if i < len(n.records) && len(n.children[i+1].records) > minRecords {
643 // steal from right child
644 child := n.mutableChild(i)
645 stealFrom := n.mutableChild(i + 1)
646 stolenRecord := stealFrom.records.removeAt(0)
647 child.records = append(child.records, n.records[i])
648 n.records[i] = stolenRecord
649 if len(stealFrom.children) > 0 {
650 child.children = append(child.children, stealFrom.children.removeAt(0))
651 }
652 } else {
653 if i >= len(n.records) {
654 i--
655 }
656 child := n.mutableChild(i)
657 // merge with right child
658 mergeRecord := n.records.removeAt(i)
659 mergeChild := n.children.removeAt(i + 1).mutableFor(n.cowCtx)
660 child.records = append(child.records, mergeRecord)
661 child.records = append(child.records, mergeChild.records...)
662 child.children = append(child.children, mergeChild.children...)
663 n.cowCtx.freeNode(mergeChild)
664 }
665 return n.remove(record, minRecords, typ)
666}
667
668type direction int
669
670const (
671 descend = direction(-1)
672 ascend = direction(+1)
673)
674
675// iterate provides a simple method for iterating over elements in the tree.
676//
677// When ascending, the 'start' should be less than 'stop' and when descending,
678// the 'start' should be greater than 'stop'. Setting 'includeStart' to true
679// will force the iterator to include the first record when it equals 'start',
680// thus creating a "greaterOrEqual" or "lessThanEqual" rather than just a
681// "greaterThan" or "lessThan" queries.
682func (n *node) iterate(dir direction, start, stop Record, includeStart bool, hit bool, iter RecordIterator) (bool, bool) {
683 var ok, found bool
684 var index int
685 switch dir {
686 case ascend:
687 if start != nil {
688 index, _ = n.records.find(start)
689 }
690 for i := index; i < len(n.records); i++ {
691 if len(n.children) > 0 {
692 if hit, ok = n.children[i].iterate(dir, start, stop, includeStart, hit, iter); !ok {
693 return hit, false
694 }
695 }
696 if !includeStart && !hit && start != nil && !start.Less(n.records[i]) {
697 hit = true
698 continue
699 }
700 hit = true
701 if stop != nil && !n.records[i].Less(stop) {
702 return hit, false
703 }
704 if !iter(n.records[i]) {
705 return hit, false
706 }
707 }
708 if len(n.children) > 0 {
709 if hit, ok = n.children[len(n.children)-1].iterate(dir, start, stop, includeStart, hit, iter); !ok {
710 return hit, false
711 }
712 }
713 case descend:
714 if start != nil {
715 index, found = n.records.find(start)
716 if !found {
717 index = index - 1
718 }
719 } else {
720 index = len(n.records) - 1
721 }
722 for i := index; i >= 0; i-- {
723 if start != nil && !n.records[i].Less(start) {
724 if !includeStart || hit || start.Less(n.records[i]) {
725 continue
726 }
727 }
728 if len(n.children) > 0 {
729 if hit, ok = n.children[i+1].iterate(dir, start, stop, includeStart, hit, iter); !ok {
730 return hit, false
731 }
732 }
733 if stop != nil && !stop.Less(n.records[i]) {
734 return hit, false // continue
735 }
736 hit = true
737 if !iter(n.records[i]) {
738 return hit, false
739 }
740 }
741 if len(n.children) > 0 {
742 if hit, ok = n.children[0].iterate(dir, start, stop, includeStart, hit, iter); !ok {
743 return hit, false
744 }
745 }
746 }
747 return hit, true
748}
749
750func (tree *BTree) Iterate(dir direction, start, stop Record, includeStart bool, hit bool, iter RecordIterator) (bool, bool) {
751 return tree.root.iterate(dir, start, stop, includeStart, hit, iter)
752}
753
754// Clone creates a new BTree instance that shares the current tree's structure using a copy-on-write (COW) approach.
755//
756// How Cloning Works:
757// - The cloned tree (`clonedTree`) shares the current tree’s nodes in a read-only state. This means that no additional memory
758// is allocated for shared nodes, and read operations on the cloned tree are as fast as on the original tree.
759// - When either the original tree (`t`) or the cloned tree (`clonedTree`) needs to perform a write operation (such as an insert, delete, etc.),
760// a new copy of the affected nodes is created on-demand. This ensures that modifications to one tree do not affect the other.
761//
762// Performance Implications:
763// - **Clone Creation:** The creation of a clone is inexpensive since it only involves copying references to the original tree's nodes
764// and creating new copy-on-write contexts.
765// - **Read Operations:** Reading from either the original tree or the cloned tree has no additional performance overhead compared to the original tree.
766// - **Write Operations:** The first write operation on either tree may experience a slight slow-down due to the allocation of new nodes,
767// but subsequent write operations will perform at the same speed as if the tree were not cloned.
768//
769// Returns:
770// - A new BTree instance (`clonedTree`) that shares the original tree's structure.
771func (t *BTree) Clone() *BTree {
772 // Create two independent copy-on-write contexts, one for the original tree (`t`) and one for the cloned tree.
773 originalContext := *t.cowCtx
774 clonedContext := *t.cowCtx
775
776 // Create a shallow copy of the current tree, which will be the new cloned tree.
777 clonedTree := *t
778
779 // Assign the new contexts to their respective trees.
780 t.cowCtx = &originalContext
781 clonedTree.cowCtx = &clonedContext
782
783 return &clonedTree
784}
785
786// maxRecords returns the max number of records to allow per node.
787func (t *BTree) maxRecords() int {
788 return t.degree*2 - 1
789}
790
791// minRecords returns the min number of records to allow per node (ignored for the
792// root node).
793func (t *BTree) minRecords() int {
794 return t.degree - 1
795}
796
797func (c *copyOnWriteContext) newNode() (n *node) {
798 n = c.nodes.newNode()
799 n.cowCtx = c
800 return
801}
802
803type freeType int
804
805const (
806 ftFreelistFull freeType = iota // node was freed (available for GC, not stored in nodes)
807 ftStored // node was stored in the nodes for later use
808 ftNotOwned // node was ignored by COW, since it's owned by another one
809)
810
811// freeNode frees a node within a given COW context, if it's owned by that
812// context. It returns what happened to the node (see freeType const
813// documentation).
814func (c *copyOnWriteContext) freeNode(n *node) freeType {
815 if n.cowCtx == c {
816 // clear to allow GC
817 n.records.truncate(0)
818 n.children.truncate(0)
819 n.cowCtx = nil
820 if c.nodes.freeNode(n) {
821 return ftStored
822 } else {
823 return ftFreelistFull
824 }
825 } else {
826 return ftNotOwned
827 }
828}
829
830// Insert adds the given record to the B-tree. If a record already exists in the tree with the same value,
831// it is replaced, and the old record is returned. Otherwise, it returns nil.
832//
833// Notes:
834// - The function panics if a nil record is provided as input.
835// - If the root node is empty, a new root node is created and the record is inserted.
836//
837// Parameters:
838// - record: The record to be inserted into the B-tree.
839//
840// Returns:
841// - The replaced record if an equivalent record already exists, or nil if no replacement occurred.
842func (t *BTree) Insert(record Record) Record {
843 if record == nil {
844 panic("nil record cannot be added to BTree")
845 }
846
847 // If the tree is empty (no root), create a new root node and insert the record.
848 if t.root == nil {
849 t.root = t.cowCtx.newNode()
850 t.root.records = append(t.root.records, record)
851 t.length++
852 return nil
853 }
854
855 // Ensure that the root node is mutable (associated with the current tree's copy-on-write context).
856 t.root = t.root.mutableFor(t.cowCtx)
857
858 // If the root node is full (contains the maximum number of records), split the root.
859 if len(t.root.records) >= t.maxRecords() {
860 // Split the root node, promoting the middle record and creating a new child node.
861 middleRecord, newChildNode := t.root.split(t.maxRecords() / 2)
862
863 // Create a new root node to hold the promoted middle record.
864 oldRoot := t.root
865 t.root = t.cowCtx.newNode()
866 t.root.records = append(t.root.records, middleRecord)
867 t.root.children = append(t.root.children, oldRoot, newChildNode)
868 }
869
870 // Insert the new record into the subtree rooted at the current root node.
871 replacedRecord := t.root.insert(record, t.maxRecords())
872
873 // If no record was replaced, increase the tree's length.
874 if replacedRecord == nil {
875 t.length++
876 }
877
878 return replacedRecord
879}
880
881// Delete removes an record equal to the passed in record from the tree, returning
882// it. If no such record exists, returns nil.
883func (t *BTree) Delete(record Record) Record {
884 return t.deleteRecord(record, removeRecord)
885}
886
887// DeleteMin removes the smallest record in the tree and returns it.
888// If no such record exists, returns nil.
889func (t *BTree) DeleteMin() Record {
890 return t.deleteRecord(nil, removeMin)
891}
892
893// Shift is identical to DeleteMin. If the tree is thought of as an ordered list, then Shift()
894// removes the element at the start of the list, the smallest element, and returns it.
895func (t *BTree) Shift() Record {
896 return t.deleteRecord(nil, removeMin)
897}
898
899// DeleteMax removes the largest record in the tree and returns it.
900// If no such record exists, returns nil.
901func (t *BTree) DeleteMax() Record {
902 return t.deleteRecord(nil, removeMax)
903}
904
905// Pop is identical to DeleteMax. If the tree is thought of as an ordered list, then Shift()
906// removes the element at the end of the list, the largest element, and returns it.
907func (t *BTree) Pop() Record {
908 return t.deleteRecord(nil, removeMax)
909}
910
911// deleteRecord removes a record from the B-tree based on the specified removal type (removeMin, removeMax, or removeRecord).
912// It returns the removed record if it was found, or nil if no matching record was found.
913//
914// Parameters:
915// - record: The record to be removed (can be nil if the removal type indicates min or max).
916// - removalType: The type of removal operation to perform (removeMin, removeMax, or removeRecord).
917//
918// Returns:
919// - The removed record if it existed in the tree, or nil if it was not found.
920func (t *BTree) deleteRecord(record Record, removalType toRemove) Record {
921 // If the tree is empty or the root has no records, return nil.
922 if t.root == nil || len(t.root.records) == 0 {
923 return nil
924 }
925
926 // Ensure the root node is mutable (associated with the tree's copy-on-write context).
927 t.root = t.root.mutableFor(t.cowCtx)
928
929 // Attempt to remove the specified record from the root node.
930 removedRecord := t.root.remove(record, t.minRecords(), removalType)
931
932 // Check if the root node has become empty but still has children.
933 // In this case, the tree height should be reduced, making the first child the new root.
934 if len(t.root.records) == 0 && len(t.root.children) > 0 {
935 oldRoot := t.root
936 t.root = t.root.children[0]
937 // Free the old root node, as it is no longer needed.
938 t.cowCtx.freeNode(oldRoot)
939 }
940
941 // If a record was successfully removed, decrease the tree's length.
942 if removedRecord != nil {
943 t.length--
944 }
945
946 return removedRecord
947}
948
949// AscendRange calls the iterator for every value in the tree within the range
950// [greaterOrEqual, lessThan), until iterator returns false.
951func (t *BTree) AscendRange(greaterOrEqual, lessThan Record, iterator RecordIterator) {
952 if t.root == nil {
953 return
954 }
955 t.root.iterate(ascend, greaterOrEqual, lessThan, true, false, iterator)
956}
957
958// AscendLessThan calls the iterator for every value in the tree within the range
959// [first, pivot), until iterator returns false.
960func (t *BTree) AscendLessThan(pivot Record, iterator RecordIterator) {
961 if t.root == nil {
962 return
963 }
964 t.root.iterate(ascend, nil, pivot, false, false, iterator)
965}
966
967// AscendGreaterOrEqual calls the iterator for every value in the tree within
968// the range [pivot, last], until iterator returns false.
969func (t *BTree) AscendGreaterOrEqual(pivot Record, iterator RecordIterator) {
970 if t.root == nil {
971 return
972 }
973 t.root.iterate(ascend, pivot, nil, true, false, iterator)
974}
975
976// Ascend calls the iterator for every value in the tree within the range
977// [first, last], until iterator returns false.
978func (t *BTree) Ascend(iterator RecordIterator) {
979 if t.root == nil {
980 return
981 }
982 t.root.iterate(ascend, nil, nil, false, false, iterator)
983}
984
985// DescendRange calls the iterator for every value in the tree within the range
986// [lessOrEqual, greaterThan), until iterator returns false.
987func (t *BTree) DescendRange(lessOrEqual, greaterThan Record, iterator RecordIterator) {
988 if t.root == nil {
989 return
990 }
991 t.root.iterate(descend, lessOrEqual, greaterThan, true, false, iterator)
992}
993
994// DescendLessOrEqual calls the iterator for every value in the tree within the range
995// [pivot, first], until iterator returns false.
996func (t *BTree) DescendLessOrEqual(pivot Record, iterator RecordIterator) {
997 if t.root == nil {
998 return
999 }
1000 t.root.iterate(descend, pivot, nil, true, false, iterator)
1001}
1002
1003// DescendGreaterThan calls the iterator for every value in the tree within
1004// the range [last, pivot), until iterator returns false.
1005func (t *BTree) DescendGreaterThan(pivot Record, iterator RecordIterator) {
1006 if t.root == nil {
1007 return
1008 }
1009 t.root.iterate(descend, nil, pivot, false, false, iterator)
1010}
1011
1012// Descend calls the iterator for every value in the tree within the range
1013// [last, first], until iterator returns false.
1014func (t *BTree) Descend(iterator RecordIterator) {
1015 if t.root == nil {
1016 return
1017 }
1018 t.root.iterate(descend, nil, nil, false, false, iterator)
1019}
1020
1021// Get looks for the key record in the tree, returning it. It returns nil if
1022// unable to find that record.
1023func (t *BTree) Get(key Record) Record {
1024 if t.root == nil {
1025 return nil
1026 }
1027 return t.root.get(key)
1028}
1029
1030// Min returns the smallest record in the tree, or nil if the tree is empty.
1031func (t *BTree) Min() Record {
1032 return min(t.root)
1033}
1034
1035// Max returns the largest record in the tree, or nil if the tree is empty.
1036func (t *BTree) Max() Record {
1037 return max(t.root)
1038}
1039
1040// Has returns true if the given key is in the tree.
1041func (t *BTree) Has(key Record) bool {
1042 return t.Get(key) != nil
1043}
1044
1045// Len returns the number of records currently in the tree.
1046func (t *BTree) Len() int {
1047 return t.length
1048}
1049
1050// Clear removes all elements from the B-tree.
1051//
1052// Parameters:
1053// - addNodesToFreelist:
1054// - If true, the tree's nodes are added to the freelist during the clearing process,
1055// up to the freelist's capacity.
1056// - If false, the root node is simply dereferenced, allowing Go's garbage collector
1057// to reclaim the memory.
1058//
1059// Benefits:
1060// - **Performance:**
1061// - Significantly faster than deleting each element individually, as it avoids the overhead
1062// of searching and updating the tree structure for each deletion.
1063// - More efficient than creating a new tree, since it reuses existing nodes by adding them
1064// to the freelist instead of discarding them to the garbage collector.
1065//
1066// Time Complexity:
1067// - **O(1):**
1068// - When `addNodesToFreelist` is false.
1069// - When `addNodesToFreelist` is true but the freelist is already full.
1070// - **O(freelist size):**
1071// - When adding nodes to the freelist up to its capacity.
1072// - **O(tree size):**
1073// - When iterating through all nodes to add to the freelist, but none can be added due to
1074// ownership by another tree.
1075
1076func (tree *BTree) Clear(addNodesToFreelist bool) {
1077 if tree.root != nil && addNodesToFreelist {
1078 tree.root.reset(tree.cowCtx)
1079 }
1080 tree.root = nil
1081 tree.length = 0
1082}
1083
1084// reset adds all nodes in the current subtree to the freelist.
1085//
1086// The function operates recursively:
1087// - It first attempts to reset all child nodes.
1088// - If the freelist becomes full at any point, the process stops immediately.
1089//
1090// Parameters:
1091// - copyOnWriteCtx: The copy-on-write context managing the freelist.
1092//
1093// Returns:
1094// - true: Indicates that the parent node should continue attempting to reset its nodes.
1095// - false: Indicates that the freelist is full and no further nodes should be added.
1096//
1097// Usage:
1098// This method is called during the `Clear` operation of the B-tree to efficiently reuse
1099// nodes by adding them to the freelist, thereby avoiding unnecessary allocations and reducing
1100// garbage collection overhead.
1101func (currentNode *node) reset(copyOnWriteCtx *copyOnWriteContext) bool {
1102 // Iterate through each child node and attempt to reset it.
1103 for _, childNode := range currentNode.children {
1104 // If any child reset operation signals that the freelist is full, stop the process.
1105 if !childNode.reset(copyOnWriteCtx) {
1106 return false
1107 }
1108 }
1109
1110 // Attempt to add the current node to the freelist.
1111 // If the freelist is full after this operation, indicate to the parent to stop.
1112 freelistStatus := copyOnWriteCtx.freeNode(currentNode)
1113 return freelistStatus != ftFreelistFull
1114}