btree.gno

38.39 Kb · 1114 lines
   1//////////
   2//
   3// Copyright 2014 Google Inc.
   4//
   5// Licensed under the Apache License, Version 2.0 (the "License");
   6// you may not use this file except in compliance with the License.
   7//
   8// Copyright 2024 New Tendermint
   9//
  10// This Gno port of the original Go BTree is substantially rewritten/reimplemented
  11// from the original, primarily for clarity of code, clarity of documentation,
  12// and for compatibility with Gno.
  13//
  14// Authors:
  15//   Original version authors -- https://github.com/google/btree/graphs/contributors
  16//   Kirk Haines <wyhaines@gmail.com>
  17//
  18//////////
  19
  20// Package btree implements in-memory B-Trees of arbitrary degree.
  21//
  22// It has a flatter structure than an equivalent red-black or other binary tree,
  23// which may yield better memory usage and/or performance.
  24package btree
  25
  26import "sort"
  27
  28//////////
  29//
  30// Types
  31//
  32//////////
  33
  34// BTreeOption is a function interface for setting options on a btree with `New()`.
  35type BTreeOption func(*BTree)
  36
  37// BTree is an implementation of a B-Tree.
  38//
  39// BTree stores Record instances in an ordered structure, allowing easy insertion,
  40// removal, and iteration.
  41type BTree struct {
  42	degree int
  43	length int
  44	root   *node
  45	cowCtx *copyOnWriteContext
  46}
  47
  48//	Any type that implements this interface can be stored in the BTree. This allows considerable
  49//
  50// flexiblity in storage within the BTree.
  51type Record interface {
  52	// Less compares self to `than`, returning true if self is less than `than`
  53	Less(than Record) bool
  54}
  55
  56// records is the storage within a node. It is expressed as a slice of Record, where a Record
  57// is any struct that implements the Record interface.
  58type records []Record
  59
  60// node is an internal node in a tree.
  61//
  62// It must at all times maintain on of the two conditions:
  63//   - len(children) == 0, len(records) unconstrained
  64//   - len(children) == len(records) + 1
  65type node struct {
  66	records  records
  67	children children
  68	cowCtx   *copyOnWriteContext
  69}
  70
  71// children is the list of child nodes below the current node. It is a slice of nodes.
  72type children []*node
  73
  74// FreeNodeList represents a slice of nodes which are available for reuse. The default
  75// behavior of New() is for each BTree instance to have its own FreeNodeList. However,
  76// it is possible for multiple instances of BTree to share the same tree. If one uses
  77// New(WithFreeNodeList()) to create a tree, one may pass an existing FreeNodeList, allowing
  78// multiple trees to use a single list. In an application with multiple trees, it might
  79// be more efficient to allocate a single FreeNodeList with a significant initial capacity,
  80// and then have all of the trees use that same large FreeNodeList.
  81type FreeNodeList struct {
  82	nodes []*node
  83}
  84
  85// copyOnWriteContext manages node ownership and ensures that cloned trees
  86// maintain isolation from each other when a node is changed.
  87//
  88// Ownership Rules:
  89//   - Each node is associated with a specific copyOnWriteContext.
  90//   - A tree can modify a node directly only if the tree's context matches the node's context.
  91//   - If a tree attempts to modify a node with a different context, it must create a
  92//     new, writable copy of that node (i.e., perform a clone) before making changes.
  93//
  94// Write Operation Invariant:
  95//   - During any write operation, the current node being modified must have the same
  96//     context as the tree requesting the write.
  97//   - To maintain this invariant, before descending into a child node, the system checks
  98//     if the child’s context matches the tree's context.
  99//   - If the contexts match, the node can be modified in place.
 100//   - If the contexts do not match, a mutable copy of the child node is created with the
 101//     correct context before proceeding.
 102//
 103// Practical Implications:
 104//   - The node currently being modified inherits the requesting tree's context, allowing
 105//     in-place modifications.
 106//   - Child nodes may initially have different contexts. Before any modification, these
 107//     children are copied to ensure they share the correct context, enabling safe and
 108//     isolated updates without affecting other trees that might be referencing the original nodes.
 109//
 110// Example Usage:
 111// When a tree performs a write operation (e.g., inserting or deleting a node), it uses
 112// its copyOnWriteContext to determine whether it can modify nodes directly or needs to
 113// create copies. This mechanism ensures that trees can share nodes efficiently while
 114// maintaining data integrity.
 115type copyOnWriteContext struct {
 116	nodes *FreeNodeList
 117}
 118
 119// Record implements an interface with a single function, Less. Any type that implements
 120// RecordIterator allows callers of all of the iteration functions for the BTree
 121// to evaluate an element of the tree as it is traversed. The function will receive
 122// a stored element from the tree. The function must return either a true or a false value.
 123// True indicates that iteration should continue, while false indicates that it should halt.
 124type RecordIterator func(i Record) bool
 125
 126//////////
 127//
 128// Functions
 129//
 130//////////
 131
 132// NewFreeNodeList creates a new free list.
 133// size is the maximum size of the returned free list.
 134func NewFreeNodeList(size int) *FreeNodeList {
 135	return &FreeNodeList{nodes: make([]*node, 0, size)}
 136}
 137
 138func (freeList *FreeNodeList) newNode() (nodeInstance *node) {
 139	index := len(freeList.nodes) - 1
 140	if index < 0 {
 141		return new(node)
 142	}
 143	nodeInstance = freeList.nodes[index]
 144	freeList.nodes[index] = nil
 145	freeList.nodes = freeList.nodes[:index]
 146
 147	return nodeInstance
 148}
 149
 150// freeNode adds the given node to the list, returning true if it was added
 151// and false if it was discarded.
 152
 153func (freeList *FreeNodeList) freeNode(nodeInstance *node) (nodeWasAdded bool) {
 154	if len(freeList.nodes) < cap(freeList.nodes) {
 155		freeList.nodes = append(freeList.nodes, nodeInstance)
 156		nodeWasAdded = true
 157	}
 158	return
 159}
 160
 161// A default size for the free node list. We might want to run some benchmarks to see if
 162// there are any pros or cons to this size versus other sizes. This seems to be a reasonable
 163// compromise to reduce GC pressure by reusing nodes where possible, without stacking up too
 164// much baggage in a given tree.
 165const DefaultFreeNodeListSize = 32
 166
 167// WithDegree sets the degree of the B-Tree.
 168func WithDegree(degree int) BTreeOption {
 169	return func(bt *BTree) {
 170		if degree <= 1 {
 171			panic("Degrees less than 1 do not make any sense for a BTree. Please provide a degree of 1 or greater.")
 172		}
 173		bt.degree = degree
 174	}
 175}
 176
 177// WithFreeNodeList sets a custom free node list for the B-Tree.
 178func WithFreeNodeList(freeList *FreeNodeList) BTreeOption {
 179	return func(bt *BTree) {
 180		bt.cowCtx = &copyOnWriteContext{nodes: freeList}
 181	}
 182}
 183
 184// New creates a new B-Tree with optional configurations. If configuration is not provided,
 185// it will default to 16 element nodes. Degree may not be less than 1 (which effectively
 186// makes the tree into a binary tree).
 187//
 188// `New(WithDegree(2))`, for example, will create a 2-3-4 tree (each node contains 1-3 records
 189// and 2-4 children).
 190//
 191// `New(WithFreeNodeList(NewFreeNodeList(64)))` will create a tree with a degree of 16, and
 192// with a free node list with a size of 64.
 193func New(options ...BTreeOption) *BTree {
 194	btree := &BTree{
 195		degree: 16, // default degree
 196		cowCtx: &copyOnWriteContext{nodes: NewFreeNodeList(DefaultFreeNodeListSize)},
 197	}
 198	for _, opt := range options {
 199		opt(btree)
 200	}
 201	return btree
 202}
 203
 204// insertAt inserts a value into the given index, pushing all subsequent values
 205// forward.
 206func (recordsSlice *records) insertAt(index int, newRecord Record) {
 207	originalLength := len(*recordsSlice)
 208
 209	// Extend the slice by one element
 210	*recordsSlice = append(*recordsSlice, nil)
 211
 212	// Move elements from the end to avoid overwriting during the copy
 213	// TODO: Make this work with slice appends, instead. It should be faster?
 214	if index < originalLength {
 215		for position := originalLength; position > index; position-- {
 216			(*recordsSlice)[position] = (*recordsSlice)[position-1]
 217		}
 218	}
 219
 220	// Insert the new record
 221	(*recordsSlice)[index] = newRecord
 222}
 223
 224// removeAt removes a Record from the records slice at the specified index.
 225// It shifts subsequent records to fill the gap and returns the removed Record.
 226func (recordSlicePointer *records) removeAt(index int) Record {
 227	recordSlice := *recordSlicePointer
 228	removedRecord := recordSlice[index]
 229	copy(recordSlice[index:], recordSlice[index+1:])
 230	recordSlice[len(recordSlice)-1] = nil
 231	*recordSlicePointer = recordSlice[:len(recordSlice)-1]
 232
 233	return removedRecord
 234}
 235
 236// Pop removes and returns the last Record from the records slice.
 237// It also clears the reference to the removed Record to aid garbage collection.
 238func (r *records) pop() Record {
 239	recordSlice := *r
 240	lastIndex := len(recordSlice) - 1
 241	removedRecord := recordSlice[lastIndex]
 242	recordSlice[lastIndex] = nil
 243	*r = recordSlice[:lastIndex]
 244	return removedRecord
 245}
 246
 247// This slice is intended only as a supply of records for the truncate function
 248// that follows, and it should not be changed or altered.
 249var emptyRecords = make(records, 32)
 250
 251// truncate reduces the length of the slice to the specified index,
 252// and clears the elements beyond that index to prevent memory leaks.
 253// The index must be less than or equal to the current length of the slice.
 254func (originalSlice *records) truncate(index int) {
 255	// Split the slice into the part to keep and the part to clear.
 256	recordsToKeep := (*originalSlice)[:index]
 257	recordsToClear := (*originalSlice)[index:]
 258
 259	// Update the original slice to only contain the records to keep.
 260	*originalSlice = recordsToKeep
 261
 262	// Clear the memory of the part that was truncated.
 263	for len(recordsToClear) > 0 {
 264		// Copy empty values from `emptyRecords` to the recordsToClear slice.
 265		// This effectively "clears" the memory by overwriting elements.
 266		numCleared := copy(recordsToClear, emptyRecords)
 267		recordsToClear = recordsToClear[numCleared:]
 268	}
 269}
 270
 271// Find determines the appropriate index at which a given Record should be inserted
 272// into the sorted records slice. If the Record already exists in the slice,
 273// the method returns its index and sets found to true.
 274//
 275// Parameters:
 276// - record: The Record to search for within the records slice.
 277//
 278// Returns:
 279// - insertIndex: The index at which the Record should be inserted.
 280// - found: A boolean indicating whether the Record already exists in the slice.
 281func (recordsSlice records) find(record Record) (insertIndex int, found bool) {
 282	totalRecords := len(recordsSlice)
 283
 284	// Perform a binary search to find the insertion point for the record
 285	insertionPoint := sort.Search(totalRecords, func(currentIndex int) bool {
 286		return record.Less(recordsSlice[currentIndex])
 287	})
 288
 289	if insertionPoint > 0 {
 290		previousRecord := recordsSlice[insertionPoint-1]
 291
 292		if !previousRecord.Less(record) {
 293			return insertionPoint - 1, true
 294		}
 295	}
 296
 297	return insertionPoint, false
 298}
 299
 300// insertAt inserts a value into the given index, pushing all subsequent values
 301// forward.
 302func (childSlice *children) insertAt(index int, n *node) {
 303	originalLength := len(*childSlice)
 304
 305	// Extend the slice by one element
 306	*childSlice = append(*childSlice, nil)
 307
 308	// Move elements from the end to avoid overwriting during the copy
 309	if index < originalLength {
 310		for i := originalLength; i > index; i-- {
 311			(*childSlice)[i] = (*childSlice)[i-1]
 312		}
 313	}
 314
 315	// Insert the new record
 316	(*childSlice)[index] = n
 317}
 318
 319// removeAt removes a Record from the records slice at the specified index.
 320// It shifts subsequent records to fill the gap and returns the removed Record.
 321func (childSlicePointer *children) removeAt(index int) *node {
 322	childSlice := *childSlicePointer
 323	removedChild := childSlice[index]
 324	copy(childSlice[index:], childSlice[index+1:])
 325	childSlice[len(childSlice)-1] = nil
 326	*childSlicePointer = childSlice[:len(childSlice)-1]
 327
 328	return removedChild
 329}
 330
 331// Pop removes and returns the last Record from the records slice.
 332// It also clears the reference to the removed Record to aid garbage collection.
 333func (childSlicePointer *children) pop() *node {
 334	childSlice := *childSlicePointer
 335	lastIndex := len(childSlice) - 1
 336	removedChild := childSlice[lastIndex]
 337	childSlice[lastIndex] = nil
 338	*childSlicePointer = childSlice[:lastIndex]
 339	return removedChild
 340}
 341
 342// This slice is intended only as a supply of records for the truncate function
 343// that follows, and it should not be changed or altered.
 344var emptyChildren = make(children, 32)
 345
 346// truncate reduces the length of the slice to the specified index,
 347// and clears the elements beyond that index to prevent memory leaks.
 348// The index must be less than or equal to the current length of the slice.
 349func (originalSlice *children) truncate(index int) {
 350	// Split the slice into the part to keep and the part to clear.
 351	childrenToKeep := (*originalSlice)[:index]
 352	childrenToClear := (*originalSlice)[index:]
 353
 354	// Update the original slice to only contain the records to keep.
 355	*originalSlice = childrenToKeep
 356
 357	// Clear the memory of the part that was truncated.
 358	for len(childrenToClear) > 0 {
 359		// Copy empty values from `emptyChildren` to the recordsToClear slice.
 360		// This effectively "clears" the memory by overwriting elements.
 361		numCleared := copy(childrenToClear, emptyChildren)
 362
 363		// Slice recordsToClear to exclude the elements that were just cleared.
 364		childrenToClear = childrenToClear[numCleared:]
 365	}
 366}
 367
 368// mutableFor creates a mutable copy of the node if the current node does not
 369// already belong to the provided copy-on-write context (COW). If the node is
 370// already associated with the given COW context, it returns the current node.
 371//
 372// Parameters:
 373// - cowCtx: The copy-on-write context that should own the returned node.
 374//
 375// Returns:
 376// - A pointer to the mutable node associated with the given COW context.
 377//
 378// If the current node belongs to a different COW context, this function:
 379// - Allocates a new node using the provided context.
 380// - Copies the node’s records and children slices into the newly allocated node.
 381// - Returns the new node which is now owned by the given COW context.
 382func (n *node) mutableFor(cowCtx *copyOnWriteContext) *node {
 383	// If the current node is already owned by the provided context, return it as-is.
 384	if n.cowCtx == cowCtx {
 385		return n
 386	}
 387
 388	// Create a new node in the provided context.
 389	newNode := cowCtx.newNode()
 390
 391	// Copy the records from the current node into the new node.
 392	newNode.records = append(newNode.records[:0], n.records...)
 393
 394	// Copy the children from the current node into the new node.
 395	newNode.children = append(newNode.children[:0], n.children...)
 396
 397	return newNode
 398}
 399
 400// mutableChild ensures that the child node at the given index is mutable and
 401// associated with the same COW context as the parent node. If the child node
 402// belongs to a different context, a copy of the child is created and stored in the
 403// parent node.
 404//
 405// Parameters:
 406// - i: The index of the child node to be made mutable.
 407//
 408// Returns:
 409// - A pointer to the mutable child node.
 410func (n *node) mutableChild(i int) *node {
 411	// Ensure that the child at index `i` is mutable and belongs to the same context as the parent.
 412	mutableChildNode := n.children[i].mutableFor(n.cowCtx)
 413	// Update the child node reference in the current node to the mutable version.
 414	n.children[i] = mutableChildNode
 415	return mutableChildNode
 416}
 417
 418// split splits the given node at the given index.  The current node shrinks,
 419// and this function returns the record that existed at that index and a new node
 420// containing all records/children after it.
 421func (n *node) split(i int) (Record, *node) {
 422	record := n.records[i]
 423	next := n.cowCtx.newNode()
 424	next.records = append(next.records, n.records[i+1:]...)
 425	n.records.truncate(i)
 426	if len(n.children) > 0 {
 427		next.children = append(next.children, n.children[i+1:]...)
 428		n.children.truncate(i + 1)
 429	}
 430	return record, next
 431}
 432
 433// maybeSplitChild checks if a child should be split, and if so splits it.
 434// Returns whether or not a split occurred.
 435func (n *node) maybeSplitChild(i, maxRecords int) bool {
 436	if len(n.children[i].records) < maxRecords {
 437		return false
 438	}
 439	first := n.mutableChild(i)
 440	record, second := first.split(maxRecords / 2)
 441	n.records.insertAt(i, record)
 442	n.children.insertAt(i+1, second)
 443	return true
 444}
 445
 446// insert adds a record to the subtree rooted at the current node, ensuring that no node in the subtree
 447// exceeds the maximum number of allowed records (`maxRecords`). If an equivalent record is already present,
 448// it replaces the existing one and returns it; otherwise, it returns nil.
 449//
 450// Parameters:
 451// - record: The record to be inserted.
 452// - maxRecords: The maximum number of records allowed per node.
 453//
 454// Returns:
 455// - The record that was replaced if an equivalent record already existed, otherwise nil.
 456func (n *node) insert(record Record, maxRecords int) Record {
 457	// Find the position where the new record should be inserted and check if an equivalent record already exists.
 458	insertionIndex, recordExists := n.records.find(record)
 459
 460	if recordExists {
 461		// If an equivalent record is found, replace it and return the old record.
 462		existingRecord := n.records[insertionIndex]
 463		n.records[insertionIndex] = record
 464		return existingRecord
 465	}
 466
 467	// If the current node is a leaf (has no children), insert the new record at the calculated index.
 468	if len(n.children) == 0 {
 469		n.records.insertAt(insertionIndex, record)
 470		return nil
 471	}
 472
 473	// Check if the child node at the insertion index needs to be split due to exceeding maxRecords.
 474	if n.maybeSplitChild(insertionIndex, maxRecords) {
 475		// If a split occurred, compare the new record with the record moved up to the current node.
 476		splitRecord := n.records[insertionIndex]
 477		switch {
 478		case record.Less(splitRecord):
 479			// The new record belongs to the first (left) split node; no change to insertion index.
 480		case splitRecord.Less(record):
 481			// The new record belongs to the second (right) split node; move the insertion index to the next position.
 482			insertionIndex++
 483		default:
 484			// If the record is equivalent to the split record, replace it and return the old record.
 485			existingRecord := n.records[insertionIndex]
 486			n.records[insertionIndex] = record
 487			return existingRecord
 488		}
 489	}
 490
 491	// Recursively insert the record into the appropriate child node, now guaranteed to have space.
 492	return n.mutableChild(insertionIndex).insert(record, maxRecords)
 493}
 494
 495// get finds the given key in the subtree and returns it.
 496func (n *node) get(key Record) Record {
 497	i, found := n.records.find(key)
 498	if found {
 499		return n.records[i]
 500	} else if len(n.children) > 0 {
 501		return n.children[i].get(key)
 502	}
 503	return nil
 504}
 505
 506// min returns the first record in the subtree.
 507func min(n *node) Record {
 508	if n == nil {
 509		return nil
 510	}
 511	for len(n.children) > 0 {
 512		n = n.children[0]
 513	}
 514	if len(n.records) == 0 {
 515		return nil
 516	}
 517	return n.records[0]
 518}
 519
 520// max returns the last record in the subtree.
 521func max(n *node) Record {
 522	if n == nil {
 523		return nil
 524	}
 525	for len(n.children) > 0 {
 526		n = n.children[len(n.children)-1]
 527	}
 528	if len(n.records) == 0 {
 529		return nil
 530	}
 531	return n.records[len(n.records)-1]
 532}
 533
 534// toRemove details what record to remove in a node.remove call.
 535type toRemove int
 536
 537const (
 538	removeRecord toRemove = iota // removes the given record
 539	removeMin                    // removes smallest record in the subtree
 540	removeMax                    // removes largest record in the subtree
 541)
 542
 543// remove removes a record from the subtree rooted at the current node.
 544//
 545// Parameters:
 546// - record: The record to be removed (can be nil when the removal type indicates min or max).
 547// - minRecords: The minimum number of records a node should have after removal.
 548// - typ: The type of removal operation to perform (removeMin, removeMax, or removeRecord).
 549//
 550// Returns:
 551// - The record that was removed, or nil if no such record was found.
 552func (n *node) remove(record Record, minRecords int, removalType toRemove) Record {
 553	var targetIndex int
 554	var recordFound bool
 555
 556	// Determine the index of the record to remove based on the removal type.
 557	switch removalType {
 558	case removeMax:
 559		// If this node is a leaf, remove and return the last record.
 560		if len(n.children) == 0 {
 561			return n.records.pop()
 562		}
 563		targetIndex = len(n.records) // The last record index for removing max.
 564
 565	case removeMin:
 566		// If this node is a leaf, remove and return the first record.
 567		if len(n.children) == 0 {
 568			return n.records.removeAt(0)
 569		}
 570		targetIndex = 0 // The first record index for removing min.
 571
 572	case removeRecord:
 573		// Locate the index of the record to be removed.
 574		targetIndex, recordFound = n.records.find(record)
 575		if len(n.children) == 0 {
 576			if recordFound {
 577				return n.records.removeAt(targetIndex)
 578			}
 579			return nil // The record was not found in the leaf node.
 580		}
 581
 582	default:
 583		panic("invalid removal type")
 584	}
 585
 586	// If the current node has children, handle the removal recursively.
 587	if len(n.children[targetIndex].records) <= minRecords {
 588		// If the target child node has too few records, grow it before proceeding with removal.
 589		return n.growChildAndRemove(targetIndex, record, minRecords, removalType)
 590	}
 591
 592	// Get a mutable reference to the child node at the target index.
 593	targetChild := n.mutableChild(targetIndex)
 594
 595	// If the record to be removed was found in the current node:
 596	if recordFound {
 597		// Replace the current record with its predecessor from the child node, and return the removed record.
 598		replacedRecord := n.records[targetIndex]
 599		n.records[targetIndex] = targetChild.remove(nil, minRecords, removeMax)
 600		return replacedRecord
 601	}
 602
 603	// Recursively remove the record from the child node.
 604	return targetChild.remove(record, minRecords, removalType)
 605}
 606
 607// growChildAndRemove grows child 'i' to make sure it's possible to remove an
 608// record from it while keeping it at minRecords, then calls remove to actually
 609// remove it.
 610//
 611// Most documentation says we have to do two sets of special casing:
 612//  1. record is in this node
 613//  2. record is in child
 614//
 615// In both cases, we need to handle the two subcases:
 616//
 617//	A) node has enough values that it can spare one
 618//	B) node doesn't have enough values
 619//
 620// For the latter, we have to check:
 621//
 622//	a) left sibling has node to spare
 623//	b) right sibling has node to spare
 624//	c) we must merge
 625//
 626// To simplify our code here, we handle cases #1 and #2 the same:
 627// If a node doesn't have enough records, we make sure it does (using a,b,c).
 628// We then simply redo our remove call, and the second time (regardless of
 629// whether we're in case 1 or 2), we'll have enough records and can guarantee
 630// that we hit case A.
 631func (n *node) growChildAndRemove(i int, record Record, minRecords int, typ toRemove) Record {
 632	if i > 0 && len(n.children[i-1].records) > minRecords {
 633		// Steal from left child
 634		child := n.mutableChild(i)
 635		stealFrom := n.mutableChild(i - 1)
 636		stolenRecord := stealFrom.records.pop()
 637		child.records.insertAt(0, n.records[i-1])
 638		n.records[i-1] = stolenRecord
 639		if len(stealFrom.children) > 0 {
 640			child.children.insertAt(0, stealFrom.children.pop())
 641		}
 642	} else if i < len(n.records) && len(n.children[i+1].records) > minRecords {
 643		// steal from right child
 644		child := n.mutableChild(i)
 645		stealFrom := n.mutableChild(i + 1)
 646		stolenRecord := stealFrom.records.removeAt(0)
 647		child.records = append(child.records, n.records[i])
 648		n.records[i] = stolenRecord
 649		if len(stealFrom.children) > 0 {
 650			child.children = append(child.children, stealFrom.children.removeAt(0))
 651		}
 652	} else {
 653		if i >= len(n.records) {
 654			i--
 655		}
 656		child := n.mutableChild(i)
 657		// merge with right child
 658		mergeRecord := n.records.removeAt(i)
 659		mergeChild := n.children.removeAt(i + 1).mutableFor(n.cowCtx)
 660		child.records = append(child.records, mergeRecord)
 661		child.records = append(child.records, mergeChild.records...)
 662		child.children = append(child.children, mergeChild.children...)
 663		n.cowCtx.freeNode(mergeChild)
 664	}
 665	return n.remove(record, minRecords, typ)
 666}
 667
 668type direction int
 669
 670const (
 671	descend = direction(-1)
 672	ascend  = direction(+1)
 673)
 674
 675// iterate provides a simple method for iterating over elements in the tree.
 676//
 677// When ascending, the 'start' should be less than 'stop' and when descending,
 678// the 'start' should be greater than 'stop'. Setting 'includeStart' to true
 679// will force the iterator to include the first record when it equals 'start',
 680// thus creating a "greaterOrEqual" or "lessThanEqual" rather than just a
 681// "greaterThan" or "lessThan" queries.
 682func (n *node) iterate(dir direction, start, stop Record, includeStart bool, hit bool, iter RecordIterator) (bool, bool) {
 683	var ok, found bool
 684	var index int
 685	switch dir {
 686	case ascend:
 687		if start != nil {
 688			index, _ = n.records.find(start)
 689		}
 690		for i := index; i < len(n.records); i++ {
 691			if len(n.children) > 0 {
 692				if hit, ok = n.children[i].iterate(dir, start, stop, includeStart, hit, iter); !ok {
 693					return hit, false
 694				}
 695			}
 696			if !includeStart && !hit && start != nil && !start.Less(n.records[i]) {
 697				hit = true
 698				continue
 699			}
 700			hit = true
 701			if stop != nil && !n.records[i].Less(stop) {
 702				return hit, false
 703			}
 704			if !iter(n.records[i]) {
 705				return hit, false
 706			}
 707		}
 708		if len(n.children) > 0 {
 709			if hit, ok = n.children[len(n.children)-1].iterate(dir, start, stop, includeStart, hit, iter); !ok {
 710				return hit, false
 711			}
 712		}
 713	case descend:
 714		if start != nil {
 715			index, found = n.records.find(start)
 716			if !found {
 717				index = index - 1
 718			}
 719		} else {
 720			index = len(n.records) - 1
 721		}
 722		for i := index; i >= 0; i-- {
 723			if start != nil && !n.records[i].Less(start) {
 724				if !includeStart || hit || start.Less(n.records[i]) {
 725					continue
 726				}
 727			}
 728			if len(n.children) > 0 {
 729				if hit, ok = n.children[i+1].iterate(dir, start, stop, includeStart, hit, iter); !ok {
 730					return hit, false
 731				}
 732			}
 733			if stop != nil && !stop.Less(n.records[i]) {
 734				return hit, false //	continue
 735			}
 736			hit = true
 737			if !iter(n.records[i]) {
 738				return hit, false
 739			}
 740		}
 741		if len(n.children) > 0 {
 742			if hit, ok = n.children[0].iterate(dir, start, stop, includeStart, hit, iter); !ok {
 743				return hit, false
 744			}
 745		}
 746	}
 747	return hit, true
 748}
 749
 750func (tree *BTree) Iterate(dir direction, start, stop Record, includeStart bool, hit bool, iter RecordIterator) (bool, bool) {
 751	return tree.root.iterate(dir, start, stop, includeStart, hit, iter)
 752}
 753
 754// Clone creates a new BTree instance that shares the current tree's structure using a copy-on-write (COW) approach.
 755//
 756// How Cloning Works:
 757//   - The cloned tree (`clonedTree`) shares the current tree’s nodes in a read-only state. This means that no additional memory
 758//     is allocated for shared nodes, and read operations on the cloned tree are as fast as on the original tree.
 759//   - When either the original tree (`t`) or the cloned tree (`clonedTree`) needs to perform a write operation (such as an insert, delete, etc.),
 760//     a new copy of the affected nodes is created on-demand. This ensures that modifications to one tree do not affect the other.
 761//
 762// Performance Implications:
 763//   - **Clone Creation:** The creation of a clone is inexpensive since it only involves copying references to the original tree's nodes
 764//     and creating new copy-on-write contexts.
 765//   - **Read Operations:** Reading from either the original tree or the cloned tree has no additional performance overhead compared to the original tree.
 766//   - **Write Operations:** The first write operation on either tree may experience a slight slow-down due to the allocation of new nodes,
 767//     but subsequent write operations will perform at the same speed as if the tree were not cloned.
 768//
 769// Returns:
 770// - A new BTree instance (`clonedTree`) that shares the original tree's structure.
 771func (t *BTree) Clone() *BTree {
 772	// Create two independent copy-on-write contexts, one for the original tree (`t`) and one for the cloned tree.
 773	originalContext := *t.cowCtx
 774	clonedContext := *t.cowCtx
 775
 776	// Create a shallow copy of the current tree, which will be the new cloned tree.
 777	clonedTree := *t
 778
 779	// Assign the new contexts to their respective trees.
 780	t.cowCtx = &originalContext
 781	clonedTree.cowCtx = &clonedContext
 782
 783	return &clonedTree
 784}
 785
 786// maxRecords returns the max number of records to allow per node.
 787func (t *BTree) maxRecords() int {
 788	return t.degree*2 - 1
 789}
 790
 791// minRecords returns the min number of records to allow per node (ignored for the
 792// root node).
 793func (t *BTree) minRecords() int {
 794	return t.degree - 1
 795}
 796
 797func (c *copyOnWriteContext) newNode() (n *node) {
 798	n = c.nodes.newNode()
 799	n.cowCtx = c
 800	return
 801}
 802
 803type freeType int
 804
 805const (
 806	ftFreelistFull freeType = iota // node was freed (available for GC, not stored in nodes)
 807	ftStored                       // node was stored in the nodes for later use
 808	ftNotOwned                     // node was ignored by COW, since it's owned by another one
 809)
 810
 811// freeNode frees a node within a given COW context, if it's owned by that
 812// context.  It returns what happened to the node (see freeType const
 813// documentation).
 814func (c *copyOnWriteContext) freeNode(n *node) freeType {
 815	if n.cowCtx == c {
 816		// clear to allow GC
 817		n.records.truncate(0)
 818		n.children.truncate(0)
 819		n.cowCtx = nil
 820		if c.nodes.freeNode(n) {
 821			return ftStored
 822		} else {
 823			return ftFreelistFull
 824		}
 825	} else {
 826		return ftNotOwned
 827	}
 828}
 829
 830// Insert adds the given record to the B-tree. If a record already exists in the tree with the same value,
 831// it is replaced, and the old record is returned. Otherwise, it returns nil.
 832//
 833// Notes:
 834// - The function panics if a nil record is provided as input.
 835// - If the root node is empty, a new root node is created and the record is inserted.
 836//
 837// Parameters:
 838// - record: The record to be inserted into the B-tree.
 839//
 840// Returns:
 841// - The replaced record if an equivalent record already exists, or nil if no replacement occurred.
 842func (t *BTree) Insert(record Record) Record {
 843	if record == nil {
 844		panic("nil record cannot be added to BTree")
 845	}
 846
 847	// If the tree is empty (no root), create a new root node and insert the record.
 848	if t.root == nil {
 849		t.root = t.cowCtx.newNode()
 850		t.root.records = append(t.root.records, record)
 851		t.length++
 852		return nil
 853	}
 854
 855	// Ensure that the root node is mutable (associated with the current tree's copy-on-write context).
 856	t.root = t.root.mutableFor(t.cowCtx)
 857
 858	// If the root node is full (contains the maximum number of records), split the root.
 859	if len(t.root.records) >= t.maxRecords() {
 860		// Split the root node, promoting the middle record and creating a new child node.
 861		middleRecord, newChildNode := t.root.split(t.maxRecords() / 2)
 862
 863		// Create a new root node to hold the promoted middle record.
 864		oldRoot := t.root
 865		t.root = t.cowCtx.newNode()
 866		t.root.records = append(t.root.records, middleRecord)
 867		t.root.children = append(t.root.children, oldRoot, newChildNode)
 868	}
 869
 870	// Insert the new record into the subtree rooted at the current root node.
 871	replacedRecord := t.root.insert(record, t.maxRecords())
 872
 873	// If no record was replaced, increase the tree's length.
 874	if replacedRecord == nil {
 875		t.length++
 876	}
 877
 878	return replacedRecord
 879}
 880
 881// Delete removes an record equal to the passed in record from the tree, returning
 882// it.  If no such record exists, returns nil.
 883func (t *BTree) Delete(record Record) Record {
 884	return t.deleteRecord(record, removeRecord)
 885}
 886
 887// DeleteMin removes the smallest record in the tree and returns it.
 888// If no such record exists, returns nil.
 889func (t *BTree) DeleteMin() Record {
 890	return t.deleteRecord(nil, removeMin)
 891}
 892
 893// Shift is identical to DeleteMin. If the tree is thought of as an ordered list, then Shift()
 894// removes the element at the start of the list, the smallest element, and returns it.
 895func (t *BTree) Shift() Record {
 896	return t.deleteRecord(nil, removeMin)
 897}
 898
 899// DeleteMax removes the largest record in the tree and returns it.
 900// If no such record exists, returns nil.
 901func (t *BTree) DeleteMax() Record {
 902	return t.deleteRecord(nil, removeMax)
 903}
 904
 905// Pop is identical to DeleteMax. If the tree is thought of as an ordered list, then Shift()
 906// removes the element at the end of the list, the largest element, and returns it.
 907func (t *BTree) Pop() Record {
 908	return t.deleteRecord(nil, removeMax)
 909}
 910
 911// deleteRecord removes a record from the B-tree based on the specified removal type (removeMin, removeMax, or removeRecord).
 912// It returns the removed record if it was found, or nil if no matching record was found.
 913//
 914// Parameters:
 915// - record: The record to be removed (can be nil if the removal type indicates min or max).
 916// - removalType: The type of removal operation to perform (removeMin, removeMax, or removeRecord).
 917//
 918// Returns:
 919// - The removed record if it existed in the tree, or nil if it was not found.
 920func (t *BTree) deleteRecord(record Record, removalType toRemove) Record {
 921	// If the tree is empty or the root has no records, return nil.
 922	if t.root == nil || len(t.root.records) == 0 {
 923		return nil
 924	}
 925
 926	// Ensure the root node is mutable (associated with the tree's copy-on-write context).
 927	t.root = t.root.mutableFor(t.cowCtx)
 928
 929	// Attempt to remove the specified record from the root node.
 930	removedRecord := t.root.remove(record, t.minRecords(), removalType)
 931
 932	// Check if the root node has become empty but still has children.
 933	// In this case, the tree height should be reduced, making the first child the new root.
 934	if len(t.root.records) == 0 && len(t.root.children) > 0 {
 935		oldRoot := t.root
 936		t.root = t.root.children[0]
 937		// Free the old root node, as it is no longer needed.
 938		t.cowCtx.freeNode(oldRoot)
 939	}
 940
 941	// If a record was successfully removed, decrease the tree's length.
 942	if removedRecord != nil {
 943		t.length--
 944	}
 945
 946	return removedRecord
 947}
 948
 949// AscendRange calls the iterator for every value in the tree within the range
 950// [greaterOrEqual, lessThan), until iterator returns false.
 951func (t *BTree) AscendRange(greaterOrEqual, lessThan Record, iterator RecordIterator) {
 952	if t.root == nil {
 953		return
 954	}
 955	t.root.iterate(ascend, greaterOrEqual, lessThan, true, false, iterator)
 956}
 957
 958// AscendLessThan calls the iterator for every value in the tree within the range
 959// [first, pivot), until iterator returns false.
 960func (t *BTree) AscendLessThan(pivot Record, iterator RecordIterator) {
 961	if t.root == nil {
 962		return
 963	}
 964	t.root.iterate(ascend, nil, pivot, false, false, iterator)
 965}
 966
 967// AscendGreaterOrEqual calls the iterator for every value in the tree within
 968// the range [pivot, last], until iterator returns false.
 969func (t *BTree) AscendGreaterOrEqual(pivot Record, iterator RecordIterator) {
 970	if t.root == nil {
 971		return
 972	}
 973	t.root.iterate(ascend, pivot, nil, true, false, iterator)
 974}
 975
 976// Ascend calls the iterator for every value in the tree within the range
 977// [first, last], until iterator returns false.
 978func (t *BTree) Ascend(iterator RecordIterator) {
 979	if t.root == nil {
 980		return
 981	}
 982	t.root.iterate(ascend, nil, nil, false, false, iterator)
 983}
 984
 985// DescendRange calls the iterator for every value in the tree within the range
 986// [lessOrEqual, greaterThan), until iterator returns false.
 987func (t *BTree) DescendRange(lessOrEqual, greaterThan Record, iterator RecordIterator) {
 988	if t.root == nil {
 989		return
 990	}
 991	t.root.iterate(descend, lessOrEqual, greaterThan, true, false, iterator)
 992}
 993
 994// DescendLessOrEqual calls the iterator for every value in the tree within the range
 995// [pivot, first], until iterator returns false.
 996func (t *BTree) DescendLessOrEqual(pivot Record, iterator RecordIterator) {
 997	if t.root == nil {
 998		return
 999	}
1000	t.root.iterate(descend, pivot, nil, true, false, iterator)
1001}
1002
1003// DescendGreaterThan calls the iterator for every value in the tree within
1004// the range [last, pivot), until iterator returns false.
1005func (t *BTree) DescendGreaterThan(pivot Record, iterator RecordIterator) {
1006	if t.root == nil {
1007		return
1008	}
1009	t.root.iterate(descend, nil, pivot, false, false, iterator)
1010}
1011
1012// Descend calls the iterator for every value in the tree within the range
1013// [last, first], until iterator returns false.
1014func (t *BTree) Descend(iterator RecordIterator) {
1015	if t.root == nil {
1016		return
1017	}
1018	t.root.iterate(descend, nil, nil, false, false, iterator)
1019}
1020
1021// Get looks for the key record in the tree, returning it.  It returns nil if
1022// unable to find that record.
1023func (t *BTree) Get(key Record) Record {
1024	if t.root == nil {
1025		return nil
1026	}
1027	return t.root.get(key)
1028}
1029
1030// Min returns the smallest record in the tree, or nil if the tree is empty.
1031func (t *BTree) Min() Record {
1032	return min(t.root)
1033}
1034
1035// Max returns the largest record in the tree, or nil if the tree is empty.
1036func (t *BTree) Max() Record {
1037	return max(t.root)
1038}
1039
1040// Has returns true if the given key is in the tree.
1041func (t *BTree) Has(key Record) bool {
1042	return t.Get(key) != nil
1043}
1044
1045// Len returns the number of records currently in the tree.
1046func (t *BTree) Len() int {
1047	return t.length
1048}
1049
1050// Clear removes all elements from the B-tree.
1051//
1052// Parameters:
1053// - addNodesToFreelist:
1054//     - If true, the tree's nodes are added to the freelist during the clearing process,
1055//       up to the freelist's capacity.
1056//     - If false, the root node is simply dereferenced, allowing Go's garbage collector
1057//       to reclaim the memory.
1058//
1059// Benefits:
1060// - **Performance:**
1061//     - Significantly faster than deleting each element individually, as it avoids the overhead
1062//       of searching and updating the tree structure for each deletion.
1063//     - More efficient than creating a new tree, since it reuses existing nodes by adding them
1064//       to the freelist instead of discarding them to the garbage collector.
1065//
1066// Time Complexity:
1067// - **O(1):**
1068//     - When `addNodesToFreelist` is false.
1069//     - When `addNodesToFreelist` is true but the freelist is already full.
1070// - **O(freelist size):**
1071//     - When adding nodes to the freelist up to its capacity.
1072// - **O(tree size):**
1073//     - When iterating through all nodes to add to the freelist, but none can be added due to
1074//       ownership by another tree.
1075
1076func (tree *BTree) Clear(addNodesToFreelist bool) {
1077	if tree.root != nil && addNodesToFreelist {
1078		tree.root.reset(tree.cowCtx)
1079	}
1080	tree.root = nil
1081	tree.length = 0
1082}
1083
1084// reset adds all nodes in the current subtree to the freelist.
1085//
1086// The function operates recursively:
1087// - It first attempts to reset all child nodes.
1088// - If the freelist becomes full at any point, the process stops immediately.
1089//
1090// Parameters:
1091// - copyOnWriteCtx: The copy-on-write context managing the freelist.
1092//
1093// Returns:
1094// - true: Indicates that the parent node should continue attempting to reset its nodes.
1095// - false: Indicates that the freelist is full and no further nodes should be added.
1096//
1097// Usage:
1098// This method is called during the `Clear` operation of the B-tree to efficiently reuse
1099// nodes by adding them to the freelist, thereby avoiding unnecessary allocations and reducing
1100// garbage collection overhead.
1101func (currentNode *node) reset(copyOnWriteCtx *copyOnWriteContext) bool {
1102	// Iterate through each child node and attempt to reset it.
1103	for _, childNode := range currentNode.children {
1104		// If any child reset operation signals that the freelist is full, stop the process.
1105		if !childNode.reset(copyOnWriteCtx) {
1106			return false
1107		}
1108	}
1109
1110	// Attempt to add the current node to the freelist.
1111	// If the freelist is full after this operation, indicate to the parent to stop.
1112	freelistStatus := copyOnWriteCtx.freeNode(currentNode)
1113	return freelistStatus != ftFreelistFull
1114}