diff.gno

5.51 Kb ยท 217 lines
  1// The diff package implements the Myers diff algorithm to compute the edit distance
  2// and generate a minimal edit script between two strings.
  3//
  4// Edit distance, also known as Levenshtein distance, is a measure of the similarity
  5// between two strings. It is defined as the minimum number of single-character edits (insertions,
  6// deletions, or substitutions) required to change one string into the other.
  7package diff
  8
  9import (
 10	"strings"
 11)
 12
 13// EditType represents the type of edit operation in a diff.
 14type EditType uint8
 15
 16const (
 17	// EditKeep indicates that a character is unchanged in both strings.
 18	EditKeep EditType = iota
 19
 20	// EditInsert indicates that a character was inserted in the new string.
 21	EditInsert
 22
 23	// EditDelete indicates that a character was deleted from the old string.
 24	EditDelete
 25)
 26
 27// Edit represent a single edit operation in a diff.
 28type Edit struct {
 29	// Type is the kind of edit operation.
 30	Type EditType
 31
 32	// Char is the character involved in the edit operation.
 33	Char rune
 34}
 35
 36// MyersDiff computes the difference between two strings using Myers' diff algorithm.
 37// It returns a slice of Edit operations that transform the old string into the new string.
 38// This implementation finds the shortest edit script (SES) that represents the minimal
 39// set of operations to transform one string into the other.
 40//
 41// The function handles both ASCII and non-ASCII characters correctly.
 42//
 43// Time complexity: O((N+M)D), where N and M are the lengths of the input strings,
 44// and D is the size of the minimum edit script.
 45//
 46// Space complexity: O((N+M)D)
 47//
 48// In the worst case, where the strings are completely different, D can be as large as N+M,
 49// leading to a time and space complexity of O((N+M)^2). However, for strings with many
 50// common substrings, the performance is much better, often closer to O(N+M).
 51//
 52// Parameters:
 53//   - old: the original string.
 54//   - new: the modified string.
 55//
 56// Returns:
 57//   - A slice of Edit operations representing the minimum difference between the two strings.
 58func MyersDiff(old, new string) []Edit {
 59	oldRunes, newRunes := []rune(old), []rune(new)
 60	n, m := len(oldRunes), len(newRunes)
 61
 62	if n == 0 && m == 0 {
 63		return []Edit{}
 64	}
 65
 66	// old is empty
 67	if n == 0 {
 68		edits := make([]Edit, m)
 69		for i, r := range newRunes {
 70			edits[i] = Edit{Type: EditInsert, Char: r}
 71		}
 72		return edits
 73	}
 74
 75	if m == 0 {
 76		edits := make([]Edit, n)
 77		for i, r := range oldRunes {
 78			edits[i] = Edit{Type: EditDelete, Char: r}
 79		}
 80		return edits
 81	}
 82
 83	max := n + m
 84	v := make([]int, 2*max+1)
 85	var trace [][]int
 86search:
 87	for d := 0; d <= max; d++ {
 88		// iterate through diagonals
 89		for k := -d; k <= d; k += 2 {
 90			var x int
 91			if k == -d || (k != d && v[max+k-1] < v[max+k+1]) {
 92				x = v[max+k+1] // move down
 93			} else {
 94				x = v[max+k-1] + 1 // move right
 95			}
 96			y := x - k
 97
 98			// extend the path as far as possible with matching characters
 99			for x < n && y < m && oldRunes[x] == newRunes[y] {
100				x++
101				y++
102			}
103
104			v[max+k] = x
105
106			// check if we've reached the end of both strings
107			if x == n && y == m {
108				trace = append(trace, append([]int(nil), v...))
109				break search
110			}
111		}
112		trace = append(trace, append([]int(nil), v...))
113	}
114
115	// backtrack to construct the edit script
116	edits := make([]Edit, 0, n+m)
117	x, y := n, m
118	for d := len(trace) - 1; d >= 0; d-- {
119		vPrev := trace[d]
120		k := x - y
121		var prevK int
122		if k == -d || (k != d && vPrev[max+k-1] < vPrev[max+k+1]) {
123			prevK = k + 1
124		} else {
125			prevK = k - 1
126		}
127		prevX := vPrev[max+prevK]
128		prevY := prevX - prevK
129
130		// add keep edits for matching characters
131		for x > prevX && y > prevY {
132			if x > 0 && y > 0 {
133				edits = append([]Edit{{Type: EditKeep, Char: oldRunes[x-1]}}, edits...)
134			}
135			x--
136			y--
137		}
138		if y > prevY {
139			if y > 0 {
140				edits = append([]Edit{{Type: EditInsert, Char: newRunes[y-1]}}, edits...)
141			}
142			y--
143		} else if x > prevX {
144			if x > 0 {
145				edits = append([]Edit{{Type: EditDelete, Char: oldRunes[x-1]}}, edits...)
146			}
147			x--
148		}
149	}
150
151	return edits
152}
153
154// Format converts a slice of Edit operations into a human-readable string representation.
155// It groups consecutive edits of the same type and formats them as follows:
156//   - Unchanged characters are left as-is
157//   - Inserted characters are wrapped in [+...]
158//   - Deleted characters are wrapped in [-...]
159//
160// This function is useful for visualizing the differences between two strings
161// in a compact and intuitive format.
162//
163// Parameters:
164//   - edits: A slice of Edit operations, typically produced by MyersDiff
165//
166// Returns:
167//   - A formatted string representing the diff
168//
169// Example output:
170//
171//	For the diff between "abcd" and "acbd", the output might be:
172//	"a[-b]c[+b]d"
173//
174// Note:
175//
176//	The function assumes that the input slice of edits is in the correct order.
177//	An empty input slice will result in an empty string.
178func Format(edits []Edit) string {
179	if len(edits) == 0 {
180		return ""
181	}
182
183	var (
184		result       strings.Builder
185		currentType  EditType
186		currentChars strings.Builder
187	)
188
189	flushCurrent := func() {
190		if currentChars.Len() > 0 {
191			switch currentType {
192			case EditKeep:
193				result.WriteString(currentChars.String())
194			case EditInsert:
195				result.WriteString("[+")
196				result.WriteString(currentChars.String())
197				result.WriteByte(']')
198			case EditDelete:
199				result.WriteString("[-")
200				result.WriteString(currentChars.String())
201				result.WriteByte(']')
202			}
203			currentChars.Reset()
204		}
205	}
206
207	for _, edit := range edits {
208		if edit.Type != currentType {
209			flushCurrent()
210			currentType = edit.Type
211		}
212		currentChars.WriteRune(edit.Char)
213	}
214	flushCurrent()
215
216	return result.String()
217}