diff.gno
5.51 Kb ยท 217 lines
1// The diff package implements the Myers diff algorithm to compute the edit distance
2// and generate a minimal edit script between two strings.
3//
4// Edit distance, also known as Levenshtein distance, is a measure of the similarity
5// between two strings. It is defined as the minimum number of single-character edits (insertions,
6// deletions, or substitutions) required to change one string into the other.
7package diff
8
9import (
10 "strings"
11)
12
13// EditType represents the type of edit operation in a diff.
14type EditType uint8
15
16const (
17 // EditKeep indicates that a character is unchanged in both strings.
18 EditKeep EditType = iota
19
20 // EditInsert indicates that a character was inserted in the new string.
21 EditInsert
22
23 // EditDelete indicates that a character was deleted from the old string.
24 EditDelete
25)
26
27// Edit represent a single edit operation in a diff.
28type Edit struct {
29 // Type is the kind of edit operation.
30 Type EditType
31
32 // Char is the character involved in the edit operation.
33 Char rune
34}
35
36// MyersDiff computes the difference between two strings using Myers' diff algorithm.
37// It returns a slice of Edit operations that transform the old string into the new string.
38// This implementation finds the shortest edit script (SES) that represents the minimal
39// set of operations to transform one string into the other.
40//
41// The function handles both ASCII and non-ASCII characters correctly.
42//
43// Time complexity: O((N+M)D), where N and M are the lengths of the input strings,
44// and D is the size of the minimum edit script.
45//
46// Space complexity: O((N+M)D)
47//
48// In the worst case, where the strings are completely different, D can be as large as N+M,
49// leading to a time and space complexity of O((N+M)^2). However, for strings with many
50// common substrings, the performance is much better, often closer to O(N+M).
51//
52// Parameters:
53// - old: the original string.
54// - new: the modified string.
55//
56// Returns:
57// - A slice of Edit operations representing the minimum difference between the two strings.
58func MyersDiff(old, new string) []Edit {
59 oldRunes, newRunes := []rune(old), []rune(new)
60 n, m := len(oldRunes), len(newRunes)
61
62 if n == 0 && m == 0 {
63 return []Edit{}
64 }
65
66 // old is empty
67 if n == 0 {
68 edits := make([]Edit, m)
69 for i, r := range newRunes {
70 edits[i] = Edit{Type: EditInsert, Char: r}
71 }
72 return edits
73 }
74
75 if m == 0 {
76 edits := make([]Edit, n)
77 for i, r := range oldRunes {
78 edits[i] = Edit{Type: EditDelete, Char: r}
79 }
80 return edits
81 }
82
83 max := n + m
84 v := make([]int, 2*max+1)
85 var trace [][]int
86search:
87 for d := 0; d <= max; d++ {
88 // iterate through diagonals
89 for k := -d; k <= d; k += 2 {
90 var x int
91 if k == -d || (k != d && v[max+k-1] < v[max+k+1]) {
92 x = v[max+k+1] // move down
93 } else {
94 x = v[max+k-1] + 1 // move right
95 }
96 y := x - k
97
98 // extend the path as far as possible with matching characters
99 for x < n && y < m && oldRunes[x] == newRunes[y] {
100 x++
101 y++
102 }
103
104 v[max+k] = x
105
106 // check if we've reached the end of both strings
107 if x == n && y == m {
108 trace = append(trace, append([]int(nil), v...))
109 break search
110 }
111 }
112 trace = append(trace, append([]int(nil), v...))
113 }
114
115 // backtrack to construct the edit script
116 edits := make([]Edit, 0, n+m)
117 x, y := n, m
118 for d := len(trace) - 1; d >= 0; d-- {
119 vPrev := trace[d]
120 k := x - y
121 var prevK int
122 if k == -d || (k != d && vPrev[max+k-1] < vPrev[max+k+1]) {
123 prevK = k + 1
124 } else {
125 prevK = k - 1
126 }
127 prevX := vPrev[max+prevK]
128 prevY := prevX - prevK
129
130 // add keep edits for matching characters
131 for x > prevX && y > prevY {
132 if x > 0 && y > 0 {
133 edits = append([]Edit{{Type: EditKeep, Char: oldRunes[x-1]}}, edits...)
134 }
135 x--
136 y--
137 }
138 if y > prevY {
139 if y > 0 {
140 edits = append([]Edit{{Type: EditInsert, Char: newRunes[y-1]}}, edits...)
141 }
142 y--
143 } else if x > prevX {
144 if x > 0 {
145 edits = append([]Edit{{Type: EditDelete, Char: oldRunes[x-1]}}, edits...)
146 }
147 x--
148 }
149 }
150
151 return edits
152}
153
154// Format converts a slice of Edit operations into a human-readable string representation.
155// It groups consecutive edits of the same type and formats them as follows:
156// - Unchanged characters are left as-is
157// - Inserted characters are wrapped in [+...]
158// - Deleted characters are wrapped in [-...]
159//
160// This function is useful for visualizing the differences between two strings
161// in a compact and intuitive format.
162//
163// Parameters:
164// - edits: A slice of Edit operations, typically produced by MyersDiff
165//
166// Returns:
167// - A formatted string representing the diff
168//
169// Example output:
170//
171// For the diff between "abcd" and "acbd", the output might be:
172// "a[-b]c[+b]d"
173//
174// Note:
175//
176// The function assumes that the input slice of edits is in the correct order.
177// An empty input slice will result in an empty string.
178func Format(edits []Edit) string {
179 if len(edits) == 0 {
180 return ""
181 }
182
183 var (
184 result strings.Builder
185 currentType EditType
186 currentChars strings.Builder
187 )
188
189 flushCurrent := func() {
190 if currentChars.Len() > 0 {
191 switch currentType {
192 case EditKeep:
193 result.WriteString(currentChars.String())
194 case EditInsert:
195 result.WriteString("[+")
196 result.WriteString(currentChars.String())
197 result.WriteByte(']')
198 case EditDelete:
199 result.WriteString("[-")
200 result.WriteString(currentChars.String())
201 result.WriteByte(']')
202 }
203 currentChars.Reset()
204 }
205 }
206
207 for _, edit := range edits {
208 if edit.Type != currentType {
209 flushCurrent()
210 currentType = edit.Type
211 }
212 currentChars.WriteRune(edit.Char)
213 }
214 flushCurrent()
215
216 return result.String()
217}