forked from creachadair/mds
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmdiff.go
299 lines (265 loc) · 9.32 KB
/
mdiff.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
// Package mdiff supports creating textual diffs.
//
// To create a diff between two slices of strings, call:
//
// diff := mdiff.New(lhs, rhs)
//
// The diff.Chunks field contains the disjoint chunks of the input where edits
// have been applied. The complete edit sequence is in diff.Edits.
//
// By default, a diff does not include any context lines. To add up to n lines
// of context, call:
//
// diff.AddContext(n)
//
// This adds additional edits to the head and tail of each chunk containing the
// context lines, if any, that were found in the input. Adding context may
// cause chunks to overlap. To remove the overlap, call:
//
// diff.Unify()
//
// This modifies the diff in-place to merge adjacent and overlapping chunks, so
// that their contexts are not repeated.
//
// These operations can be chained to produce a (unified) diff with context:
//
// diff := mdiff.New(lhs, rhs).AddContext(3).Unify()
//
// # Output
//
// To write a diff in textual format, use one of the formatting functions. For
// example, use [Format] to write an old-style Unix diff output to stdout:
//
// mdiff.Format(os.Stdout, diff, nil)
//
// The [FormatContext] and [FormatUnified] functions allow rendering a diff in
// those formats instead. Use [FileInfo] to tell the formatter the names and
// timestamps to use for their file headers:
//
// mdiff.FormatUnified(os.Stdout, diff, &mdiff.FileInfo{
// Left: "dir/original.go",
// Right: "dir/patched.go",
// })
//
// If the options are omitted, the formatters defined by this package provide
// default placeholders. You can also implement your own function using the
// same signature. It is up to the implementation how to handle defaults.
//
// # Reading Patches
//
// To read patches formatted as text, use the [Read], [ReadUnified], and
// [ReadGitPatch] functions. These functions consume the contents of an
// [io.Reader] and return (on successe) one or more [Patch] values containing
// the diff chunks encoded in the patch. For example, to read a Unix diff:
//
// p, err := mdiff.Read(input)
//
// Unlike a [Diff], a patch does not contain the complete text of the input,
// nor any edit operations apart from those described in the patch itself.
//
// The [ReadGitPatch] function reads a concatenated sequence of [patches] in
// the format generated by Git commands like "git diff -p". Git metadata such
// as commit tags, headers, and so on, are discarded. Note also that this
// function does not support the "combined" Git patch format.
//
// [patches]: https://git-scm.com/docs/diff-format#generate_patch_text_with_p
package mdiff
import (
"slices"
"github.com/creachadair/mds/slice"
)
// Edit is an edit operation on strings. It is exported here so the caller
// does not need to import the slice package directly.
type Edit = slice.Edit[string]
// A Diff represents the difference between two slices of strings.
type Diff struct {
// The left and right inputs. These fields alias the slices passed to New.
Left, Right []string
// The diff chunks, in order. If the inputs are identical, Chunks is empty.
Chunks []*Chunk
// The sequence of edits, in order, applied to transform Left into Right.
Edits []Edit
}
// New constructs a Diff from the specified string slices.
// A diff constructed by New has 0 lines of context.
func New(lhs, rhs []string) *Diff {
es := slice.EditScript(lhs, rhs)
out := []*Chunk{{LStart: 1, RStart: 1, LEnd: 1, REnd: 1}}
cur := out[0]
lcur, rcur := 1, 1
addl := func(n int) { lcur += n; cur.LEnd += n }
addr := func(n int) { rcur += n; cur.REnd += n }
for _, e := range es {
// If there is a gap after the previous chunk, start a new one, unless
// the previous chunk is empty in which case take it over.
if lcur > cur.LEnd || rcur > cur.REnd {
if cur.LEnd != cur.LStart || cur.REnd != cur.RStart {
cur = new(Chunk)
out = append(out, cur)
}
cur.LStart, cur.LEnd = lcur, lcur
cur.RStart, cur.REnd = rcur, rcur
}
switch e.Op {
case slice.OpDrop:
addl(len(e.X))
case slice.OpCopy:
addr(len(e.Y))
case slice.OpReplace:
addl(len(e.X))
addr(len(e.Y))
case slice.OpEmit:
// Don't count emitted lines against the chunk size,
// and don't append emits to the edit list.
lcur += len(e.X)
rcur += len(e.X)
continue
}
cur.Edits = append(cur.Edits, e)
}
// If the last chunk empty, remove it entirely.
if cur.LEnd == cur.LStart && cur.REnd == cur.RStart {
out = out[:len(out)-1]
}
return &Diff{Left: lhs, Right: rhs, Chunks: out, Edits: es}
}
// AddContext updates d so that each chunk has up to n lines of context before
// and after, to the extent possible. Context lines are those that are equal on
// both sides of the diff. AddContext returns d.
//
// Adding context may result in overlapping chunks. Call Unify to merge
// overlapping chunks.
func (d *Diff) AddContext(n int) *Diff {
if n <= 0 || len(d.Chunks) == 0 {
return d
}
// Gather lines of context, add Emit operations to each chunk corresponding
// to those lines, and update the line ranges.
for _, c := range d.Chunks {
pre, post := d.findContext(c, n)
if len(pre) != 0 {
c.Edits = append([]Edit{{Op: slice.OpEmit, X: pre}}, c.Edits...)
c.LStart -= len(pre)
c.RStart -= len(pre)
}
if len(post) != 0 {
c.Edits = append(c.Edits, Edit{Op: slice.OpEmit, X: post})
c.LEnd += len(post)
c.REnd += len(post)
}
}
return d
}
// Unify updates d in-place to merge chunks that adjoin or overlap. For a Diff
// returned by New, this is a no-op; however AddContext may cause chunks to
// abut or to overlap. Unify returns d.
//
// Unify updates the edits of any merged chunks, but does not modify the
// original edit sequence in d.Edits.
func (d *Diff) Unify() *Diff { d.Chunks = UnifyChunks(d.Chunks); return d }
// findContext returns slices of up to n strings before and after the specified
// chunk that are equal on the left and right sides of the diff. Either or
// both slices may be empty if there are no such lines.
func (d *Diff) findContext(c *Chunk, n int) (pre, post []string) {
lcur, rcur := c.LStart-1, c.RStart-1
lend, rend := c.LEnd-1, c.REnd-1
for i := 1; i <= n; i++ {
p, q := lcur-i, rcur-i
if p < 0 || q < 0 || d.Left[p] != d.Right[q] {
break
}
pre = append(pre, d.Left[p]) // they are equal, so pick one
}
slices.Reverse(pre) // we walked backward from the start
for i := 0; i < n; i++ {
p, q := lend+i, rend+i
if p >= len(d.Left) || q >= len(d.Right) || d.Left[p] != d.Right[q] {
break
}
post = append(post, d.Left[p])
}
return
}
// A Chunk is a contiguous region within a diff covered by one or more
// consecutive edit operations.
type Chunk struct {
// The edits applied within this chunk, in order.
Edits []Edit
// The starting and ending lines of this chunk in the left input.
// Lines are 1-based, and the range includes start but excludes end.
LStart, LEnd int
// The starting and ending lines of this chunk in the right input.
// Lines are 1-based and the range includes start but excludes end.
RStart, REnd int
}
// UnifyChunks modifies the chunks in cs to merge adjoining or overlapping
// chunks, and returns a slice of the modified chunks.
func UnifyChunks(cs []*Chunk) []*Chunk {
if len(cs) == 0 {
return nil
}
merged := []*Chunk{cs[0]}
for _, c := range cs[1:] {
last := slice.At(merged, -1)
// If c does not abut or overlap last, there is nothing to do.
if c.LStart > last.LEnd {
merged = append(merged, c)
continue
}
lap := last.LEnd - c.LStart
end, start := slice.PtrAt(last.Edits, -1), slice.PtrAt(c.Edits, 0)
// If the chunks strictly overlap, it means one at least chunk has a
// context edit that runs into the other's span (possibly both).
//
// Cut off the overlapping lines from the context edit, and if doing so
// results in an empty context, remove that edit from the chunk.
//
// Note that it is safe to modify the context edits here, as they were
// constructed explicitly by AddContext and do not share state with the
// original script edits.
if lap > 0 {
if end.Op == slice.OpEmit { // last has post-context
if lap >= len(end.X) { // remove the whole edit
last.Edits = last.Edits[:len(last.Edits)-1]
end = slice.PtrAt(last.Edits, -1)
} else {
end.X = end.X[:len(end.X)-lap] // drop the overlap
}
// Fix up the range.
last.LEnd -= lap
last.REnd -= lap
} else if start.Op == slice.OpEmit { // start has pre-context
if lap >= len(start.X) { // remove the whole edit
c.Edits = c.Edits[1:]
start = slice.PtrAt(c.Edits, 0)
} else {
start.X = start.X[lap:] // drop the overlap
}
// Fix up the range.
c.LStart += lap
c.RStart += lap
}
// Reaching here, the two must now abut properly.
if c.LStart < last.LEnd {
panic("diff: context merge did not work correctly")
}
}
// If both chunks have context edits at the boundary, combine them into a
// single edit at the end of last. Any overlap has already been fixed.
if end.Op == slice.OpEmit && start.Op == slice.OpEmit {
// Move the edited lines from the head of c, and adjust the ends.
end.X = append(end.X, start.X...)
last.LEnd += len(start.X)
last.REnd += len(start.X)
// Discard the edit from the head of c, and adjust the starts.
c.Edits = c.Edits[1:]
c.LStart += len(start.X)
c.RStart += len(start.X)
}
// Merge.
last.LEnd = c.LEnd
last.REnd = c.REnd
last.Edits = append(last.Edits, c.Edits...)
}
return merged
}