forked from creachadair/mds
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreader.go
392 lines (361 loc) · 10.1 KB
/
reader.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
package mdiff
import (
"bufio"
"errors"
"fmt"
"io"
"strconv"
"strings"
"time"
"github.com/creachadair/mds/slice"
)
// A Patch is the parsed representation of a diff read from text format.
// It is the result type of Read and ReadUnified, and the element type of the
// slice returned by ReadGitPatch.
type Patch struct {
FileInfo *FileInfo // nil if no file header was present
// Chunks holds the chunks of the diff, in the order they were read.
Chunks []*Chunk
}
// ReadGitPatch reads a sequence of unified diff [patches] in the format
// produced by "git diff -p" with default settings. The commit metadata and
// header lines are ignored.
//
// Each section introduced by a "diff " line and carrying a "--- "/"+++ " file
// header yields one Patch, whose FileInfo is populated from that header.
// ReadGitPatch reports an error if the input contains no patches, if a
// "diff " line is not followed by a file header, or if reading fails.
//
// [patches]: https://git-scm.com/docs/diff-format#generate_patch_text_with_p
func ReadGitPatch(r io.Reader) ([]*Patch, error) {
	var out []*Patch
	rd := &diffReader{br: bufio.NewReader(r)}
	for {
		// Look for the "diff --git ..." line.
		if err := scanToPrefix(rd, "diff "); err == io.EOF {
			if len(out) == 0 {
				return nil, errors.New("no patches found")
			}
			return out, nil
		} else if err != nil {
			// A real read failure must not be mistaken for "keep scanning".
			return nil, fmt.Errorf("line %d: %w", rd.ln, err)
		}

		// Skip headers until the "--- " patch header.
		if err := scanToPrefix(rd, "--- "); err == io.EOF {
			return nil, fmt.Errorf("line %d: missing patch header", rd.ln)
		} else if err != nil {
			return nil, fmt.Errorf("line %d: %w", rd.ln, err)
		}
		if err := readUnifiedHeader(rd); err != nil {
			return nil, fmt.Errorf("line %d: read patch header: %w", rd.ln, err)
		} else if rd.fileInfo == nil {
			return nil, fmt.Errorf("line %d: incomplete patch header", rd.ln)
		}

		for {
			err := readUnifiedChunk(rd)
			if err == io.EOF || errors.Is(err, errUnexpectedPrefix) {
				// End of this file's chunks. Hand the header and chunks to the
				// caller and reset the reader state so the next patch starts
				// clean and cannot inherit a stale header.
				out = append(out, &Patch{FileInfo: rd.fileInfo, Chunks: rd.chunks})
				rd.fileInfo = nil
				rd.chunks = nil
				break
			} else if err != nil {
				return nil, err
			}
			// get more
		}
		// An unexpected prefix we will handle on the next iteration: the
		// offending line was pushed back and is rescanned above.
	}
}
// ReadUnified parses a single unified diff patch from r. The resulting Patch
// carries the file header, if the input had one, along with all the chunks.
func ReadUnified(r io.Reader) (*Patch, error) {
	state := diffReader{br: bufio.NewReader(r)}
	err := readUnified(&state)
	if err != nil {
		return nil, err
	}
	patch := &Patch{
		FileInfo: state.fileInfo,
		Chunks:   state.chunks,
	}
	return patch, nil
}
// Read parses an old-style "normal" Unix diff patch from r. The resulting
// Patch contains only chunks; no file header is recorded.
func Read(r io.Reader) (*Patch, error) {
	state := diffReader{br: bufio.NewReader(r)}
	err := readNormal(&state)
	if err != nil {
		return nil, err
	}
	patch := &Patch{Chunks: state.chunks}
	return patch, nil
}
// A diffReader provides common plumbing for reading a text diff. It keeps
// track of line numbers and one line of lookahead, and accumulates information
// about a file header, if one is present.
type diffReader struct {
// br is the buffered source of input lines.
br *bufio.Reader
// ln counts the lines read from br so far; it is reported in error messages.
ln int
// saved is the pushed-back lookahead line, or nil if there is none.
saved *string
// fileInfo is the parsed file header, or nil if none has been read.
fileInfo *FileInfo
// chunks accumulates the chunks parsed so far, in input order.
chunks []*Chunk
}
// readline returns the next line of input with its trailing newline removed.
// A line previously pushed back with unread is returned first and does not
// advance the line counter again. At end of input with nothing left to
// return, it reports io.EOF.
func (r *diffReader) readline() (string, error) {
	if pending := r.saved; pending != nil {
		r.saved = nil
		return *pending, nil
	}

	text, err := r.br.ReadString('\n')
	switch {
	case err == nil:
		r.ln++
		return strings.TrimSuffix(text, "\n"), nil
	case err == io.EOF && text != "":
		// The final line has no terminating newline; deliver it as-is and let
		// the next call report io.EOF.
		r.ln++
		return text, nil
	default:
		// Either a clean end of input or a genuine read error.
		return "", err
	}
}
// unread stores s as the lookahead line, so that the next call to readline
// returns it. There is room for only one such line: a second unread before
// the next readline replaces the first.
func (r *diffReader) unread(s string) {
	line := s
	r.saved = &line
}
// parseFileLine splits the payload of a file header line into a file name and
// a timestamp. The name is everything before the first tab in s, or all of s
// if there is no tab. The text after the tab is tried against each layout in
// timeFormat in order, and the first successful parse is returned. If there
// is no tab, or no layout matches, the zero time is returned.
func parseFileLine(s string, timeFormat ...string) (string, time.Time) {
	tab := strings.IndexByte(s, '\t')
	if tab < 0 {
		return s, time.Time{}
	}
	name, stamp := s[:tab], s[tab+1:]
	for _, layout := range timeFormat {
		ts, err := time.Parse(layout, stamp)
		if err != nil {
			continue
		}
		return name, ts
	}
	return name, time.Time{}
}
// parseSpan parses a string in the format "xM,N" where x is an arbitrary
// string prefix (tag) and M and N are non-negative decimal integers.
// If the string has the format "xM" only, parseSpan returns M, 0.
//
// It reports an error if s does not begin with tag, if either number is
// malformed, or if either number is negative. Line numbers and counts in a
// diff are never negative, so a negative value indicates corrupt input.
func parseSpan(tag, s string) (lo, hi int, err error) {
	rest, ok := strings.CutPrefix(s, tag)
	if !ok {
		return 0, 0, fmt.Errorf("missing %q prefix", tag)
	}
	first, second, hasHi := strings.Cut(rest, ",")
	lo, err = strconv.Atoi(first)
	if err != nil {
		return 0, 0, err
	} else if lo < 0 {
		return 0, 0, fmt.Errorf("negative value %d", lo)
	}
	if !hasHi {
		return lo, 0, nil
	}
	hi, err = strconv.Atoi(second)
	if err != nil {
		return 0, 0, err
	} else if hi < 0 {
		return 0, 0, fmt.Errorf("negative value %d", hi)
	}
	return lo, hi, nil
}
// readUnified consumes a complete unified diff from r: an optional file
// header followed by chunks until the input is exhausted. A failure while
// reading the header is wrapped with a "diff header" prefix; a failure while
// reading a chunk is returned unchanged.
func readUnified(r *diffReader) error {
	if err := readUnifiedHeader(r); err != nil {
		return fmt.Errorf("diff header: %w", err)
	}

	// Keep reading chunks until one of the reads fails; running out of input
	// is the normal way for the loop to finish.
	var err error
	for err == nil {
		err = readUnifiedChunk(r)
	}
	if err == io.EOF {
		return nil
	}
	return err
}
// readUnifiedHeader reads the two-line "--- "/"+++ " file header of a unified
// diff from r and stores the result in r.fileInfo.
//
// The header is optional: if the next line does not begin with "--- ", the
// line is pushed back and r.fileInfo is left alone. Once a "--- " line has
// been seen, the following line must begin with "+++ ".
func readUnifiedHeader(r *diffReader) error {
	first, err := r.readline()
	if err != nil {
		return err
	}
	if !strings.HasPrefix(first, "--- ") {
		// Not a header; leave the line for the chunk reader.
		r.unread(first)
		return nil
	}

	second, err := r.readline()
	if err != nil {
		return err
	}
	if !strings.HasPrefix(second, "+++ ") {
		return errors.New("missing right header")
	}

	info := new(FileInfo)
	info.Left, info.LeftTime = parseFileLine(strings.TrimPrefix(first, "--- "), TimeFormat)
	info.Right, info.RightTime = parseFileLine(strings.TrimPrefix(second, "+++ "), TimeFormat)
	r.fileInfo = info
	return nil
}
// readUnifiedChunk reads a single unified diff chunk from r and appends it to
// r.chunks.
//
// If reading the chunk header line fails, the error from readline (io.EOF at
// end of input) is returned unchanged. A chunk ends at end of input or at the
// next "@@" header, which is pushed back for the following call. If a line
// that cannot belong to a chunk is found, the line is pushed back, the chunk
// read so far is still recorded, and an error wrapping errUnexpectedPrefix is
// returned so the caller can decide what to do in context.
func readUnifiedChunk(r *diffReader) error {
	line, err := r.readline()
	if err != nil {
		return err
	}

	// Unified diff headers are "@@ -lspan +rspan @@".
	// But git diff adds additional stuff after the second "@@" to give the
	// reader context. To support that, we relax the format check slightly.
	parts := strings.Fields(line)
	if len(parts) < 4 || parts[0] != "@@" || parts[3] != "@@" {
		return fmt.Errorf("line %d: invalid chunk header %q", r.ln, line)
	}
	llo, lhi, err := parseSpan("-", parts[1])
	if err != nil {
		return fmt.Errorf("line %d: left span: %w", r.ln, err)
	}
	rlo, rhi, err := parseSpan("+", parts[2])
	if err != nil {
		return fmt.Errorf("line %d: right span: %w", r.ln, err)
	}

	// A span written without a count ("-M" rather than "-M,N") covers exactly
	// one line in unified format. parseSpan reports 0 for a missing count, so
	// substitute the implied count of 1; an explicit ",0" is left alone.
	if !strings.Contains(parts[1], ",") {
		lhi = 1
	}
	if !strings.Contains(parts[2], ",") {
		rhi = 1
	}

	ch := &Chunk{LStart: llo, LEnd: llo + lhi, RStart: rlo, REnd: rlo + rhi}

	// add appends text to the last edit of the chunk if it has the same
	// operator, or starts a new edit otherwise, so that consecutive lines of
	// the same kind are grouped into one edit.
	add := func(op slice.EditOp, text string) {
		if len(ch.Edits) == 0 || ch.Edits[len(ch.Edits)-1].Op != op {
			ch.Edits = append(ch.Edits, Edit{Op: op})
		}
		e := slice.PtrAt(ch.Edits, -1)
		switch op {
		case slice.OpDrop, slice.OpEmit:
			e.X = append(e.X, text)
		case slice.OpCopy:
			e.Y = append(e.Y, text)
		default:
			panic("unexpected operator " + string(op))
		}
	}

nextLine:
	for {
		line, err := r.readline()
		if err == io.EOF {
			break // end of input, end of chunk
		} else if err != nil {
			return err
		} else if line == "" {
			return fmt.Errorf("line %d: unexpected blank line", r.ln)
		}
		switch line[0] {
		case ' ': // context
			add(slice.OpEmit, line[1:])
		case '-': // deletion from lhs
			add(slice.OpDrop, line[1:])
		case '+': // addition from rhs
			add(slice.OpCopy, line[1:])
		case '@': // another diff chunk
			r.unread(line)
			break nextLine
		default:
			// Something else, maybe the start of a new patch or something.
			// Report an error, but save the line and the chunk in case the caller
			// knows what to do about it in context.
			r.unread(line)
			r.chunks = append(r.chunks, ch)
			return fmt.Errorf("line %d: %w %c", r.ln, errUnexpectedPrefix, line[0])
		}
	}
	r.chunks = append(r.chunks, ch)
	return nil
}
// errUnexpectedPrefix is a sentinel error reported by readUnifiedChunk to
// report a line that is not part of a chunk. Before returning it,
// readUnifiedChunk pushes the offending line back onto the reader and records
// the chunk read so far, so a caller such as ReadGitPatch can detect the
// sentinel with errors.Is and continue from that line.
var errUnexpectedPrefix = errors.New("unexpected prefix")
// readNormal consumes a "normal" Unix diff patch from r, appending one chunk
// to r.chunks for every change command until the input is exhausted.
//
// Each change command has the form <left-range><cmd><right-range>, where cmd
// is "a" (add), "c" (change) or "d" (delete) and a range is either a single
// line number or "first,last". The number of "<" and ">" lines that follow is
// cross-checked against the ranges for the sides the command affects.
func readNormal(r *diffReader) error {
	for {
		header, err := r.readline()
		switch {
		case err == io.EOF:
			return nil
		case err != nil:
			return err
		case header == "":
			return fmt.Errorf("line %d: unexpected blank line", r.ln)
		}

		// Locate the command letter, trying "a", then "c", then "d".
		var lspec, cmd, rspec string
		for _, op := range []string{"a", "c", "d"} {
			if before, after, found := strings.Cut(header, op); found {
				lspec, cmd, rspec = before, op, after
				break
			}
		}
		if cmd == "" {
			return fmt.Errorf("line %d: invalid change command %q", r.ln, header)
		}

		// Ranges in the input are inclusive; convert each to a half-open
		// [lo, hi) pair. A range with a single number covers just that line.
		llo, lhi, err := parseSpan("", lspec)
		if err != nil {
			return fmt.Errorf("line %d: invalid line range %q: %w", r.ln, lspec, err)
		}
		if lhi == 0 {
			lhi = llo // m, 0 → m, m
		}
		lhi++

		rlo, rhi, err := parseSpan("", rspec)
		if err != nil {
			return fmt.Errorf("line %d: invalid line range %q: %w", r.ln, rspec, err)
		}
		if rhi == 0 {
			rhi = rlo // n, 0 → n, n
		}
		rhi++

		headerLine := r.ln // remembered for the cross-check diagnostics
		edit, err := readNormalEdit(r)
		if err != nil {
			return err
		}

		var checkLeft, checkRight bool
		switch cmd {
		case "a":
			edit.Op = slice.OpCopy
			llo++ // Adds happen after the marked line.
			checkRight = true
		case "c":
			edit.Op = slice.OpReplace
			checkLeft, checkRight = true, true
		case "d":
			edit.Op = slice.OpDrop
			rlo++ // Deletes happen after the marked line.
			checkLeft = true
		}

		// Cross-check the number of lines reported in the change spec with
		// the number we actually read out of the chunk data.
		if want := rhi - rlo; checkRight && len(edit.Y) != want {
			return fmt.Errorf("line %d: add got %d lines, want %d", headerLine, len(edit.Y), want)
		}
		if want := lhi - llo; checkLeft && len(edit.X) != want {
			return fmt.Errorf("line %d: delete got %d lines, want %d", headerLine, len(edit.X), want)
		}

		chunk := &Chunk{
			Edits:  []Edit{edit},
			LStart: llo,
			LEnd:   lhi,
			RStart: rlo,
			REnd:   rhi,
		}
		r.chunks = append(r.chunks, chunk)
	}
}
// readNormalEdit reads the body of one normal-diff change from r: zero or
// more "< " (deleted) lines, an optional "---" separator, and zero or more
// "> " (inserted) lines. Deleted lines are collected in X and inserted lines
// in Y of the returned Edit; the Op field is left for the caller to set.
//
// Reading stops at end of input or at the first line that is none of the
// above, which is pushed back for the caller. Deleted lines after the
// separator or after an inserted line, inserted lines directly after deleted
// lines with no separator, and a repeated separator are reported as errors.
func readNormalEdit(r *diffReader) (Edit, error) {
	var (
		edit   Edit
		sawSep bool // whether we have seen a "---" separator
	)
	for {
		line, err := r.readline()
		if err == io.EOF {
			return edit, nil
		}
		if err != nil {
			return Edit{}, err
		}

		switch {
		case strings.HasPrefix(line, "< "):
			if sawSep || len(edit.Y) > 0 {
				return Edit{}, fmt.Errorf("line %d: unexpected delete line %q", r.ln, line)
			}
			edit.X = append(edit.X, line[len("< "):])

		case strings.HasPrefix(line, "> "):
			if !sawSep && len(edit.X) > 0 {
				return Edit{}, fmt.Errorf("line %d: unexpected insert line %q", r.ln, line)
			}
			edit.Y = append(edit.Y, line[len("> "):])

		case line == "---":
			if sawSep {
				return Edit{}, fmt.Errorf("line %d: unexpected --- separator", r.ln)
			}
			sawSep = true

		default:
			// Not part of this edit; hand it back for the caller to examine.
			r.unread(line)
			return edit, nil
		}
	}
}
// scanToPrefix discards lines from r until it finds one that begins with
// prefix. The matching line is pushed back so the caller can read it again,
// and nil is returned. If reading fails first, that error is returned as-is;
// it is io.EOF when the input ends without a match.
func scanToPrefix(r *diffReader, prefix string) error {
	line, err := r.readline()
	for ; err == nil; line, err = r.readline() {
		if strings.HasPrefix(line, prefix) {
			r.unread(line)
			return nil
		}
	}
	return err // may be io.EOF, caller will check
}