buffer.gno

8.39 Kb · 462 lines
  1package json
  2
  3import (
  4	"errors"
  5	"io"
  6
  7	"gno.land/p/demo/ufmt"
  8)
  9
 10type buffer struct {
 11	data   []byte
 12	length int
 13	index  int
 14
 15	last  States
 16	state States
 17	class Classes
 18}
 19
 20// newBuffer creates a new buffer with the given data
 21func newBuffer(data []byte) *buffer {
 22	return &buffer{
 23		data:   data,
 24		length: len(data),
 25		last:   GO,
 26		state:  GO,
 27	}
 28}
 29
 30// first retrieves the first non-whitespace (or other escaped) character in the buffer.
 31func (b *buffer) first() (byte, error) {
 32	for ; b.index < b.length; b.index++ {
 33		c := b.data[b.index]
 34
 35		if !(c == whiteSpace || c == carriageReturn || c == newLine || c == tab) {
 36			return c, nil
 37		}
 38	}
 39
 40	return 0, io.EOF
 41}
 42
 43// current returns the byte of the current index.
 44func (b *buffer) current() (byte, error) {
 45	if b.index >= b.length {
 46		return 0, io.EOF
 47	}
 48
 49	return b.data[b.index], nil
 50}
 51
 52// next moves to the next byte and returns it.
 53func (b *buffer) next() (byte, error) {
 54	b.index++
 55	return b.current()
 56}
 57
 58// step just moves to the next position.
 59func (b *buffer) step() error {
 60	_, err := b.next()
 61	return err
 62}
 63
 64// move moves the index by the given position.
 65func (b *buffer) move(pos int) error {
 66	newIndex := b.index + pos
 67
 68	if newIndex > b.length {
 69		return io.EOF
 70	}
 71
 72	b.index = newIndex
 73
 74	return nil
 75}
 76
 77// slice returns the slice from the current index to the given position.
 78func (b *buffer) slice(pos int) ([]byte, error) {
 79	end := b.index + pos
 80
 81	if end > b.length {
 82		return nil, io.EOF
 83	}
 84
 85	return b.data[b.index:end], nil
 86}
 87
 88// sliceFromIndices returns a slice of the buffer's data starting from 'start' up to (but not including) 'stop'.
 89func (b *buffer) sliceFromIndices(start, stop int) []byte {
 90	if start > b.length {
 91		start = b.length
 92	}
 93
 94	if stop > b.length {
 95		stop = b.length
 96	}
 97
 98	return b.data[start:stop]
 99}
100
101// skip moves the index to skip the given byte.
102func (b *buffer) skip(bs byte) error {
103	for b.index < b.length {
104		if b.data[b.index] == bs && !b.backslash() {
105			return nil
106		}
107
108		b.index++
109	}
110
111	return io.EOF
112}
113
114// skipAndReturnIndex moves the buffer index forward by one and returns the new index.
115func (b *buffer) skipAndReturnIndex() (int, error) {
116	err := b.step()
117	if err != nil {
118		return 0, err
119	}
120
121	return b.index, nil
122}
123
124// skipUntil moves the buffer index forward until it encounters a byte contained in the endTokens set.
125func (b *buffer) skipUntil(endTokens map[byte]bool) (int, error) {
126	for b.index < b.length {
127		currentByte, err := b.current()
128		if err != nil {
129			return b.index, err
130		}
131
132		// Check if the current byte is in the set of end tokens.
133		if _, exists := endTokens[currentByte]; exists {
134			return b.index, nil
135		}
136
137		b.index++
138	}
139
140	return b.index, io.EOF
141}
142
143// significantTokens is a map where the keys are the significant characters in a JSON path.
144// The values in the map are all true, which allows us to use the map as a set for quick lookups.
145var significantTokens = [256]bool{
146	dot:          true, // access properties of an object
147	dollarSign:   true, // root object
148	atSign:       true, // current object
149	bracketOpen:  true, // start of an array index or filter expression
150	bracketClose: true, // end of an array index or filter expression
151}
152
153// filterTokens stores the filter expression tokens.
154var filterTokens = [256]bool{
155	aesterisk: true, // wildcard
156	andSign:   true,
157	orSign:    true,
158}
159
160// skipToNextSignificantToken advances the buffer index to the next significant character.
161// Significant characters are defined based on the JSON path syntax.
162func (b *buffer) skipToNextSignificantToken() {
163	for b.index < b.length {
164		current := b.data[b.index]
165
166		if significantTokens[current] {
167			break
168		}
169
170		b.index++
171	}
172}
173
174// backslash checks to see if the number of backslashes before the current index is odd.
175//
176// This is used to check if the current character is escaped. However, unlike the "unescape" function,
177// "backslash" only serves to check the number of backslashes.
178func (b *buffer) backslash() bool {
179	if b.index == 0 {
180		return false
181	}
182
183	count := 0
184	for i := b.index - 1; ; i-- {
185		if b.data[i] != backSlash {
186			break
187		}
188
189		count++
190
191		if i == 0 {
192			break
193		}
194	}
195
196	return count%2 != 0
197}
198
// numIndex is a 256-entry lookup table indexed by byte value. An entry is
// true for the bytes treated as numeric characters: the decimal digits, the
// decimal point and the exponent markers 'e' and 'E'. The signs '+' and '-'
// are not included.
var numIndex = [256]bool{
	'0': true, '1': true, '2': true, '3': true, '4': true,
	'5': true, '6': true, '7': true, '8': true, '9': true,
	'.': true,
	'e': true, 'E': true,
}
215
216// pathToken checks if the current token is a valid JSON path token.
217func (b *buffer) pathToken() error {
218	var stack []byte
219
220	inToken := false
221	inNumber := false
222	first := b.index
223
224	for b.index < b.length {
225		c := b.data[b.index]
226
227		switch {
228		case c == doubleQuote || c == singleQuote:
229			inToken = true
230			if err := b.step(); err != nil {
231				return errors.New("error stepping through buffer")
232			}
233
234			if err := b.skip(c); err != nil {
235				return errUnmatchedQuotePath
236			}
237
238			if b.index >= b.length {
239				return errUnmatchedQuotePath
240			}
241
242		case c == bracketOpen || c == parenOpen:
243			inToken = true
244			stack = append(stack, c)
245
246		case c == bracketClose || c == parenClose:
247			inToken = true
248			if len(stack) == 0 || (c == bracketClose && stack[len(stack)-1] != bracketOpen) || (c == parenClose && stack[len(stack)-1] != parenOpen) {
249				return errUnmatchedParenthesis
250			}
251
252			stack = stack[:len(stack)-1]
253
254		case pathStateContainsValidPathToken(c):
255			inToken = true
256
257		case c == plus || c == minus:
258			if inNumber || (b.index > 0 && numIndex[b.data[b.index-1]]) {
259				inToken = true
260			} else if !inToken && (b.index+1 < b.length && numIndex[b.data[b.index+1]]) {
261				inToken = true
262				inNumber = true
263			} else if !inToken {
264				return errInvalidToken
265			}
266
267		default:
268			if len(stack) != 0 || inToken {
269				inToken = true
270			} else {
271				goto end
272			}
273		}
274
275		b.index++
276	}
277
278end:
279	if len(stack) != 0 {
280		return errUnmatchedParenthesis
281	}
282
283	if first == b.index {
284		return errors.New("no token found")
285	}
286
287	if inNumber && !numIndex[b.data[b.index-1]] {
288		inNumber = false
289	}
290
291	return nil
292}
293
294func pathStateContainsValidPathToken(c byte) bool {
295	if significantTokens[c] {
296		return true
297	}
298
299	if filterTokens[c] {
300		return true
301	}
302
303	if numIndex[c] {
304		return true
305	}
306
307	if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' {
308		return true
309	}
310
311	return false
312}
313
314func (b *buffer) numeric(token bool) error {
315	if token {
316		b.last = GO
317	}
318
319	for ; b.index < b.length; b.index++ {
320		b.class = b.getClasses(doubleQuote)
321		if b.class == __ {
322			return errInvalidToken
323		}
324
325		b.state = StateTransitionTable[b.last][b.class]
326		if b.state == __ {
327			if token {
328				break
329			}
330
331			return errInvalidToken
332		}
333
334		if b.state < __ {
335			return nil
336		}
337
338		if b.state < MI || b.state > E3 {
339			return nil
340		}
341
342		b.last = b.state
343	}
344
345	if b.last != ZE && b.last != IN && b.last != FR && b.last != E3 {
346		return errInvalidToken
347	}
348
349	return nil
350}
351
352func (b *buffer) getClasses(c byte) Classes {
353	if b.data[b.index] >= 128 {
354		return C_ETC
355	}
356
357	if c == singleQuote {
358		return QuoteAsciiClasses[b.data[b.index]]
359	}
360
361	return AsciiClasses[b.data[b.index]]
362}
363
364func (b *buffer) getState() States {
365	b.last = b.state
366
367	b.class = b.getClasses(doubleQuote)
368	if b.class == __ {
369		return __
370	}
371
372	b.state = StateTransitionTable[b.last][b.class]
373
374	return b.state
375}
376
377// string parses a string token from the buffer.
378func (b *buffer) string(search byte, token bool) error {
379	if token {
380		b.last = GO
381	}
382
383	for ; b.index < b.length; b.index++ {
384		b.class = b.getClasses(search)
385
386		if b.class == __ {
387			return errInvalidToken
388		}
389
390		b.state = StateTransitionTable[b.last][b.class]
391		if b.state == __ {
392			return errInvalidToken
393		}
394
395		if b.state < __ {
396			break
397		}
398
399		b.last = b.state
400	}
401
402	return nil
403}
404
405func (b *buffer) word(bs []byte) error {
406	var c byte
407
408	max := len(bs)
409	index := 0
410
411	for ; b.index < b.length && index < max; b.index++ {
412		c = b.data[b.index]
413
414		if c != bs[index] {
415			return errInvalidToken
416		}
417
418		index++
419		if index >= max {
420			break
421		}
422	}
423
424	if index != max {
425		return errInvalidToken
426	}
427
428	return nil
429}
430
431func numberKind2f64(value any) (result float64, err error) {
432	switch typed := value.(type) {
433	case float64:
434		result = typed
435	case float32:
436		result = float64(typed)
437	case int:
438		result = float64(typed)
439	case int8:
440		result = float64(typed)
441	case int16:
442		result = float64(typed)
443	case int32:
444		result = float64(typed)
445	case int64:
446		result = float64(typed)
447	case uint:
448		result = float64(typed)
449	case uint8:
450		result = float64(typed)
451	case uint16:
452		result = float64(typed)
453	case uint32:
454		result = float64(typed)
455	case uint64:
456		result = float64(typed)
457	default:
458		err = ufmt.Errorf("invalid number type: %T", value)
459	}
460
461	return
462}