escape_test.gno

6.99 Kb · 233 lines
  1package json
  2
  3import (
  4	"bytes"
  5	"testing"
  6	"unicode/utf8"
  7)
  8
  9func TestHexToInt(t *testing.T) {
 10	tests := []struct {
 11		name string
 12		c    byte
 13		want int
 14	}{
 15		{"Digit 0", '0', 0},
 16		{"Digit 9", '9', 9},
 17		{"Uppercase A", 'A', 10},
 18		{"Uppercase F", 'F', 15},
 19		{"Lowercase a", 'a', 10},
 20		{"Lowercase f", 'f', 15},
 21		{"Invalid character1", 'g', badHex},
 22		{"Invalid character2", 'G', badHex},
 23		{"Invalid character3", 'z', badHex},
 24	}
 25
 26	for _, tt := range tests {
 27		t.Run(tt.name, func(t *testing.T) {
 28			if got := h2i(tt.c); got != tt.want {
 29				t.Errorf("h2i() = %v, want %v", got, tt.want)
 30			}
 31		})
 32	}
 33}
 34
 35func TestIsSurrogatePair(t *testing.T) {
 36	testCases := []struct {
 37		name     string
 38		r        rune
 39		expected bool
 40	}{
 41		{"high surrogate start", 0xD800, true},
 42		{"high surrogate end", 0xDBFF, true},
 43		{"low surrogate start", 0xDC00, true},
 44		{"low surrogate end", 0xDFFF, true},
 45		{"Non-surrogate", 0x0000, false},
 46		{"Non-surrogate 2", 0xE000, false},
 47	}
 48
 49	for _, tc := range testCases {
 50		t.Run(tc.name, func(t *testing.T) {
 51			if got := isSurrogatePair(tc.r); got != tc.expected {
 52				t.Errorf("isSurrogate() = %v, want %v", got, tc.expected)
 53			}
 54		})
 55	}
 56}
 57
 58func TestCombineSurrogates(t *testing.T) {
 59	testCases := []struct {
 60		high, low rune
 61		expected  rune
 62	}{
 63		{0xD83D, 0xDC36, 0x1F436}, // 🐶 U+1F436 DOG FACE
 64		{0xD83D, 0xDE00, 0x1F600}, // 😀 U+1F600 GRINNING FACE
 65		{0xD83C, 0xDF03, 0x1F303}, // 🌃 U+1F303 NIGHT WITH STARS
 66	}
 67
 68	for _, tc := range testCases {
 69		result := combineSurrogates(tc.high, tc.low)
 70		if result != tc.expected {
 71			t.Errorf("combineSurrogates(%U, %U) = %U; want %U", tc.high, tc.low, result, tc.expected)
 72		}
 73	}
 74}
 75
 76func TestDecodeSingleUnicodeEscape(t *testing.T) {
 77	testCases := []struct {
 78		input    []byte
 79		expected rune
 80		isValid  bool
 81	}{
 82		// valid unicode escape sequences
 83		{[]byte(`\u0041`), 'A', true},
 84		{[]byte(`\u03B1`), 'α', true},
 85		{[]byte(`\u00E9`), 'é', true}, // valid non-English character
 86		{[]byte(`\u0021`), '!', true}, // valid special character
 87		{[]byte(`\uFF11`), '１', true},
 88		{[]byte(`\uD83D`), 0xD83D, true},
 89		{[]byte(`\uDE03`), 0xDE03, true},
 90
 91		// invalid unicode escape sequences
 92		{[]byte(`\u004`), utf8.RuneError, false},  // too short
 93		{[]byte(`\uXYZW`), utf8.RuneError, false}, // invalid hex
 94		{[]byte(`\u00G1`), utf8.RuneError, false}, // non-hex character
 95	}
 96
 97	for _, tc := range testCases {
 98		result, isValid := decodeSingleUnicodeEscape(tc.input)
 99		if result != tc.expected || isValid != tc.isValid {
100			t.Errorf("decodeSingleUnicodeEscape(%s) = (%U, %v); want (%U, %v)", tc.input, result, isValid, tc.expected, tc.isValid)
101		}
102	}
103}
104
105func TestDecodeUnicodeEscape(t *testing.T) {
106	tests := []struct {
107		input    []byte
108		expected rune
109		size     int
110	}{
111		{[]byte(`\u0041`), 'A', 6},
112		{[]byte(`\uD83D\uDE00`), 0x1F600, 12}, // 😀
113		{[]byte(`\uD834\uDD1E`), 0x1D11E, 12}, // 𝄞
114		{[]byte(`\uFFFF`), '\uFFFF', 6},
115		{[]byte(`\uXYZW`), utf8.RuneError, -1},
116		{[]byte(`\uD800`), utf8.RuneError, -1},       // single high surrogate
117		{[]byte(`\uDC00`), utf8.RuneError, -1},       // single low surrogate
118		{[]byte(`\uD800\uDC00`), 0x10000, 12},        // First code point above U+FFFF
119		{[]byte(`\uDBFF\uDFFF`), 0x10FFFF, 12},       // Maximum code point
120		{[]byte(`\uD83D\u0041`), utf8.RuneError, -1}, // invalid surrogate pair
121	}
122
123	for _, tc := range tests {
124		r, size := decodeUnicodeEscape(tc.input)
125		if r != tc.expected || size != tc.size {
126			t.Errorf("decodeUnicodeEscape(%q) = (%U, %d); want (%U, %d)", tc.input, r, size, tc.expected, tc.size)
127		}
128	}
129}
130
131func TestUnescapeToUTF8(t *testing.T) {
132	tests := []struct {
133		input       []byte
134		expectedIn  int
135		expectedOut int
136		isError     bool
137	}{
138		// valid escape sequences
139		{[]byte(`\n`), 2, 1, false},
140		{[]byte(`\t`), 2, 1, false},
141		{[]byte(`\u0041`), 6, 1, false},
142		{[]byte(`\u03B1`), 6, 2, false},
143		{[]byte(`\uD830\uDE03`), 12, 4, false},
144
145		// invalid escape sequences
146		{[]byte(`\`), -1, -1, true},            // incomplete escape sequence
147		{[]byte(`\x`), -1, -1, true},           // invalid escape character
148		{[]byte(`\u`), -1, -1, true},           // incomplete unicode escape sequence
149		{[]byte(`\u004`), -1, -1, true},        // invalid unicode escape sequence
150		{[]byte(`\uXYZW`), -1, -1, true},       // invalid unicode escape sequence
151		{[]byte(`\uD83D\u0041`), -1, -1, true}, // invalid unicode escape sequence
152	}
153
154	for _, tc := range tests {
155		input := make([]byte, len(tc.input))
156		copy(input, tc.input)
157		output := make([]byte, utf8.UTFMax)
158		inLen, outLen, err := processEscapedUTF8(input, output)
159		if (err != nil) != tc.isError {
160			t.Errorf("processEscapedUTF8(%q) = %v; want %v", tc.input, err, tc.isError)
161		}
162
163		if inLen != tc.expectedIn || outLen != tc.expectedOut {
164			t.Errorf("processEscapedUTF8(%q) = (%d, %d); want (%d, %d)", tc.input, inLen, outLen, tc.expectedIn, tc.expectedOut)
165		}
166	}
167}
168
169func TestUnescape(t *testing.T) {
170	tests := []struct {
171		name     string
172		input    []byte
173		expected []byte
174		isError  bool
175	}{
176		{"NoEscape", []byte("hello world"), []byte("hello world"), false},
177		{"SingleEscape", []byte("hello\\nworld"), []byte("hello\nworld"), false},
178		{"MultipleEscapes", []byte("line1\\nline2\\r\\nline3"), []byte("line1\nline2\r\nline3"), false},
179		{"UnicodeEscape", []byte("snowman:\\u2603"), []byte("snowman:\u2603"), false},
180		{"SurrogatePair", []byte("emoji:\\uD83D\\uDE00"), []byte("emoji:😀"), false},
181		{"InvalidEscape", []byte("hello\\xworld"), nil, true},
182		{"IncompleteUnicode", []byte("incomplete:\\u123"), nil, true},
183		{"InvalidSurrogatePair", []byte("invalid:\\uD83D\\u0041"), nil, true},
184	}
185
186	for _, tc := range tests {
187		t.Run(tc.name, func(t *testing.T) {
188			output := make([]byte, len(tc.input)*2) // Allocate extra space for possible expansion
189			result, err := Unescape(tc.input, output)
190			if (err != nil) != tc.isError {
191				t.Errorf("Unescape(%q) error = %v; want error = %v", tc.input, err, tc.isError)
192			}
193
194			if !tc.isError && !bytes.Equal(result, tc.expected) {
195				t.Errorf("Unescape(%q) = %q; want %q", tc.input, result, tc.expected)
196			}
197		})
198	}
199}
200
201func TestUnquoteBytes(t *testing.T) {
202	tests := []struct {
203		input    []byte
204		border   byte
205		expected []byte
206		ok       bool
207	}{
208		{[]byte("\"hello\""), '"', []byte("hello"), true},
209		{[]byte("'hello'"), '\'', []byte("hello"), true},
210		{[]byte("\"hello"), '"', nil, false},
211		{[]byte("hello\""), '"', nil, false},
212		{[]byte("\"he\\\"llo\""), '"', []byte("he\"llo"), true},
213		{[]byte("\"he\\nllo\""), '"', []byte("he\nllo"), true},
214		{[]byte("\"\""), '"', []byte(""), true},
215		{[]byte("''"), '\'', []byte(""), true},
216		{[]byte("\"\\u0041\""), '"', []byte("A"), true},
217		{[]byte(`"Hello, 世界"`), '"', []byte("Hello, 世界"), true},
218		{[]byte(`"Hello, \x80"`), '"', nil, false},
219		{[]byte(`"invalid surrogate: \uD83D\u0041"`), '"', nil, false},
220	}
221
222	for _, tc := range tests {
223		result, pass := unquoteBytes(tc.input, tc.border)
224
225		if pass != tc.ok {
226			t.Errorf("unquoteBytes(%q) = %v; want %v", tc.input, pass, tc.ok)
227		}
228
229		if !bytes.Equal(result, tc.expected) {
230			t.Errorf("unquoteBytes(%q) = %q; want %q", tc.input, result, tc.expected)
231		}
232	}
233}