escape_test.gno
6.99 Kb · 233 lines
1package json
2
3import (
4 "bytes"
5 "testing"
6 "unicode/utf8"
7)
8
9func TestHexToInt(t *testing.T) {
10 tests := []struct {
11 name string
12 c byte
13 want int
14 }{
15 {"Digit 0", '0', 0},
16 {"Digit 9", '9', 9},
17 {"Uppercase A", 'A', 10},
18 {"Uppercase F", 'F', 15},
19 {"Lowercase a", 'a', 10},
20 {"Lowercase f", 'f', 15},
21 {"Invalid character1", 'g', badHex},
22 {"Invalid character2", 'G', badHex},
23 {"Invalid character3", 'z', badHex},
24 }
25
26 for _, tt := range tests {
27 t.Run(tt.name, func(t *testing.T) {
28 if got := h2i(tt.c); got != tt.want {
29 t.Errorf("h2i() = %v, want %v", got, tt.want)
30 }
31 })
32 }
33}
34
35func TestIsSurrogatePair(t *testing.T) {
36 testCases := []struct {
37 name string
38 r rune
39 expected bool
40 }{
41 {"high surrogate start", 0xD800, true},
42 {"high surrogate end", 0xDBFF, true},
43 {"low surrogate start", 0xDC00, true},
44 {"low surrogate end", 0xDFFF, true},
45 {"Non-surrogate", 0x0000, false},
46 {"Non-surrogate 2", 0xE000, false},
47 }
48
49 for _, tc := range testCases {
50 t.Run(tc.name, func(t *testing.T) {
51 if got := isSurrogatePair(tc.r); got != tc.expected {
52 t.Errorf("isSurrogate() = %v, want %v", got, tc.expected)
53 }
54 })
55 }
56}
57
58func TestCombineSurrogates(t *testing.T) {
59 testCases := []struct {
60 high, low rune
61 expected rune
62 }{
63 {0xD83D, 0xDC36, 0x1F436}, // πΆ U+1F436 DOG FACE
64 {0xD83D, 0xDE00, 0x1F600}, // π U+1F600 GRINNING FACE
65 {0xD83C, 0xDF03, 0x1F303}, // π U+1F303 NIGHT WITH STARS
66 }
67
68 for _, tc := range testCases {
69 result := combineSurrogates(tc.high, tc.low)
70 if result != tc.expected {
71 t.Errorf("combineSurrogates(%U, %U) = %U; want %U", tc.high, tc.low, result, tc.expected)
72 }
73 }
74}
75
76func TestDecodeSingleUnicodeEscape(t *testing.T) {
77 testCases := []struct {
78 input []byte
79 expected rune
80 isValid bool
81 }{
82 // valid unicode escape sequences
83 {[]byte(`\u0041`), 'A', true},
84 {[]byte(`\u03B1`), 'Ξ±', true},
85 {[]byte(`\u00E9`), 'Γ©', true}, // valid non-English character
86 {[]byte(`\u0021`), '!', true}, // valid special character
87 {[]byte(`\uFF11`), 'οΌ', true},
88 {[]byte(`\uD83D`), 0xD83D, true},
89 {[]byte(`\uDE03`), 0xDE03, true},
90
91 // invalid unicode escape sequences
92 {[]byte(`\u004`), utf8.RuneError, false}, // too short
93 {[]byte(`\uXYZW`), utf8.RuneError, false}, // invalid hex
94 {[]byte(`\u00G1`), utf8.RuneError, false}, // non-hex character
95 }
96
97 for _, tc := range testCases {
98 result, isValid := decodeSingleUnicodeEscape(tc.input)
99 if result != tc.expected || isValid != tc.isValid {
100 t.Errorf("decodeSingleUnicodeEscape(%s) = (%U, %v); want (%U, %v)", tc.input, result, isValid, tc.expected, tc.isValid)
101 }
102 }
103}
104
105func TestDecodeUnicodeEscape(t *testing.T) {
106 tests := []struct {
107 input []byte
108 expected rune
109 size int
110 }{
111 {[]byte(`\u0041`), 'A', 6},
112 {[]byte(`\uD83D\uDE00`), 0x1F600, 12}, // π
113 {[]byte(`\uD834\uDD1E`), 0x1D11E, 12}, // π
114 {[]byte(`\uFFFF`), '\uFFFF', 6},
115 {[]byte(`\uXYZW`), utf8.RuneError, -1},
116 {[]byte(`\uD800`), utf8.RuneError, -1}, // single high surrogate
117 {[]byte(`\uDC00`), utf8.RuneError, -1}, // single low surrogate
118 {[]byte(`\uD800\uDC00`), 0x10000, 12}, // First code point above U+FFFF
119 {[]byte(`\uDBFF\uDFFF`), 0x10FFFF, 12}, // Maximum code point
120 {[]byte(`\uD83D\u0041`), utf8.RuneError, -1}, // invalid surrogate pair
121 }
122
123 for _, tc := range tests {
124 r, size := decodeUnicodeEscape(tc.input)
125 if r != tc.expected || size != tc.size {
126 t.Errorf("decodeUnicodeEscape(%q) = (%U, %d); want (%U, %d)", tc.input, r, size, tc.expected, tc.size)
127 }
128 }
129}
130
131func TestUnescapeToUTF8(t *testing.T) {
132 tests := []struct {
133 input []byte
134 expectedIn int
135 expectedOut int
136 isError bool
137 }{
138 // valid escape sequences
139 {[]byte(`\n`), 2, 1, false},
140 {[]byte(`\t`), 2, 1, false},
141 {[]byte(`\u0041`), 6, 1, false},
142 {[]byte(`\u03B1`), 6, 2, false},
143 {[]byte(`\uD830\uDE03`), 12, 4, false},
144
145 // invalid escape sequences
146 {[]byte(`\`), -1, -1, true}, // incomplete escape sequence
147 {[]byte(`\x`), -1, -1, true}, // invalid escape character
148 {[]byte(`\u`), -1, -1, true}, // incomplete unicode escape sequence
149 {[]byte(`\u004`), -1, -1, true}, // invalid unicode escape sequence
150 {[]byte(`\uXYZW`), -1, -1, true}, // invalid unicode escape sequence
151 {[]byte(`\uD83D\u0041`), -1, -1, true}, // invalid unicode escape sequence
152 }
153
154 for _, tc := range tests {
155 input := make([]byte, len(tc.input))
156 copy(input, tc.input)
157 output := make([]byte, utf8.UTFMax)
158 inLen, outLen, err := processEscapedUTF8(input, output)
159 if (err != nil) != tc.isError {
160 t.Errorf("processEscapedUTF8(%q) = %v; want %v", tc.input, err, tc.isError)
161 }
162
163 if inLen != tc.expectedIn || outLen != tc.expectedOut {
164 t.Errorf("processEscapedUTF8(%q) = (%d, %d); want (%d, %d)", tc.input, inLen, outLen, tc.expectedIn, tc.expectedOut)
165 }
166 }
167}
168
169func TestUnescape(t *testing.T) {
170 tests := []struct {
171 name string
172 input []byte
173 expected []byte
174 isError bool
175 }{
176 {"NoEscape", []byte("hello world"), []byte("hello world"), false},
177 {"SingleEscape", []byte("hello\\nworld"), []byte("hello\nworld"), false},
178 {"MultipleEscapes", []byte("line1\\nline2\\r\\nline3"), []byte("line1\nline2\r\nline3"), false},
179 {"UnicodeEscape", []byte("snowman:\\u2603"), []byte("snowman:\u2603"), false},
180 {"SurrogatePair", []byte("emoji:\\uD83D\\uDE00"), []byte("emoji:π"), false},
181 {"InvalidEscape", []byte("hello\\xworld"), nil, true},
182 {"IncompleteUnicode", []byte("incomplete:\\u123"), nil, true},
183 {"InvalidSurrogatePair", []byte("invalid:\\uD83D\\u0041"), nil, true},
184 }
185
186 for _, tc := range tests {
187 t.Run(tc.name, func(t *testing.T) {
188 output := make([]byte, len(tc.input)*2) // Allocate extra space for possible expansion
189 result, err := Unescape(tc.input, output)
190 if (err != nil) != tc.isError {
191 t.Errorf("Unescape(%q) error = %v; want error = %v", tc.input, err, tc.isError)
192 }
193
194 if !tc.isError && !bytes.Equal(result, tc.expected) {
195 t.Errorf("Unescape(%q) = %q; want %q", tc.input, result, tc.expected)
196 }
197 })
198 }
199}
200
201func TestUnquoteBytes(t *testing.T) {
202 tests := []struct {
203 input []byte
204 border byte
205 expected []byte
206 ok bool
207 }{
208 {[]byte("\"hello\""), '"', []byte("hello"), true},
209 {[]byte("'hello'"), '\'', []byte("hello"), true},
210 {[]byte("\"hello"), '"', nil, false},
211 {[]byte("hello\""), '"', nil, false},
212 {[]byte("\"he\\\"llo\""), '"', []byte("he\"llo"), true},
213 {[]byte("\"he\\nllo\""), '"', []byte("he\nllo"), true},
214 {[]byte("\"\""), '"', []byte(""), true},
215 {[]byte("''"), '\'', []byte(""), true},
216 {[]byte("\"\\u0041\""), '"', []byte("A"), true},
217 {[]byte(`"Hello, δΈη"`), '"', []byte("Hello, δΈη"), true},
218 {[]byte(`"Hello, \x80"`), '"', nil, false},
219 {[]byte(`"invalid surrogate: \uD83D\u0041"`), '"', nil, false},
220 }
221
222 for _, tc := range tests {
223 result, pass := unquoteBytes(tc.input, tc.border)
224
225 if pass != tc.ok {
226 t.Errorf("unquoteBytes(%q) = %v; want %v", tc.input, pass, tc.ok)
227 }
228
229 if !bytes.Equal(result, tc.expected) {
230 t.Errorf("unquoteBytes(%q) = %q; want %q", tc.input, result, tc.expected)
231 }
232 }
233}