internal.gno
10.39 Kb · 198 lines
1package json
2
// Reference: https://github.com/freddierice/php_source/blob/467ed5d6edff72219afd3e644516f131118ef48e/ext/json/JSON_parser.c
// Copyright (c) 2005 JSON.org

// Go implementation is taken from: https://github.com/spyzhov/ajson/blob/master/internal/state.go
7
type (
	// States is a parser state or, when negative, an action code. It is the
	// element type of StateTransitionTable.
	States int8
	// Classes is a JSON character class. It is the element type of
	// AsciiClasses and QuoteAsciiClasses.
	Classes int8
)
12
// __ is the untyped constant -1 used to fill the entries of the lookup tables
// in this file that hold neither a character class, a state, nor an action
// code. Being untyped, it is assignable to both States and Classes.
const __ = -1
14
15// enum classes
16const (
17 C_SPACE Classes = iota /* space */
18 C_WHITE /* other whitespace */
19 C_LCURB /* { */
20 C_RCURB /* } */
21 C_LSQRB /* [ */
22 C_RSQRB /* ] */
23 C_COLON /* : */
24 C_COMMA /* , */
25 C_QUOTE /* " */
26 C_BACKS /* \ */
27 C_SLASH /* / */
28 C_PLUS /* + */
29 C_MINUS /* - */
30 C_POINT /* . */
31 C_ZERO /* 0 */
32 C_DIGIT /* 123456789 */
33 C_LOW_A /* a */
34 C_LOW_B /* b */
35 C_LOW_C /* c */
36 C_LOW_D /* d */
37 C_LOW_E /* e */
38 C_LOW_F /* f */
39 C_LOW_L /* l */
40 C_LOW_N /* n */
41 C_LOW_R /* r */
42 C_LOW_S /* s */
43 C_LOW_T /* t */
44 C_LOW_U /* u */
45 C_ABCDF /* ABCDF */
46 C_E /* E */
47 C_ETC /* everything else */
48)
49
50// AsciiClasses array maps the 128 ASCII characters into character classes.
51var AsciiClasses = [128]Classes{
52 /*
53 This array maps the 128 ASCII characters into character classes.
54 The remaining Unicode characters should be mapped to C_ETC.
55 Non-whitespace control characters are errors.
56 */
57 __, __, __, __, __, __, __, __,
58 __, C_WHITE, C_WHITE, __, __, C_WHITE, __, __,
59 __, __, __, __, __, __, __, __,
60 __, __, __, __, __, __, __, __,
61
62 C_SPACE, C_ETC, C_QUOTE, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
63 C_ETC, C_ETC, C_ETC, C_PLUS, C_COMMA, C_MINUS, C_POINT, C_SLASH,
64 C_ZERO, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,
65 C_DIGIT, C_DIGIT, C_COLON, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
66
67 C_ETC, C_ABCDF, C_ABCDF, C_ABCDF, C_ABCDF, C_E, C_ABCDF, C_ETC,
68 C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
69 C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
70 C_ETC, C_ETC, C_ETC, C_LSQRB, C_BACKS, C_RSQRB, C_ETC, C_ETC,
71
72 C_ETC, C_LOW_A, C_LOW_B, C_LOW_C, C_LOW_D, C_LOW_E, C_LOW_F, C_ETC,
73 C_ETC, C_ETC, C_ETC, C_ETC, C_LOW_L, C_ETC, C_LOW_N, C_ETC,
74 C_ETC, C_ETC, C_LOW_R, C_LOW_S, C_LOW_T, C_LOW_U, C_ETC, C_ETC,
75 C_ETC, C_ETC, C_ETC, C_LCURB, C_ETC, C_RCURB, C_ETC, C_ETC,
76}
77
78// QuoteAsciiClasses is a HACK for single quote from AsciiClasses
79var QuoteAsciiClasses = [128]Classes{
80 /*
81 This array maps the 128 ASCII characters into character classes.
82 The remaining Unicode characters should be mapped to C_ETC.
83 Non-whitespace control characters are errors.
84 */
85 __, __, __, __, __, __, __, __,
86 __, C_WHITE, C_WHITE, __, __, C_WHITE, __, __,
87 __, __, __, __, __, __, __, __,
88 __, __, __, __, __, __, __, __,
89
90 C_SPACE, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_QUOTE,
91 C_ETC, C_ETC, C_ETC, C_PLUS, C_COMMA, C_MINUS, C_POINT, C_SLASH,
92 C_ZERO, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,
93 C_DIGIT, C_DIGIT, C_COLON, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
94
95 C_ETC, C_ABCDF, C_ABCDF, C_ABCDF, C_ABCDF, C_E, C_ABCDF, C_ETC,
96 C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
97 C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
98 C_ETC, C_ETC, C_ETC, C_LSQRB, C_BACKS, C_RSQRB, C_ETC, C_ETC,
99
100 C_ETC, C_LOW_A, C_LOW_B, C_LOW_C, C_LOW_D, C_LOW_E, C_LOW_F, C_ETC,
101 C_ETC, C_ETC, C_ETC, C_ETC, C_LOW_L, C_ETC, C_LOW_N, C_ETC,
102 C_ETC, C_ETC, C_LOW_R, C_LOW_S, C_LOW_T, C_LOW_U, C_ETC, C_ETC,
103 C_ETC, C_ETC, C_ETC, C_LCURB, C_ETC, C_RCURB, C_ETC, C_ETC,
104}
105
106/*
107The state codes.
108*/
109const (
110 GO States = iota /* start */
111 OK /* ok */
112 OB /* object */
113 KE /* key */
114 CO /* colon */
115 VA /* value */
116 AR /* array */
117 ST /* string */
118 ES /* escape */
119 U1 /* u1 */
120 U2 /* u2 */
121 U3 /* u3 */
122 U4 /* u4 */
123 MI /* minus */
124 ZE /* zero */
125 IN /* integer */
126 DT /* dot */
127 FR /* fraction */
128 E1 /* e */
129 E2 /* ex */
130 E3 /* exp */
131 T1 /* tr */
132 T2 /* tru */
133 T3 /* true */
134 F1 /* fa */
135 F2 /* fal */
136 F3 /* fals */
137 F4 /* false */
138 N1 /* nu */
139 N2 /* nul */
140 N3 /* null */
141)
142
143// List of action codes.
144// these constants are defining an action that should be performed under certain conditions.
145const (
146 cl States = -2 /* colon */
147 cm States = -3 /* comma */
148 qt States = -4 /* quote */
149 bo States = -5 /* bracket open */
150 co States = -6 /* curly bracket open */
151 bc States = -7 /* bracket close */
152 cc States = -8 /* curly bracket close */
153 ec States = -9 /* curly bracket empty */
154)
155
156// StateTransitionTable is the state transition table takes the current state and the current symbol, and returns either
157// a new state or an action. An action is represented as a negative number. A JSON text is accepted if at the end of the
158// text the state is OK and if the mode is DONE.
159var StateTransitionTable = [31][31]States{
160 /*
161 The state transition table takes the current state and the current symbol,
162 and returns either a new state or an action. An action is represented as a
163 negative number. A JSON text is accepted if at the end of the text the
164 state is OK and if the mode is DONE.
165 white 1-9 ABCDF etc
166 space | { } [ ] : , " \ / + - . 0 | a b c d e f l n r s t u | E |*/
167 /*start GO*/ {GO, GO, co, __, bo, __, __, __, ST, __, __, __, MI, __, ZE, IN, __, __, __, __, __, F1, __, N1, __, __, T1, __, __, __, __},
168 /*ok OK*/ {OK, OK, __, cc, __, bc, __, cm, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __},
169 /*object OB*/ {OB, OB, __, ec, __, __, __, __, ST, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __},
170 /*key KE*/ {KE, KE, __, __, __, __, __, __, ST, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __},
171 /*colon CO*/ {CO, CO, __, __, __, __, cl, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __},
172 /*value VA*/ {VA, VA, co, __, bo, __, __, __, ST, __, __, __, MI, __, ZE, IN, __, __, __, __, __, F1, __, N1, __, __, T1, __, __, __, __},
173 /*array AR*/ {AR, AR, co, __, bo, bc, __, __, ST, __, __, __, MI, __, ZE, IN, __, __, __, __, __, F1, __, N1, __, __, T1, __, __, __, __},
174 /*string ST*/ {ST, __, ST, ST, ST, ST, ST, ST, qt, ES, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST},
175 /*escape ES*/ {__, __, __, __, __, __, __, __, ST, ST, ST, __, __, __, __, __, __, ST, __, __, __, ST, __, ST, ST, __, ST, U1, __, __, __},
176 /*u1 U1*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, U2, U2, U2, U2, U2, U2, U2, U2, __, __, __, __, __, __, U2, U2, __},
177 /*u2 U2*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, U3, U3, U3, U3, U3, U3, U3, U3, __, __, __, __, __, __, U3, U3, __},
178 /*u3 U3*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, U4, U4, U4, U4, U4, U4, U4, U4, __, __, __, __, __, __, U4, U4, __},
179 /*u4 U4*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, ST, ST, ST, ST, ST, ST, ST, ST, __, __, __, __, __, __, ST, ST, __},
180 /*minus MI*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, ZE, IN, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __},
181 /*zero ZE*/ {OK, OK, __, cc, __, bc, __, cm, __, __, __, __, __, DT, __, __, __, __, __, __, E1, __, __, __, __, __, __, __, __, E1, __},
182 /*int IN*/ {OK, OK, __, cc, __, bc, __, cm, __, __, __, __, __, DT, IN, IN, __, __, __, __, E1, __, __, __, __, __, __, __, __, E1, __},
183 /*dot DT*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, FR, FR, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __},
184 /*frac FR*/ {OK, OK, __, cc, __, bc, __, cm, __, __, __, __, __, __, FR, FR, __, __, __, __, E1, __, __, __, __, __, __, __, __, E1, __},
185 /*e E1*/ {__, __, __, __, __, __, __, __, __, __, __, E2, E2, __, E3, E3, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __},
186 /*ex E2*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, E3, E3, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __},
187 /*exp E3*/ {OK, OK, __, cc, __, bc, __, cm, __, __, __, __, __, __, E3, E3, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __},
188 /*tr T1*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, T2, __, __, __, __, __, __},
189 /*tru T2*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, T3, __, __, __},
190 /*true T3*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, OK, __, __, __, __, __, __, __, __, __, __},
191 /*fa F1*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, F2, __, __, __, __, __, __, __, __, __, __, __, __, __, __},
192 /*fal F2*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, F3, __, __, __, __, __, __, __, __},
193 /*fals F3*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, F4, __, __, __, __, __},
194 /*false F4*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, OK, __, __, __, __, __, __, __, __, __, __},
195 /*nu N1*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, N2, __, __, __},
196 /*nul N2*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, N3, __, __, __, __, __, __, __, __},
197 /*null N3*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, OK, __, __, __, __, __, __, __, __},
198}