internal.gno

10.39 Kb · 198 lines
  1package json
  2
  3// Reference: https://github.com/freddierice/php_source/blob/467ed5d6edff72219afd3e644516f131118ef48e/ext/json/JSON_parser.c
  4// Copyright (c) 2005 JSON.org
  5
  6// Go implementation is taken from: https://github.com/spyzhov/ajson/blob/master/internal/state.go
  7
  8type (
  9	States  int8 // possible states of the parser
 10	Classes int8 // JSON string character types
 11)
 12
 13const __ = -1
 14
 15// enum classes
 16const (
 17	C_SPACE Classes = iota /* space */
 18	C_WHITE                /* other whitespace */
 19	C_LCURB                /* {  */
 20	C_RCURB                /* } */
 21	C_LSQRB                /* [ */
 22	C_RSQRB                /* ] */
 23	C_COLON                /* : */
 24	C_COMMA                /* , */
 25	C_QUOTE                /* " */
 26	C_BACKS                /* \ */
 27	C_SLASH                /* / */
 28	C_PLUS                 /* + */
 29	C_MINUS                /* - */
 30	C_POINT                /* . */
 31	C_ZERO                 /* 0 */
 32	C_DIGIT                /* 123456789 */
 33	C_LOW_A                /* a */
 34	C_LOW_B                /* b */
 35	C_LOW_C                /* c */
 36	C_LOW_D                /* d */
 37	C_LOW_E                /* e */
 38	C_LOW_F                /* f */
 39	C_LOW_L                /* l */
 40	C_LOW_N                /* n */
 41	C_LOW_R                /* r */
 42	C_LOW_S                /* s */
 43	C_LOW_T                /* t */
 44	C_LOW_U                /* u */
 45	C_ABCDF                /* ABCDF */
 46	C_E                    /* E */
 47	C_ETC                  /* everything else */
 48)
 49
// AsciiClasses maps each of the 128 ASCII code points, used as the array
// index, to its character class. Slots holding __ belong to the control
// characters that have no class; the double quote (0x22) is the C_QUOTE
// character in this table.
var AsciiClasses = [128]Classes{
	/*
	   This array maps the 128 ASCII characters into character classes.
	   The remaining Unicode characters should be mapped to C_ETC.
	   Non-whitespace control characters are errors.
	*/
	// 0x00-0x1F: control characters; only \t (0x09), \n (0x0A) and \r (0x0D) have a class.
	__, __, __, __, __, __, __, __,
	__, C_WHITE, C_WHITE, __, __, C_WHITE, __, __,
	__, __, __, __, __, __, __, __,
	__, __, __, __, __, __, __, __,

	// 0x20-0x3F: space ! " # $ % & ' ( ) * + , - . / 0-9 : ; < = > ?
	C_SPACE, C_ETC, C_QUOTE, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
	C_ETC, C_ETC, C_ETC, C_PLUS, C_COMMA, C_MINUS, C_POINT, C_SLASH,
	C_ZERO, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,
	C_DIGIT, C_DIGIT, C_COLON, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,

	// 0x40-0x5F: @ A-Z [ \ ] ^ _
	C_ETC, C_ABCDF, C_ABCDF, C_ABCDF, C_ABCDF, C_E, C_ABCDF, C_ETC,
	C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
	C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
	C_ETC, C_ETC, C_ETC, C_LSQRB, C_BACKS, C_RSQRB, C_ETC, C_ETC,

	// 0x60-0x7F: ` a-z { | } ~ DEL
	C_ETC, C_LOW_A, C_LOW_B, C_LOW_C, C_LOW_D, C_LOW_E, C_LOW_F, C_ETC,
	C_ETC, C_ETC, C_ETC, C_ETC, C_LOW_L, C_ETC, C_LOW_N, C_ETC,
	C_ETC, C_ETC, C_LOW_R, C_LOW_S, C_LOW_T, C_LOW_U, C_ETC, C_ETC,
	C_ETC, C_ETC, C_ETC, C_LCURB, C_ETC, C_RCURB, C_ETC, C_ETC,
}
 77
// QuoteAsciiClasses is a HACK for single quote from AsciiClasses: it is the
// same table except that the single quote (0x27) is classed as C_QUOTE and
// the double quote (0x22) as C_ETC. Presumably it is selected when the input
// uses single-quoted strings; confirm against the caller.
var QuoteAsciiClasses = [128]Classes{
	/*
	   This array maps the 128 ASCII characters into character classes.
	   The remaining Unicode characters should be mapped to C_ETC.
	   Non-whitespace control characters are errors.
	*/
	// 0x00-0x1F: control characters; only \t (0x09), \n (0x0A) and \r (0x0D) have a class.
	__, __, __, __, __, __, __, __,
	__, C_WHITE, C_WHITE, __, __, C_WHITE, __, __,
	__, __, __, __, __, __, __, __,
	__, __, __, __, __, __, __, __,

	// 0x20-0x3F: space ! " # $ % & ' ( ) * + , - . / 0-9 : ; < = > ?
	// Here " (0x22) is C_ETC and ' (0x27) is C_QUOTE.
	C_SPACE, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_QUOTE,
	C_ETC, C_ETC, C_ETC, C_PLUS, C_COMMA, C_MINUS, C_POINT, C_SLASH,
	C_ZERO, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,
	C_DIGIT, C_DIGIT, C_COLON, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,

	// 0x40-0x5F: @ A-Z [ \ ] ^ _
	C_ETC, C_ABCDF, C_ABCDF, C_ABCDF, C_ABCDF, C_E, C_ABCDF, C_ETC,
	C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
	C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
	C_ETC, C_ETC, C_ETC, C_LSQRB, C_BACKS, C_RSQRB, C_ETC, C_ETC,

	// 0x60-0x7F: ` a-z { | } ~ DEL
	C_ETC, C_LOW_A, C_LOW_B, C_LOW_C, C_LOW_D, C_LOW_E, C_LOW_F, C_ETC,
	C_ETC, C_ETC, C_ETC, C_ETC, C_LOW_L, C_ETC, C_LOW_N, C_ETC,
	C_ETC, C_ETC, C_LOW_R, C_LOW_S, C_LOW_T, C_LOW_U, C_ETC, C_ETC,
	C_ETC, C_ETC, C_ETC, C_LCURB, C_ETC, C_RCURB, C_ETC, C_ETC,
}
105
106/*
107The state codes.
108*/
109const (
110	GO States = iota /* start    */
111	OK               /* ok       */
112	OB               /* object   */
113	KE               /* key      */
114	CO               /* colon    */
115	VA               /* value    */
116	AR               /* array    */
117	ST               /* string   */
118	ES               /* escape   */
119	U1               /* u1       */
120	U2               /* u2       */
121	U3               /* u3       */
122	U4               /* u4       */
123	MI               /* minus    */
124	ZE               /* zero     */
125	IN               /* integer  */
126	DT               /* dot      */
127	FR               /* fraction */
128	E1               /* e        */
129	E2               /* ex       */
130	E3               /* exp      */
131	T1               /* tr       */
132	T2               /* tru      */
133	T3               /* true     */
134	F1               /* fa       */
135	F2               /* fal      */
136	F3               /* fals     */
137	F4               /* false    */
138	N1               /* nu       */
139	N2               /* nul      */
140	N3               /* null     */
141)
142
143// List of action codes.
144// these constants are defining an action that should be performed under certain conditions.
145const (
146	cl States = -2 /* colon           */
147	cm States = -3 /* comma           */
148	qt States = -4 /* quote           */
149	bo States = -5 /* bracket open    */
150	co States = -6 /* curly bracket open  */
151	bc States = -7 /* bracket close   */
152	cc States = -8 /* curly bracket close */
153	ec States = -9 /* curly bracket empty */
154)
155
// StateTransitionTable is the state transition table. It is indexed first by
// the current state (one row per States value) and then by the class of the
// current symbol (one column per Classes value), and yields either a new state
// or an action. An action is represented as a negative number. A JSON text is
// accepted if at the end of the text the state is OK and if the mode is DONE.
var StateTransitionTable = [31][31]States{
	/*
	   The state transition table takes the current state and the current symbol,
	   and returns either a new state or an action. An action is represented as a
	   negative number. A JSON text is accepted if at the end of the text the
	   state is OK and if the mode is DONE.
	                  white                                                    1-9                                                ABCDF   etc
	            space   |   {   }   [   ]   :   ,   "   \   /   +   -   .   0   |   a   b   c   d   e   f   l   n   r   s   t   u   |   E   |*/
	/*start  GO*/ {GO, GO, co, __, bo, __, __, __, ST, __, __, __, MI, __, ZE, IN, __, __, __, __, __, F1, __, N1, __, __, T1, __, __, __, __},
	/*ok     OK*/ {OK, OK, __, cc, __, bc, __, cm, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __},
	/*object OB*/ {OB, OB, __, ec, __, __, __, __, ST, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __},
	/*key    KE*/ {KE, KE, __, __, __, __, __, __, ST, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __},
	/*colon  CO*/ {CO, CO, __, __, __, __, cl, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __},
	/*value  VA*/ {VA, VA, co, __, bo, __, __, __, ST, __, __, __, MI, __, ZE, IN, __, __, __, __, __, F1, __, N1, __, __, T1, __, __, __, __},
	/*array  AR*/ {AR, AR, co, __, bo, bc, __, __, ST, __, __, __, MI, __, ZE, IN, __, __, __, __, __, F1, __, N1, __, __, T1, __, __, __, __},
	/*string ST*/ {ST, __, ST, ST, ST, ST, ST, ST, qt, ES, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST},
	/*escape ES*/ {__, __, __, __, __, __, __, __, ST, ST, ST, __, __, __, __, __, __, ST, __, __, __, ST, __, ST, ST, __, ST, U1, __, __, __},
	/*u1     U1*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, U2, U2, U2, U2, U2, U2, U2, U2, __, __, __, __, __, __, U2, U2, __},
	/*u2     U2*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, U3, U3, U3, U3, U3, U3, U3, U3, __, __, __, __, __, __, U3, U3, __},
	/*u3     U3*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, U4, U4, U4, U4, U4, U4, U4, U4, __, __, __, __, __, __, U4, U4, __},
	/*u4     U4*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, ST, ST, ST, ST, ST, ST, ST, ST, __, __, __, __, __, __, ST, ST, __},
	/*minus  MI*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, ZE, IN, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __},
	/*zero   ZE*/ {OK, OK, __, cc, __, bc, __, cm, __, __, __, __, __, DT, __, __, __, __, __, __, E1, __, __, __, __, __, __, __, __, E1, __},
	/*int    IN*/ {OK, OK, __, cc, __, bc, __, cm, __, __, __, __, __, DT, IN, IN, __, __, __, __, E1, __, __, __, __, __, __, __, __, E1, __},
	/*dot    DT*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, FR, FR, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __},
	/*frac   FR*/ {OK, OK, __, cc, __, bc, __, cm, __, __, __, __, __, __, FR, FR, __, __, __, __, E1, __, __, __, __, __, __, __, __, E1, __},
	/*e      E1*/ {__, __, __, __, __, __, __, __, __, __, __, E2, E2, __, E3, E3, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __},
	/*ex     E2*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, E3, E3, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __},
	/*exp    E3*/ {OK, OK, __, cc, __, bc, __, cm, __, __, __, __, __, __, E3, E3, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __},
	/*tr     T1*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, T2, __, __, __, __, __, __},
	/*tru    T2*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, T3, __, __, __},
	/*true   T3*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, OK, __, __, __, __, __, __, __, __, __, __},
	/*fa     F1*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, F2, __, __, __, __, __, __, __, __, __, __, __, __, __, __},
	/*fal    F2*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, F3, __, __, __, __, __, __, __, __},
	/*fals   F3*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, F4, __, __, __, __, __},
	/*false  F4*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, OK, __, __, __, __, __, __, __, __, __, __},
	/*nu     N1*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, N2, __, __, __},
	/*nul    N2*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, N3, __, __, __, __, __, __, __, __},
	/*null   N3*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, OK, __, __, __, __, __, __, __, __},
}