1
1
/* Copyright 2009,2010 Ryan Dahl <ry@tinyclouds.org>
2
- *
3
- * Some parts of this source file were taken from NGINX
4
- * (src/http/ngx_http_parser.c) copyright (C) 2002-2009 Igor Sysoev.
5
2
*
6
3
* Permission is hereby granted, free of charge, to any person obtaining a copy
7
4
* of this software and associated documentation files (the "Software"), to
@@ -109,18 +106,44 @@ static const char *method_strings[] =
109
106
};
110
107
111
108
112
- static const char lowcase [256 ] =
113
- "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
114
- " \0\0\0\0\0\0\0\0\0\0\0\0-\0\0" "0123456789\0\0\0\0\0\0"
115
- "\0abcdefghijklmnopqrstuvwxyz\0\0\0\0_"
116
- "\0abcdefghijklmnopqrstuvwxyz\0\0\0\0\0"
117
- "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
118
- "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
119
- "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
120
- "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" ;
121
-
122
-
123
- static const int unhex [] =
109
/* Lookup table used while parsing headers, indexed by the input byte cast to
 * unsigned char.
 *
 * A non-zero entry means the byte is accepted, and the entry is the value the
 * parser works with: ' ', '-', '_', the digits and 'a'-'z' map to themselves,
 * while 'A'-'Z' map to their lower-case counterparts. A zero entry means the
 * byte is not accepted.
 *
 * Only indices 0-127 are listed. Indices 128-255 have no initializer and are
 * therefore zero, so bytes with the high bit set are not accepted either.
 */
static const unsigned char acceptable_header[256] = {
/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us   */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  32 sp    33 !     34 "     35 #     36 $     37 %     38 &     39 '    */
       ' ',      0,       0,       0,       0,       0,       0,       0,
/*  40 (     41 )     42 *     43 +     44 ,     45 -     46 .     47 /    */
        0,       0,       0,       0,       0,      '-',      0,       0,
/*  48 0     49 1     50 2     51 3     52 4     53 5     54 6     55 7    */
       '0',     '1',     '2',     '3',     '4',     '5',     '6',     '7',
/*  56 8     57 9     58 :     59 ;     60 <     61 =     62 >     63 ?    */
       '8',     '9',      0,       0,       0,       0,       0,       0,
/*  64 @     65 A     66 B     67 C     68 D     69 E     70 F     71 G    */
        0,      'a',     'b',     'c',     'd',     'e',     'f',     'g',
/*  72 H     73 I     74 J     75 K     76 L     77 M     78 N     79 O    */
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
/*  80 P     81 Q     82 R     83 S     84 T     85 U     86 V     87 W    */
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
/*  88 X     89 Y     90 Z     91 [     92 \     93 ]     94 ^     95 _    */
       'x',     'y',     'z',      0,       0,       0,       0,      '_',
/*  96 `     97 a     98 b     99 c    100 d    101 e    102 f    103 g    */
        0,      'a',     'b',     'c',     'd',     'e',     'f',     'g',
/* 104 h    105 i    106 j    107 k    108 l    109 m    110 n    111 o    */
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
/* 112 p    113 q    114 r    115 s    116 t    117 u    118 v    119 w    */
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
/* 120 x    121 y    122 z    123 {    124 |    125 }    126 ~    127 del  */
       'x',     'y',     'z',      0,       0,       0,       0,       0
};
144
+
145
+
146
+ static const int unhex [256 ] =
124
147
{-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1
125
148
,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1
126
149
,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1
@@ -132,26 +155,39 @@ static const int unhex[] =
132
155
};
133
156
134
157
135
-
136
- static const uint32_t usual [] = {
137
- 0xffffdbfe , /* 1111 1111 1111 1111 1101 1011 1111 1110 */
138
-
139
- /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */
140
- 0x7ffffff6 , /* 0111 1111 1111 1111 1111 1111 1111 0110 */
141
-
142
- /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */
143
- 0xffffffff , /* 1111 1111 1111 1111 1111 1111 1111 1111 */
144
-
145
- /* ~}| {zyx wvut srqp onml kjih gfed cba` */
146
- 0xffffffff , /* 1111 1111 1111 1111 1111 1111 1111 1111 */
147
-
148
- 0xffffffff , /* 1111 1111 1111 1111 1111 1111 1111 1111 */
149
- 0xffffffff , /* 1111 1111 1111 1111 1111 1111 1111 1111 */
150
- 0xffffffff , /* 1111 1111 1111 1111 1111 1111 1111 1111 */
151
- 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */
152
- };
153
-
154
- #define USUAL (c ) (usual[c >> 5] & (1 << (c & 0x1f)))
158
/* Lookup table indexed by the input byte cast to unsigned char. A non-zero
 * entry marks a byte that the request path, query-string and fragment states
 * treat as an ordinary URL character; a zero entry sends the byte on to the
 * state's own switch for special handling.
 *
 * Zero entries among the listed indices: the control characters 0-31, space,
 * '#', '?' and DEL (127).
 *
 * Only indices 0-127 are listed. Indices 128-255 have no initializer and are
 * therefore zero, so bytes with the high bit set are not ordinary URL
 * characters as far as this table is concerned.
 *
 * The element type is unsigned char: the entries are plain 0/1 flags, so one
 * byte per entry keeps the table at 256 bytes and matches acceptable_header.
 */
static const unsigned char normal_url_char[256] = {
/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us   */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  32 sp    33 !     34 "     35 #     36 $     37 %     38 &     39 '    */
        0,       1,       1,       0,       1,       1,       1,       1,
/*  40 (     41 )     42 *     43 +     44 ,     45 -     46 .     47 /    */
        1,       1,       1,       1,       1,       1,       1,       1,
/*  48 0     49 1     50 2     51 3     52 4     53 5     54 6     55 7    */
        1,       1,       1,       1,       1,       1,       1,       1,
/*  56 8     57 9     58 :     59 ;     60 <     61 =     62 >     63 ?    */
        1,       1,       1,       1,       1,       1,       1,       0,
/*  64 @     65 A     66 B     67 C     68 D     69 E     70 F     71 G    */
        1,       1,       1,       1,       1,       1,       1,       1,
/*  72 H     73 I     74 J     75 K     76 L     77 M     78 N     79 O    */
        1,       1,       1,       1,       1,       1,       1,       1,
/*  80 P     81 Q     82 R     83 S     84 T     85 U     86 V     87 W    */
        1,       1,       1,       1,       1,       1,       1,       1,
/*  88 X     89 Y     90 Z     91 [     92 \     93 ]     94 ^     95 _    */
        1,       1,       1,       1,       1,       1,       1,       1,
/*  96 `     97 a     98 b     99 c    100 d    101 e    102 f    103 g    */
        1,       1,       1,       1,       1,       1,       1,       1,
/* 104 h    105 i    106 j    107 k    108 l    109 m    110 n    111 o    */
        1,       1,       1,       1,       1,       1,       1,       1,
/* 112 p    113 q    114 r    115 s    116 t    117 u    118 v    119 w    */
        1,       1,       1,       1,       1,       1,       1,       1,
/* 120 x    121 y    122 z    123 {    124 |    125 }    126 ~    127 del  */
        1,       1,       1,       1,       1,       1,       1,       0
};
155
191
156
192
157
193
enum state
@@ -692,7 +728,7 @@ size_t http_parser_execute (http_parser *parser,
692
728
693
729
case s_req_path :
694
730
{
695
- if (USUAL ( ch ) ) break ;
731
+ if (normal_url_char [( unsigned char ) ch ] ) break ;
696
732
697
733
switch (ch ) {
698
734
case ' ' :
@@ -728,7 +764,7 @@ size_t http_parser_execute (http_parser *parser,
728
764
729
765
case s_req_query_string_start :
730
766
{
731
- if (USUAL ( ch ) ) {
767
+ if (normal_url_char [( unsigned char ) ch ] ) {
732
768
MARK (query_string );
733
769
state = s_req_query_string ;
734
770
break ;
@@ -762,7 +798,7 @@ size_t http_parser_execute (http_parser *parser,
762
798
763
799
case s_req_query_string :
764
800
{
765
- if (USUAL ( ch ) ) break ;
801
+ if (normal_url_char [( unsigned char ) ch ] ) break ;
766
802
767
803
switch (ch ) {
768
804
case '?' :
@@ -797,7 +833,7 @@ size_t http_parser_execute (http_parser *parser,
797
833
798
834
case s_req_fragment_start :
799
835
{
800
- if (USUAL ( ch ) ) {
836
+ if (normal_url_char [( unsigned char ) ch ] ) {
801
837
MARK (fragment );
802
838
state = s_req_fragment ;
803
839
break ;
@@ -832,7 +868,7 @@ size_t http_parser_execute (http_parser *parser,
832
868
833
869
case s_req_fragment :
834
870
{
835
- if (USUAL ( ch ) ) break ;
871
+ if (normal_url_char [( unsigned char ) ch ] ) break ;
836
872
837
873
switch (ch ) {
838
874
case ' ' :
@@ -1005,7 +1041,7 @@ size_t http_parser_execute (http_parser *parser,
1005
1041
1006
1042
case s_header_field :
1007
1043
{
1008
- c = lowcase [(unsigned char )ch ];
1044
+ c = acceptable_header [(unsigned char )ch ];
1009
1045
1010
1046
if (c ) {
1011
1047
switch (header_state ) {
@@ -1141,7 +1177,7 @@ size_t http_parser_execute (http_parser *parser,
1141
1177
state = s_header_value ;
1142
1178
index = 0 ;
1143
1179
1144
- c = lowcase [(unsigned char )ch ];
1180
+ c = acceptable_header [(unsigned char )ch ];
1145
1181
1146
1182
if (!c ) {
1147
1183
if (ch == CR ) {
@@ -1202,7 +1238,7 @@ size_t http_parser_execute (http_parser *parser,
1202
1238
1203
1239
case s_header_value :
1204
1240
{
1205
- c = lowcase [(unsigned char )ch ];
1241
+ c = acceptable_header [(unsigned char )ch ];
1206
1242
1207
1243
if (!c ) {
1208
1244
if (ch == CR ) {
0 commit comments