Use lookup tables of my own. · zwxalgorithm/http-parser@6f12467 · GitHub
[go: up one dir, main page]

Skip to content

Commit 6f12467

Browse files
committed
Use lookup tables of my own.
1 parent d0dfc98 commit 6f12467

File tree

1 file changed

+79
-43
lines changed

1 file changed

+79
-43
lines changed

http_parser.c

Lines changed: 79 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
11
/* Copyright 2009,2010 Ryan Dahl <ry@tinyclouds.org>
2-
*
3-
* Some parts of this source file were taken from NGINX
4-
* (src/http/ngx_http_parser.c) copyright (C) 2002-2009 Igor Sysoev.
52
*
63
* Permission is hereby granted, free of charge, to any person obtaining a copy
74
* of this software and associated documentation files (the "Software"), to
@@ -109,18 +106,44 @@ static const char *method_strings[] =
109106
};
110107

111108

112-
static const char lowcase[256] =
113-
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
114-
" \0\0\0\0\0\0\0\0\0\0\0\0-\0\0" "0123456789\0\0\0\0\0\0"
115-
"\0abcdefghijklmnopqrstuvwxyz\0\0\0\0_"
116-
"\0abcdefghijklmnopqrstuvwxyz\0\0\0\0\0"
117-
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
118-
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
119-
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
120-
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
121-
122-
123-
static const int unhex[] =
109+
/* Lookup table for header bytes, indexed by the byte's unsigned value: ' ', '_', '-' and all ASCII alphanumeric characters are accepted (non-zero entry).
110+
Upper-case 'A'-'Z' map to their lower-case form; every other byte, including 128-255 (no initializer, so implicitly zero), maps to 0. */
111+
static const unsigned char acceptable_header[256] = {
112+
/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
113+
0, 0, 0, 0, 0, 0, 0, 0,
114+
/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
115+
0, 0, 0, 0, 0, 0, 0, 0,
116+
/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
117+
0, 0, 0, 0, 0, 0, 0, 0,
118+
/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
119+
0, 0, 0, 0, 0, 0, 0, 0,
120+
/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
121+
' ', 0, 0, 0, 0, 0, 0, 0,
122+
/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
123+
0, 0, 0, 0, 0, '-', 0, 0,
124+
/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
125+
'0', '1', '2', '3', '4', '5', '6', '7',
126+
/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
127+
'8', '9', 0, 0, 0, 0, 0, 0,
128+
/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
129+
0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
130+
/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
131+
'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
132+
/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
133+
'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
134+
/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
135+
'x', 'y', 'z', 0, 0, 0, 0, '_',
136+
/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
137+
0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
138+
/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
139+
'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
140+
/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
141+
'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
142+
/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
143+
'x', 'y', 'z', 0, 0, 0, 0, 0 };
144+
145+
146+
static const int unhex[256] =
124147
{-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
125148
,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
126149
,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
@@ -132,26 +155,39 @@ static const int unhex[] =
132155
};
133156

134157

135-
136-
static const uint32_t usual[] = {
137-
0xffffdbfe, /* 1111 1111 1111 1111 1101 1011 1111 1110 */
138-
139-
/* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */
140-
0x7ffffff6, /* 0111 1111 1111 1111 1111 1111 1111 0110 */
141-
142-
/* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */
143-
0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
144-
145-
/* ~}| {zyx wvut srqp onml kjih gfed cba` */
146-
0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
147-
148-
0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
149-
0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
150-
0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
151-
0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */
152-
};
153-
154-
#define USUAL(c) (usual[c >> 5] & (1 << (c & 0x1f)))
158+
/* Lookup table for URL bytes, indexed by the byte's unsigned value.
 * A non-zero entry marks the byte as an ordinary URL character; the request
 * path, query-string and fragment parser states test this table first.
 * Zero entries: control characters (0-31), ' ', '#', '?' and DEL (127).
 * Only the first 128 entries are initialized explicitly, so bytes 128-255
 * are implicitly zero as well.
 * NOTE(review): the bitmap this table replaces accepted bytes 128-255 and
 * most control characters -- confirm that rejecting them is intended. */
static const int normal_url_char[256] = {
159+
/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
160+
0, 0, 0, 0, 0, 0, 0, 0,
161+
/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
162+
0, 0, 0, 0, 0, 0, 0, 0,
163+
/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
164+
0, 0, 0, 0, 0, 0, 0, 0,
165+
/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
166+
0, 0, 0, 0, 0, 0, 0, 0,
167+
/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
168+
0, 1, 1, 0, 1, 1, 1, 1,
169+
/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
170+
1, 1, 1, 1, 1, 1, 1, 1,
171+
/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
172+
1, 1, 1, 1, 1, 1, 1, 1,
173+
/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
174+
1, 1, 1, 1, 1, 1, 1, 0,
175+
/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
176+
1, 1, 1, 1, 1, 1, 1, 1,
177+
/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
178+
1, 1, 1, 1, 1, 1, 1, 1,
179+
/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
180+
1, 1, 1, 1, 1, 1, 1, 1,
181+
/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
182+
1, 1, 1, 1, 1, 1, 1, 1,
183+
/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
184+
1, 1, 1, 1, 1, 1, 1, 1,
185+
/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
186+
1, 1, 1, 1, 1, 1, 1, 1,
187+
/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
188+
1, 1, 1, 1, 1, 1, 1, 1,
189+
/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
190+
1, 1, 1, 1, 1, 1, 1, 0 };
155191

156192

157193
enum state
@@ -692,7 +728,7 @@ size_t http_parser_execute (http_parser *parser,
692728

693729
case s_req_path:
694730
{
695-
if (USUAL(ch)) break;
731+
if (normal_url_char[(unsigned char)ch]) break;
696732

697733
switch (ch) {
698734
case ' ':
@@ -728,7 +764,7 @@ size_t http_parser_execute (http_parser *parser,
728764

729765
case s_req_query_string_start:
730766
{
731-
if (USUAL(ch)) {
767+
if (normal_url_char[(unsigned char)ch]) {
732768
MARK(query_string);
733769
state = s_req_query_string;
734770
break;
@@ -762,7 +798,7 @@ size_t http_parser_execute (http_parser *parser,
762798

763799
case s_req_query_string:
764800
{
765-
if (USUAL(ch)) break;
801+
if (normal_url_char[(unsigned char)ch]) break;
766802

767803
switch (ch) {
768804
case '?':
@@ -797,7 +833,7 @@ size_t http_parser_execute (http_parser *parser,
797833

798834
case s_req_fragment_start:
799835
{
800-
if (USUAL(ch)) {
836+
if (normal_url_char[(unsigned char)ch]) {
801837
MARK(fragment);
802838
state = s_req_fragment;
803839
break;
@@ -832,7 +868,7 @@ size_t http_parser_execute (http_parser *parser,
832868

833869
case s_req_fragment:
834870
{
835-
if (USUAL(ch)) break;
871+
if (normal_url_char[(unsigned char)ch]) break;
836872

837873
switch (ch) {
838874
case ' ':
@@ -1005,7 +1041,7 @@ size_t http_parser_execute (http_parser *parser,
10051041

10061042
case s_header_field:
10071043
{
1008-
c = lowcase[(unsigned char)ch];
1044+
c = acceptable_header[(unsigned char)ch];
10091045

10101046
if (c) {
10111047
switch (header_state) {
@@ -1141,7 +1177,7 @@ size_t http_parser_execute (http_parser *parser,
11411177
state = s_header_value;
11421178
index = 0;
11431179

1144-
c = lowcase[(unsigned char)ch];
1180+
c = acceptable_header[(unsigned char)ch];
11451181

11461182
if (!c) {
11471183
if (ch == CR) {
@@ -1202,7 +1238,7 @@ size_t http_parser_execute (http_parser *parser,
12021238

12031239
case s_header_value:
12041240
{
1205-
c = lowcase[(unsigned char)ch];
1241+
c = acceptable_header[(unsigned char)ch];
12061242

12071243
if (!c) {
12081244
if (ch == CR) {

0 commit comments

Comments
 (0)
0