ee82c82c8bfa6cdc212cc48c21bdfed17ca2f066
[project/cgi-io.git] / multipart_parser.c
1 /* Based on node-formidable by Felix Geisendörfer
2 * Igor Afonov - afonov@gmail.com - 2012
3 * MIT License - http://www.opensource.org/licenses/mit-license.php
4 */
5
6 #include "multipart_parser.h"
7
8 #include <stdio.h>
9 #include <stdarg.h>
10 #include <string.h>
11
/*
 * Debug trace helper.
 *
 * When DEBUG_MULTIPART is defined, writes a printf-style message to stderr,
 * prefixed with a parser tag and terminated by a newline. Without
 * DEBUG_MULTIPART the function body is empty.
 *
 * Note: __FILE__ and __LINE__ are expanded inside this helper, so the
 * reported location is always this function, not the call site.
 */
static void multipart_log(const char * format, ...)
{
#ifdef DEBUG_MULTIPART
    va_list args;
    va_start(args, format);

    fprintf(stderr, "[HTTP_MULTIPART_PARSER] %s:%d: ", __FILE__, __LINE__);
    vfprintf(stderr, format, args);
    fprintf(stderr, "\n");

    /* Every va_start must be matched by va_end before the function returns. */
    va_end(args);
#else
    /* Logging is compiled out; mark the parameter as intentionally unused. */
    (void) format;
#endif
}
23
/*
 * Invoke the optional notification callback settings->on_<FOR>, passing the
 * parser as its only argument. If the callback is set and returns non-zero,
 * the enclosing function returns the current offset `i`.
 *
 * Expects `p` (the parser) and `i` (the current offset) to be in scope at
 * the expansion site.
 */
#define NOTIFY_CB(FOR)                                                  \
do {                                                                    \
    if (p->settings->on_##FOR && p->settings->on_##FOR(p) != 0)         \
        return i;                                                       \
} while (0)
32
/*
 * Invoke the optional data callback settings->on_<FOR> with the parser, a
 * pointer to the data and its length. If the callback is set and returns
 * non-zero, the enclosing function returns the current offset `i`.
 *
 * Expects `p` (the parser) and `i` (the current offset) to be in scope at
 * the expansion site.
 */
#define EMIT_DATA_CB(FOR, ptr, len)                                     \
do {                                                                    \
    if (p->settings->on_##FOR &&                                        \
        p->settings->on_##FOR(p, ptr, len) != 0)                        \
        return i;                                                       \
} while (0)
41
42
/* Byte values of the line terminator characters matched by the parser. */
#define LF 0x0A  /* line feed */
#define CR 0x0D  /* carriage return */
45
46 struct multipart_parser {
47 void * data;
48
49 size_t index;
50 size_t boundary_length;
51
52 unsigned char state;
53
54 const multipart_parser_settings* settings;
55
56 char* lookbehind;
57 char multipart_boundary[1];
58 };
59
/*
 * States of the parser's state machine. The numeric values are spelled out
 * explicitly; they start at 1 and increase by one per state.
 */
enum state {
    s_uninitialized             = 1,

    /* Leading boundary line. */
    s_start                     = 2,
    s_start_boundary            = 3,

    /* Part headers. */
    s_header_field_start        = 4,
    s_header_field              = 5,
    s_headers_almost_done       = 6,
    s_header_value_start        = 7,
    s_header_value              = 8,
    s_header_value_almost_done  = 9,

    /* Part body and the boundary that terminates it. */
    s_part_data_start           = 10,
    s_part_data                 = 11,
    s_part_data_almost_boundary = 12,
    s_part_data_boundary        = 13,
    s_part_data_almost_end      = 14,
    s_part_data_end             = 15,
    s_part_data_final_hyphen    = 16,

    /* Closing boundary seen; remaining input is ignored. */
    s_end                       = 17
};
79
80 multipart_parser* multipart_parser_init
81 (const char *boundary, const multipart_parser_settings* settings) {
82
83 multipart_parser* p = malloc(sizeof(multipart_parser) +
84 strlen(boundary) +
85 strlen(boundary) + 9);
86
87 strcpy(p->multipart_boundary, boundary);
88 p->boundary_length = strlen(boundary);
89
90 p->lookbehind = (p->multipart_boundary + p->boundary_length + 1);
91
92 p->index = 0;
93 p->state = s_start;
94 p->settings = settings;
95
96 return p;
97 }
98
99 void multipart_parser_free(multipart_parser* p) {
100 free(p);
101 }
102
103 void multipart_parser_set_data(multipart_parser *p, void *data) {
104 p->data = data;
105 }
106
107 void *multipart_parser_get_data(multipart_parser *p) {
108 return p->data;
109 }
110
111 size_t multipart_parser_execute(multipart_parser* p, const char *buf, size_t len) {
112 size_t i = 0;
113 size_t mark = 0;
114 char c, cl;
115 int is_last = 0;
116
117 while(i < len) {
118 c = buf[i];
119 is_last = (i == (len - 1));
120 switch (p->state) {
121 case s_start:
122 multipart_log("s_start");
123 p->index = 0;
124 p->state = s_start_boundary;
125
126 /* fallthrough */
127 case s_start_boundary:
128 multipart_log("s_start_boundary");
129 if (p->index == p->boundary_length) {
130 if (c != CR) {
131 return i;
132 }
133 p->index++;
134 break;
135 } else if (p->index == (p->boundary_length + 1)) {
136 if (c != LF) {
137 return i;
138 }
139 p->index = 0;
140 NOTIFY_CB(part_data_begin);
141 p->state = s_header_field_start;
142 break;
143 }
144 if (c != p->multipart_boundary[p->index]) {
145 return i;
146 }
147 p->index++;
148 break;
149
150 case s_header_field_start:
151 multipart_log("s_header_field_start");
152 mark = i;
153 p->state = s_header_field;
154
155 /* fallthrough */
156 case s_header_field:
157 multipart_log("s_header_field");
158 if (c == CR) {
159 p->state = s_headers_almost_done;
160 break;
161 }
162
163 if (c == '-') {
164 break;
165 }
166
167 if (c == ':') {
168 EMIT_DATA_CB(header_field, buf + mark, i - mark);
169 p->state = s_header_value_start;
170 break;
171 }
172
173 cl = tolower(c);
174 if (cl < 'a' || cl > 'z') {
175 multipart_log("invalid character in header name");
176 return i;
177 }
178 if (is_last)
179 EMIT_DATA_CB(header_field, buf + mark, (i - mark) + 1);
180 break;
181
182 case s_headers_almost_done:
183 multipart_log("s_headers_almost_done");
184 if (c != LF) {
185 return i;
186 }
187
188 p->state = s_part_data_start;
189 break;
190
191 case s_header_value_start:
192 multipart_log("s_header_value_start");
193 if (c == ' ') {
194 break;
195 }
196
197 mark = i;
198 p->state = s_header_value;
199
200 /* fallthrough */
201 case s_header_value:
202 multipart_log("s_header_value");
203 if (c == CR) {
204 EMIT_DATA_CB(header_value, buf + mark, i - mark);
205 p->state = s_header_value_almost_done;
206 }
207 if (is_last)
208 EMIT_DATA_CB(header_value, buf + mark, (i - mark) + 1);
209 break;
210
211 case s_header_value_almost_done:
212 multipart_log("s_header_value_almost_done");
213 if (c != LF) {
214 return i;
215 }
216 p->state = s_header_field_start;
217 break;
218
219 case s_part_data_start:
220 multipart_log("s_part_data_start");
221 NOTIFY_CB(headers_complete);
222 mark = i;
223 p->state = s_part_data;
224
225 /* fallthrough */
226 case s_part_data:
227 multipart_log("s_part_data");
228 if (c == CR) {
229 EMIT_DATA_CB(part_data, buf + mark, i - mark);
230 mark = i;
231 p->state = s_part_data_almost_boundary;
232 p->lookbehind[0] = CR;
233 break;
234 }
235 if (is_last)
236 EMIT_DATA_CB(part_data, buf + mark, (i - mark) + 1);
237 break;
238
239 case s_part_data_almost_boundary:
240 multipart_log("s_part_data_almost_boundary");
241 if (c == LF) {
242 p->state = s_part_data_boundary;
243 p->lookbehind[1] = LF;
244 p->index = 0;
245 break;
246 }
247 EMIT_DATA_CB(part_data, p->lookbehind, 1);
248 p->state = s_part_data;
249 mark = i --;
250 break;
251
252 case s_part_data_boundary:
253 multipart_log("s_part_data_boundary");
254 if (p->multipart_boundary[p->index] != c) {
255 EMIT_DATA_CB(part_data, p->lookbehind, 2 + p->index);
256 p->state = s_part_data;
257 mark = i --;
258 break;
259 }
260 p->lookbehind[2 + p->index] = c;
261 if ((++ p->index) == p->boundary_length) {
262 NOTIFY_CB(part_data_end);
263 p->state = s_part_data_almost_end;
264 }
265 break;
266
267 case s_part_data_almost_end:
268 multipart_log("s_part_data_almost_end");
269 if (c == '-') {
270 p->state = s_part_data_final_hyphen;
271 break;
272 }
273 if (c == CR) {
274 p->state = s_part_data_end;
275 break;
276 }
277 return i;
278
279 case s_part_data_final_hyphen:
280 multipart_log("s_part_data_final_hyphen");
281 if (c == '-') {
282 NOTIFY_CB(body_end);
283 p->state = s_end;
284 break;
285 }
286 return i;
287
288 case s_part_data_end:
289 multipart_log("s_part_data_end");
290 if (c == LF) {
291 p->state = s_header_field_start;
292 NOTIFY_CB(part_data_begin);
293 break;
294 }
295 return i;
296
297 case s_end:
298 multipart_log("s_end: %02X", (int) c);
299 break;
300
301 default:
302 multipart_log("Multipart parser unrecoverable error");
303 return 0;
304 }
305 ++ i;
306 }
307
308 return len;
309 }