* luci/libs: add support for chunked transfer decoding in http.protocol
[project/luci.git] / libs / http / luasrc / http / protocol.lua
1 --[[
2
3 HTTP protocol implementation for LuCI
4 (c) 2008 Freifunk Leipzig / Jo-Philipp Wich <xm@leipzig.freifunk.net>
5
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12 $Id$
13
14 ]]--
15
16 module("luci.http.protocol", package.seeall)
17
18 require("ltn12")
19 require("luci.util")
20 require("luci.http.protocol.filter")
21
-- Upper bound (4 kB) for message bodies that are checked against or
-- collected by this module
HTTP_MAX_CONTENT = 4 * 1024

-- Maximum allowed length of an urlencoded parameter name
HTTP_URLENC_MAXKEYLEN = 1024
24
25
-- Decode an urlencoded string.
-- Plus signs are turned into spaces first, afterwards every %XX escape is
-- replaced by the byte it denotes. Arguments that are not strings are handed
-- back untouched.
-- Returns the decoded value.
function urldecode( str )

	if type(str) ~= "string" then
		return str
	end

	-- gsub() also returns a match count; assigning to a single local drops it
	local spaced = str:gsub( "+", " " )
	local decoded = spaced:gsub(
		"%%([a-fA-F0-9][a-fA-F0-9])",
		function( hex )
			return string.char( tonumber( hex, 16 ) )
		end
	)

	return decoded
end
40
41
-- Split urlencoded "key=value" pairs, separated by either "&" or ";", out of
-- the given url and urldecode them.
-- If the url contains a "?" only the part behind it is looked at. Results are
-- stored in tbl when one is passed, else in a fresh table. A key that occurs
-- more than once ends up as a table holding all of its values, a key without
-- a value is stored as empty string.
-- Returns the table with the urldecoded values.
function urldecode_params( url, tbl )

	local result = tbl or { }

	if url:find("?") then
		url = url:gsub( "^.+%?([^?]+)", "%1" )
	end

	for _, pair in ipairs( luci.util.split( url, "[&;]+", nil, true ) ) do

		-- Both matches yield nil when the pair has no such component;
		-- urldecode() passes nil through unchanged
		local key = urldecode( pair:match("^([^=]+)") )
		local val = urldecode( pair:match("^[^=]+=(.+)$") )

		if type(key) == "string" and #key > 0 then

			if type(val) ~= "string" then
				val = ""
			end

			local known = result[key]

			if not known then
				-- first occurrence: plain value
				result[key] = val
			elseif type(known) == "table" then
				-- third or later occurrence: extend the list
				known[#known + 1] = val
			else
				-- second occurrence: promote to a list
				result[key] = { known, val }
			end
		end
	end

	return result
end
74
75
-- Encode given string in urlencoded format.
-- Every byte outside of a-z, A-Z, 0-9 and $ _ - . ! * ' ( ) , is replaced by
-- a %xx escape (lower case hex digits). The plus sign is escaped as well:
-- urldecode() reads a bare "+" as a space, so leaving it untouched would
-- corrupt literal plus signs on the way back.
-- Arguments that are not strings are handed back untouched.
-- Returns the encoded string.
function urlencode( str )

	if type(str) ~= "string" then
		return str
	end

	-- gsub() also returns a match count; assigning to a single local drops it
	local encoded = str:gsub(
		"([^a-zA-Z0-9$_%-%.!*'(),])",
		function( chr )
			return string.format( "%%%02x", string.byte( chr ) )
		end
	)

	return encoded
end
95
96
-- Encode given table to urlencoded string.
-- Each entry becomes "key=value" with both sides passed through urlencode(),
-- entries are joined by "&" (no leading or trailing separator). A value that
-- is itself a table - the form urldecode_params() produces for repeated
-- keys - yields one "key=value" pair per element. The order of the pairs
-- follows pairs() and is therefore unspecified.
-- Returns the encoded string, "" for an empty table.
function urlencode_params( tbl )

	local parts = { }

	-- Append one encoded pair; tostring() keeps non-string scalars such as
	-- booleans from breaking the concatenation
	local function add( key, value )
		parts[#parts + 1] =
			urlencode( tostring(key) ) .. "=" .. urlencode( tostring(value) )
	end

	for k, v in pairs(tbl) do
		if type(v) == "table" then
			for _, item in ipairs(v) do
				add( k, item )
			end
		else
			add( k, v )
		end
	end

	return table.concat( parts, "&" )
end
110
111
-- Table of our process states.
-- Each state is a function taking the message table and the current chunk.
-- It answers with a true value (optionally followed by the handler of the
-- next state) to keep reading, with false to stop without error, or with
-- nil plus an error message.
local process_states = { }

-- Extract "magic", the first line of a http message.
-- Fills in the message type ("request" or "response") together with either
-- method and uri or status code and status message, the http version and an
-- empty header table. Anything else, including EOF, is refused.
process_states['magic'] = function( msg, chunk )

	-- Handler of the follow-up state, the same for requests and responses
	local function to_headers( line )
		return process_states['headers']( msg, line )
	end

	if chunk == nil then
		return nil, "Invalid HTTP message magic"
	end

	-- Is it a request?
	local method, uri, req_ver = chunk:match("^([A-Z]+) ([^ ]+) HTTP/([01]%.[019])$")

	if method then
		msg.type           = "request"
		msg.request_method = method:lower()
		msg.request_uri    = uri
		msg.http_version   = tonumber( req_ver )
		msg.headers        = { }

		return true, to_headers
	end

	-- Is it a response?
	local res_ver, code, message = chunk:match("^HTTP/([01]%.[019]) ([0-9]+) ([^\r\n]+)$")

	if code then
		msg.type           = "response"
		msg.status_code    = code
		msg.status_message = message
		msg.http_version   = tonumber( res_ver )
		msg.headers        = { }

		return true, to_headers
	end

	-- Neither of both
	return nil, "Invalid HTTP message magic"
end
164
165
-- Extract a header from the given line.
-- A line of the form "Name: value" is stored in msg.headers, an empty line
-- ends the header section (no more data is accepted), everything else is
-- treated as an error.
process_states['headers'] = function( msg, chunk )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Both captures use "+", so a successful match never yields empty strings
	local name, value = chunk:match( "^([A-Z][A-Za-z0-9%-_]+): +(.+)$" )

	if name and value then
		msg.headers[name] = value

		-- Valid header line, stay in this state
		return true, nil
	end

	if #chunk == 0 then
		-- Blank line, header section is complete
		return false, nil
	end

	-- Junk data
	return nil, "Invalid HTTP header received"
end
193
194
-- Find first MIME boundary.
-- The body has to start with "--" .. boundary .. "\r\n". Input is collected
-- until enough bytes are available to compare the complete marker, so a
-- marker that arrives split over several chunks is still recognized.
-- Everything behind the marker is left in msg._mimebuffer for the
-- mime-headers state.
process_states['mime-init'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	local marker = "--" .. msg.mime_boundary .. "\r\n"

	-- Partial input is kept in a field of its own: the throttling source in
	-- mimedecode_message_body() stops reading once msg._mimebuffer grows
	-- large, which must not happen while we still wait for the marker
	local buffer = ( msg._mimeinit or "" ) .. chunk

	if #buffer < #marker then
		-- Not enough data to decide yet
		msg._mimeinit = buffer
		return true
	end

	msg._mimeinit = nil

	if buffer:sub( 1, #marker ) ~= marker then
		return nil, "Invalid MIME boundary"
	end

	-- Store remaining data in buffer
	msg._mimebuffer = buffer:sub( #marker + 1 )

	-- Switch to header processing state
	return true, function( chunk )
		return process_states['mime-headers']( msg, chunk, filecb )
	end
end
221
222
-- Read MIME part headers.
-- Complete header lines are stripped off the front of the buffer and
-- collected until the blank line ending the header block shows up. Then the
-- Content-Disposition header decides how the part's data is delivered:
-- parts carrying a filename go to filecb (if one was given) as
-- filecb( { name, file, headers }, chunk, eof ), all others are appended to
-- msg.params[name].
-- Each part gets a header table of its own, headers of an earlier part are
-- not carried over.
process_states['mime-headers'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Combine look-behind buffer with current chunk
	chunk = msg._mimebuffer .. chunk

	-- Headers of the part in progress; the table has to survive several
	-- calls because a header block may arrive in pieces
	local headers = msg._mimeheaders
	if not headers then
		headers = { }
		msg._mimeheaders = headers
	end

	local function __storehdr( k, v )
		headers[k] = v
		return ""
	end

	-- Read all header lines; the pattern is anchored, so each pass removes
	-- at most one line from the front
	local ok, count = 1, 0
	while ok > 0 do
		chunk, ok = chunk:gsub( "^([A-Z][A-Za-z0-9%-_]+): +([^\r\n]+)\r\n", __storehdr )
		count = count + ok
	end

	-- Headers processed, check for empty line
	chunk, ok = chunk:gsub( "^\r\n", "" )

	-- Store remaining buffer contents
	msg._mimebuffer = chunk

	if ok == 0 then
		-- No end of headers yet. Go on if at least one header line was
		-- read in this call or if the buffer is still short
		if count > 0 or #chunk < 128 then
			return true, nil
		end

		-- Buffer looks like garbage
		return nil, "Malformed MIME section header"
	end

	-- Header block is complete: detach the table so that the next part
	-- starts from scratch instead of inheriting these headers
	msg._mimeheaders = nil

	-- When no Content-Type header is given assume text/plain
	if not headers['Content-Type'] then
		headers['Content-Type'] = 'text/plain'
	end

	local disposition = headers['Content-Disposition']

	if not disposition then
		-- Content-Disposition is required, abort without
		return nil, "Missing Content-Disposition MIME section header"
	end

	if not disposition:match("^form%-data; ") then
		-- Unknown Content-Disposition, abort
		return nil, "Unexpected Content-Disposition MIME section header"
	end

	-- Check for field name, filename
	local field = disposition:match('name="(.-)"')
	local file  = disposition:match('filename="(.+)"$')

	if not field then
		-- Without a name there is no key to store the part under
		return nil, "Missing field name in Content-Disposition MIME section header"
	end

	if file and filecb then
		-- Is a file field and we have a callback
		msg.params[field] = file
		msg._mimecallback = function( chunk, eof )
			filecb( {
				name    = field;
				file    = file;
				headers = headers
			}, chunk, eof )
		end
	else
		-- Treat as form field
		msg.params[field] = ""
		msg._mimecallback = function( chunk, eof )
			msg.params[field] = msg.params[field] .. chunk
		end
	end

	-- Header was valid, continue with mime-data
	return true, function( chunk )
		return process_states['mime-data']( msg, chunk, filecb )
	end
end
313
314
-- Read MIME part data.
-- Data is handed to msg._mimecallback( data, eof ) until either the boundary
-- introducing the next part or the closing boundary is found. As a boundary
-- may be split across two chunks, the tail of the buffer that could still
-- be the beginning of a boundary is held back instead of being flushed.
process_states['mime-data'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Combine look-behind buffer with current chunk
	local buffer = msg._mimebuffer .. chunk

	local part_marker  = "\r\n--" .. msg.mime_boundary .. "\r\n"
	local final_marker = "\r\n--" .. msg.mime_boundary .. "--\r\n"

	-- Look for MIME boundary
	local spos, epos = buffer:find( part_marker, 1, true )

	if spos then
		-- Content data
		msg._mimecallback( buffer:sub( 1, spos - 1 ), true )

		-- Store remainder
		msg._mimebuffer = buffer:sub( epos + 1 )

		-- Next state is mime-header processing
		return true, function( chunk )
			return process_states['mime-headers']( msg, chunk, filecb )
		end
	end

	-- Look for the closing boundary
	spos, epos = buffer:find( final_marker, 1, true )

	if spos then
		-- Content data
		msg._mimecallback( buffer:sub( 1, spos - 1 ), true )

		-- We processed the final MIME boundary, cleanup
		msg._mimebuffer   = nil
		msg._mimeheaders  = nil
		msg._mimecallback = nil

		-- We won't accept data anymore
		return false
	end

	-- No boundary in sight. The last #final_marker - 1 bytes may be an
	-- incomplete boundary, so only the data in front of them is safe to
	-- pass on. The amount held back is capped at 256 bytes because the
	-- throttling source in mimedecode_message_body() delivers empty chunks
	-- for as long as the buffer is larger than that.
	local keep = math.min( #final_marker - 1, 256 )

	if #buffer > keep then
		-- Flush buffered data
		msg._mimecallback( buffer:sub( 1, #buffer - keep ), false )

		-- Hold back the tail
		msg._mimebuffer = buffer:sub( #buffer - keep + 1 )
	else
		-- Too little data to flush anything, append new data
		msg._mimebuffer = buffer
	end

	-- Keep feeding me
	return true
end
374
375
-- Init urldecoding stream.
-- Validates the Content-Length announced in msg.env, stores it as number in
-- msg.content_length and sets up the decoding buffers with the first chunk.
-- A missing, non-numeric or too large (> HTTP_MAX_CONTENT) length is
-- answered with an error.
process_states['urldecode-init'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Check for Content-Length
	if not msg.env.CONTENT_LENGTH then
		return nil, "Missing Content-Length header"
	end

	-- tonumber() yields nil for garbage; comparing nil against a number
	-- would raise a Lua error instead of returning a parser error
	local length = tonumber( msg.env.CONTENT_LENGTH )

	if not length then
		return nil, "Invalid Content-Length header"
	end

	msg.content_length = length

	if length > HTTP_MAX_CONTENT then
		return nil, "Request exceeds maximum allowed size"
	end

	-- Initialize buffer
	msg._urldecbuffer = chunk
	msg._urldeclength = 0

	-- Switch to urldecode-key state
	return true, function( chunk )
		return process_states['urldecode-key']( msg, chunk, filecb )
	end
end
404
405
-- Process urldecoding stream, read and validate parameter key.
-- Expects the buffer to hold a complete "key=" prefix: the part in front of
-- the first "=" is urldecoded and becomes the parameter name. A buffer
-- without "=" is refused, as is a key whose encoded form is longer than
-- HTTP_URLENC_MAXKEYLEN.
-- Installs msg._urldeccallback, which receives the pieces of the value:
-- with a file callback they are forwarded as filecb( key, chunk, eof ),
-- otherwise they are collected in msg.params[key] and urldecoded on eof.
process_states['urldecode-key'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Prevent oversized requests
	if msg._urldeclength >= msg.content_length then
		return nil, "Request exceeds maximum allowed size"
	end

	-- Combine look-behind buffer with current chunk
	local buffer = msg._urldecbuffer .. chunk
	local spos, epos = buffer:find( "=", 1, true )

	if not spos then
		return nil, "POST data exceeds maximum allowed length"
	end

	-- Check that key doesn't exceed maximum allowed key length
	if ( spos - 1 ) > HTTP_URLENC_MAXKEYLEN then
		return nil, "POST parameter exceeds maximum allowed length"
	end

	local key = urldecode( buffer:sub( 1, spos - 1 ) )

	-- Prepare buffers
	msg.params[key]   = ""
	msg._urldeclength = msg._urldeclength + epos
	msg._urldecbuffer = buffer:sub( epos + 1 )

	-- Use file callback or store values inside msg.params
	if filecb then
		msg._urldeccallback = function( chunk, eof )
			-- Pass the parameter name along; the only name available in
			-- this scope is the decoded key
			filecb( key, chunk, eof )
		end
	else
		msg._urldeccallback = function( chunk, eof )
			msg.params[key] = msg.params[key] .. chunk

			-- FIXME: Use a filter
			if eof then
				msg.params[key] = urldecode( msg.params[key] )
			end
		end
	end

	-- Proceed with urldecode-value state
	return true, function( chunk )
		return process_states['urldecode-value']( msg, chunk, filecb )
	end
end
461
462
-- Process urldecoding stream, read parameter value.
-- Value data is passed to msg._urldeccallback( data, eof ). A "&" or ";"
-- ends the value and leads back to the urldecode-key state. The end of the
-- body - signalled either by the source running dry or by an empty chunk
-- on an empty buffer - ends the last value as well, provided that the
-- number of processed bytes equals the announced Content-Length.
process_states['urldecode-value'] = function( msg, chunk, filecb )

	-- Close the value in progress, drop all decoder state and report that
	-- no more data is accepted. The eof notification is what triggers the
	-- urldecoding of the collected value in the default callback.
	local function finish( rest )
		msg._urldeccallback( rest, true )

		msg._urldeclength   = nil
		msg._urldecbuffer   = nil
		msg._urldeccallback = nil

		return false
	end

	-- Source ran dry: fine if whatever is still buffered completes the body
	if chunk == nil then
		local rest = msg._urldecbuffer

		if msg._urldeclength + #rest == msg.content_length then
			return finish( rest )
		end

		return nil, "Unexpected EOF"
	end

	-- Combine look-behind buffer with current chunk
	local buffer = msg._urldecbuffer .. chunk

	-- Check for EOF
	if #buffer == 0 then
		-- Compare processed length
		if msg._urldeclength == msg.content_length then
			return finish( "" )
		end

		return nil, "Content-Length mismatch"
	end

	-- Check for end of value
	local spos, epos = buffer:find("[&;]")

	if spos then
		-- Flush buffer, send eof
		msg._urldeccallback( buffer:sub( 1, spos - 1 ), true )
		msg._urldecbuffer = buffer:sub( epos + 1 )
		msg._urldeclength = msg._urldeclength + epos

		-- Back to urldecode-key state
		return true, function( chunk )
			return process_states['urldecode-key']( msg, chunk, filecb )
		end
	end

	-- We're somewhere within a value
	if #buffer > #chunk then
		-- Flush what was buffered before this chunk and account for it
		local flushed = #buffer - #chunk

		msg._urldeccallback( buffer:sub( 1, flushed ), false )
		msg._urldeclength = msg._urldeclength + flushed
		msg._urldecbuffer = buffer:sub( flushed + 1 )
	else
		-- Nothing was buffered before, keep the new data
		msg._urldecbuffer = buffer
	end

	-- Keep feeding me
	return true
end
522
523
-- Decode MIME encoded data.
-- Takes the boundary from msg.env.CONTENT_TYPE and pumps the given LTN12
-- source through the MIME state machine (mime-init, mime-headers,
-- mime-data). Form fields end up in msg.params, file fields are passed to
-- filecb when one is given.
-- Returns true on success, nil plus an error message on failure.
function mimedecode_message_body( source, msg, filecb )

	-- Find mime boundary
	if msg and msg.env.CONTENT_TYPE then

		local bound = msg.env.CONTENT_TYPE:match("^multipart/form%-data; boundary=(.+)")

		if bound then
			msg.mime_boundary = bound
		else
			return nil, "No MIME boundary found or invalid content type given"
		end
	end

	-- Without a boundary the very first state would fail on a nil value,
	-- so refuse to start instead
	if not ( msg and msg.mime_boundary ) then
		return nil, "No MIME boundary found or invalid content type given"
	end

	-- Create an initial LTN12 sink
	-- The whole MIME parsing process is implemented as fancy sink, sinks
	-- replace themselves depending on current processing state (init,
	-- header, data). Start with the initial state.
	local sink = ltn12.sink.simplify(
		function( chunk )
			return process_states['mime-init']( msg, chunk, filecb )
		end
	)

	-- Create a throttling LTN12 source
	-- Frequent state switching in the mime parsing process leads to unwanted
	-- buffer aggregation. This source checks whether there is still data in
	-- our internal read buffer and returns an empty string if there is
	-- already enough data in the processing queue. Once the internal buffer
	-- has drained the original source is asked for the next chunk of data.
	local tsrc = function()

		-- XXX: we should probably keep the maximum buffer size in sync with
		-- the blocksize of our original source... but doesn't really matter
		local pending = msg._mimebuffer

		if pending ~= nil and #pending > 256 then
			return ""
		end

		return source()
	end

	-- Pump input data...
	while true do
		-- get data
		local ok, err = ltn12.pump.step( tsrc, sink )

		-- error
		if not ok and err then
			return nil, err

		-- eof
		elseif not ok then
			return true
		end
	end
end
579
580
-- Decode urlencoded data.
-- Pumps the given LTN12 source through the urldecode state machine
-- (urldecode-init, urldecode-key, urldecode-value); decoded parameters are
-- stored in msg.params.
-- Returns true on success, nil plus an error message on failure.
function urldecode_message_body( source, msg )

	-- Create an initial LTN12 sink
	-- Start with the initial state.
	local sink = ltn12.sink.simplify(
		function( chunk )
			return process_states['urldecode-init']( msg, chunk )
		end
	)

	-- Create a throttling LTN12 source
	-- As long as the decoder still holds unprocessed data, hand out empty
	-- chunks so that the states work off their buffer before new input is
	-- read from the original source.
	local tsrc = function()
		local pending = msg._urldecbuffer

		if pending ~= nil and #pending > 0 then
			return ""
		end

		return source()
	end

	-- Pump input data...
	while true do
		-- get data
		local ok, err = ltn12.pump.step( tsrc, sink )

		-- error
		if not ok and err then
			return nil, err

		-- eof
		elseif not ok then
			return true
		end
	end
end
617
618
-- Parse a http message.
-- Wraps data into a line reader, parses the header and, if that worked,
-- the body; filecb is handed through to the body parser.
-- Returns the message table, or nil when the header could not be parsed.
-- A second return value carries the error message of a failed header or
-- body parse; after a body error the message is still returned as first
-- value, it then holds whatever was parsed up to the failure.
-- NOTE(review): _linereader and HTTP_MAX_READBUF are not defined in the
-- part of the file visible here - confirm where they come from.
function parse_message( data, filecb )

	local reader = _linereader( data, HTTP_MAX_READBUF )

	local message, hdr_err = parse_message_header( reader )

	if not message then
		return nil, hdr_err
	end

	local ok, body_err = parse_message_body( reader, message, filecb )

	if not ok then
		return message, body_err
	end

	return message
end
631
632
-- Parse a http message header.
-- Pumps the given LTN12 source through the "magic" and "headers" states
-- until the header section is complete, then derives msg.params (from the
-- query string of get and post requests) and the CGI-like msg.env table.
-- For responses the request specific entries of msg.env stay unset.
-- Returns the message table, or nil plus an error message.
function parse_message_header( source )

	local msg = { }

	local sink = ltn12.sink.simplify(
		function( chunk )
			return process_states['magic']( msg, chunk )
		end
	)

	-- Pump input data until the header states report the end
	while true do
		local ok, err = ltn12.pump.step( source, sink )

		if not ok then
			-- error
			if err then
				return nil, err
			end

			-- eof
			break
		end
	end

	-- Both are nil when the message is a response
	local method = msg.request_method
	local uri    = msg.request_uri

	-- Process get parameters
	if ( method == "get" or method == "post" ) and uri:match("?") then
		msg.params = urldecode_params( uri )
	else
		msg.params = { }
	end

	-- Request path without query string; the single assignment target
	-- drops the match count gsub() returns as well
	local script_name
	if uri then
		script_name = uri:gsub( "%?.+$", "" )
	end

	-- Populate common environment variables
	msg.env = {
		CONTENT_LENGTH  = msg.headers['Content-Length'];
		CONTENT_TYPE    = msg.headers['Content-Type'];
		REQUEST_METHOD  = method and method:upper();
		REQUEST_URI     = uri;
		SCRIPT_NAME     = script_name;
		SCRIPT_FILENAME = "";		-- XXX implement me

		-- The version is kept as number; plain concatenation would turn
		-- 1.0 into "HTTP/1", so format it with one decimal place
		SERVER_PROTOCOL = string.format( "HTTP/%.1f", msg.http_version )
	}

	-- Populate HTTP_* environment variables
	for _, hdr in ipairs( {
		'Accept',
		'Accept-Charset',
		'Accept-Encoding',
		'Accept-Language',
		'Connection',
		'Cookie',
		'Host',
		'Referer',
		'User-Agent',
	} ) do
		local var = 'HTTP_' .. hdr:upper():gsub("%-","_")
		local val = msg.headers[hdr]

		msg.env[var] = val
	end

	return msg
end
700
701
-- Parse a http message body.
-- Chooses a decoder by request method and content type:
--  * POST with multipart/form-data goes to mimedecode_message_body()
--  * POST with application/x-www-form-urlencoded goes to
--    urldecode_message_body()
--  * everything else is either fed to filecb chunk by chunk or, without a
--    callback, collected in msg.content (at most HTTP_MAX_CONTENT bytes)
-- Bodies sent with chunked transfer coding by HTTP/1.1 peers are decoded
-- on the fly.
-- Returns true on success, nil plus an error message on failure.
function parse_message_body( source, msg, filecb )

	-- Install an additional filter if we're operating on chunked transfer
	-- coding and client is HTTP/1.1 capable
	local coding = msg.headers['Transfer-Encoding']

	if msg.http_version == 1.1 and coding and coding:find("chunked") then
		source = ltn12.source.chain(
			source, luci.http.protocol.filter.decode_chunked
		)
	end

	local is_post = ( msg.env.REQUEST_METHOD == "POST" )
	local ctype   = msg.env.CONTENT_TYPE

	-- Is it multipart/mime ?
	if is_post and ctype and ctype:match("^multipart/form%-data") then
		return mimedecode_message_body( source, msg, filecb )
	end

	-- Is it application/x-www-form-urlencoded ?
	if is_post and ctype and ctype == "application/x-www-form-urlencoded" then
		return urldecode_message_body( source, msg, filecb )
	end

	-- Unhandled encoding
	local sink

	if type(filecb) == "function" then
		-- If we have a file callback then feed it
		sink = filecb
	else
		-- ... else append to .content
		msg.content = ""
		msg.content_length = 0

		sink = function( chunk )
			-- LTN12 signals the end of the stream with a nil chunk
			if chunk == nil then
				return true
			end

			if ( msg.content_length + #chunk ) > HTTP_MAX_CONTENT then
				return nil, "POST data exceeds maximum allowed length"
			end

			msg.content        = msg.content .. chunk
			msg.content_length = msg.content_length + #chunk

			return true
		end
	end

	-- Pump data until the source is exhausted; a successful step returns a
	-- true value without error and must not end the loop
	while true do
		local ok, err = ltn12.pump.step( source, sink )

		if not ok and err then
			return nil, err
		elseif not ok then
			return true
		end
	end
end