* luci/libs: fix eof handling for urldecode_message_body() in protocol.lua
[project/luci.git] / libs / http / luasrc / http / protocol.lua
1 --[[
2
3 HTTP protocol implementation for LuCI
4 (c) 2008 Freifunk Leipzig / Jo-Philipp Wich <xm@leipzig.freifunk.net>
5
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12 $Id$
13
14 ]]--
15
16 module("luci.http.protocol", package.seeall)
17
18 require("ltn12")
19 require("luci.http.protocol.filter")
20
-- Maximum accepted content size in bytes (4 kB)
HTTP_MAX_CONTENT = 4 * 1024

-- Maximum allowed size, in bytes, of a still-encoded urlencoded parameter name
HTTP_URLENC_MAXKEYLEN = 1024
23
24
-- Decode a urlencoded string.
-- Every "+" is turned into a space and every "%XX" hex escape into the byte
-- it denotes. Arguments that are not strings are handed back untouched.
-- Returns the decoded value.
function urldecode( str )

	-- Nothing to decode
	if type(str) ~= "string" then
		return str
	end

	-- Plus signs first, so that an escaped "%2B" survives as a literal plus
	local spaced = str:gsub( "+", " " )

	local decoded = spaced:gsub(
		"%%([a-fA-F0-9][a-fA-F0-9])",
		function( hex )
			return string.char( tonumber( hex, 16 ) )
		end
	)

	return decoded
end
39
40
-- Extract and split urlencoded data pairs, separated by either "&" or ";",
-- from the given url.
-- Everything up to and including the last "?" is discarded; a string without
-- any "?" is treated as a bare query string.
-- A key that occurs more than once is collected into a table of values, a key
-- without "=value" gets the empty string.
-- The optional table "tbl" is extended in place and returned.
-- Returns a table with urldecoded keys and values.
function urldecode_params( url, tbl )

	local params = tbl or { }

	-- Without a string there is nothing to parse
	if type(url) ~= "string" then
		return params
	end

	-- Cut off the path. The capture may be empty ("/path?") and the "?" may
	-- be the very first character ("?a=1"); in both cases the path part must
	-- not end up as a parameter name.
	local query = url:match( "^.*%?(.*)$" ) or url

	for pair in query:gmatch( "[^&;]+" ) do

		-- find key and value
		local key = urldecode( pair:match("^([^=]+)") )
		local val = urldecode( pair:match("^[^=]+=(.+)$") )

		-- store
		if type(key) == "string" and key:len() > 0 then
			if type(val) ~= "string" then val = "" end

			if not params[key] then
				params[key] = val
			elseif type(params[key]) ~= "table" then
				params[key] = { params[key], val }
			else
				table.insert( params[key], val )
			end
		end
	end

	return params
end
73
74
-- Encode given string in urlencoded format.
-- Letters, digits and the characters $ _ - . ! * ' ( ) , are kept, every
-- other byte is written as a lowercase "%xx" escape. The plus sign is escaped
-- as well: urldecode() turns a bare "+" into a space, so leaving it untouched
-- would corrupt values on the way back.
-- Arguments that are not strings are handed back untouched.
-- Returns the encoded string.
function urlencode( str )

	local function __chrenc( chr )
		return string.format(
			"%%%02x", string.byte( chr )
		)
	end

	if type(str) == "string" then
		-- Single assignment drops the substitution count returned by gsub
		str = str:gsub(
			"([^a-zA-Z0-9$_%-%.!*'(),])",
			__chrenc
		)
	end

	return str
end
94
95
-- Encode given table to urlencoded string.
-- Each key/value pair becomes "key=value", pairs are joined by "&" without a
-- leading separator. A table value emits one pair per element, which mirrors
-- the way urldecode_params() stores repeated keys. Keys and values that are
-- not strings are converted with tostring() first.
-- The order of the pairs follows pairs() and is therefore unspecified.
-- Returns the encoded string.
function urlencode_params( tbl )
	local parts = { }

	local function __append( k, v )
		parts[#parts + 1] =
			urlencode( tostring(k) ) .. "=" ..
			urlencode( tostring(v) )
	end

	for k, v in pairs(tbl) do
		if type(v) == "table" then
			for _, item in ipairs(v) do
				__append( k, item )
			end
		else
			__append( k, v )
		end
	end

	-- Join once instead of growing a string pair by pair
	return table.concat( parts, "&" )
end
109
110
-- Parser state handlers, indexed by state name
local process_states = {}
113
-- Extract "magic", the first line of a http message.
-- Recognizes a request line and records the lowercased method, the requested
-- uri and the protocol version, or recognizes a status line and records the
-- status code, the status message and the protocol version.
-- Returns true plus the header parsing state on success, nil plus an error
-- message for anything else (including end of input).
process_states['magic'] = function( msg, chunk )

	-- End of input, nothing to look at
	if chunk == nil then
		return nil, "Invalid HTTP message magic"
	end

	-- Both kinds of message continue with header parsing
	local function __headers( chunk )
		return process_states['headers']( msg, chunk )
	end

	-- Is it a request?
	local method, uri, req_ver = chunk:match("^([A-Z]+) ([^ ]+) HTTP/([01]%.[019])$")

	if method then
		msg.type           = "request"
		msg.request_method = method:lower()
		msg.request_uri    = uri
		msg.http_version   = tonumber( req_ver )
		msg.headers        = { }

		return true, __headers
	end

	-- Is it a response?
	local res_ver, code, message = chunk:match("^HTTP/([01]%.[019]) ([0-9]+) ([^\r\n]+)$")

	if code then
		msg.type           = "response"
		msg.status_code    = code
		msg.status_message = message
		msg.http_version   = tonumber( res_ver )
		msg.headers        = { }

		return true, __headers
	end

	-- Can't handle it
	return nil, "Invalid HTTP message magic"
end
163
164
-- Extract one header line from the given string and store it in msg.headers.
-- Returns true for a valid header line, false for the empty line that ends
-- the header section and nil plus an error message for junk or end of input.
process_states['headers'] = function( msg, chunk )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Look for a valid header format
	local name, value = chunk:match( "^([A-Z][A-Za-z0-9%-_]+): +(.+)$" )

	local have_name  = ( type(name)  == "string" and name:len()  > 0 )
	local have_value = ( type(value) == "string" and value:len() > 0 )

	-- Valid header line, proceed
	if have_name and have_value then
		msg.headers[name] = value
		return true, nil
	end

	-- Empty line, we won't accept data anymore
	if #chunk == 0 then
		return false, nil
	end

	-- Junk data
	return nil, "Invalid HTTP header received"
end
192
193
-- Find first MIME boundary.
-- Input is collected in msg._mimebuffer until a complete opening line
-- ("--" .. boundary .. "\r\n") can be compared, so a boundary line that is
-- split over several chunks is no longer thrown away or rejected early.
-- Returns true while more data is needed, true plus the mime-headers state
-- once the boundary was seen and nil plus an error message otherwise.
process_states['mime-init'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- RFC 2046 limits boundaries to 70 characters. Enforcing the limit also
	-- keeps the partial line buffered below well under the 256 bytes at which
	-- mimedecode_message_body() stops reading new input.
	if #msg.mime_boundary > 70 then
		return nil, "Invalid MIME boundary"
	end

	local opening = "--" .. msg.mime_boundary .. "\r\n"
	local buffer  = ( msg._mimebuffer or "" ) .. chunk

	-- Not enough data to decide yet, keep what we have
	if #buffer < #opening then
		msg._mimebuffer = buffer
		return true
	end

	if buffer:sub( 1, #opening ) ~= opening then
		return nil, "Invalid MIME boundary"
	end

	-- Store remaining data in buffer
	msg._mimebuffer = buffer:sub( #opening + 1 )

	-- Switch to header processing state
	return true, function( chunk )
		return process_states['mime-headers']( msg, chunk, filecb )
	end
end
220
221
-- Read MIME part headers.
-- Header lines are consumed from the look-behind buffer and stored in
-- msg._mimeheaders. Once the empty line ending the header section is seen,
-- the Content-Disposition header decides how the part's data is handled:
-- a part with a filename is streamed to filecb (when one is given), anything
-- else is accumulated in msg.params under the field name.
-- Returns true while more header data is needed, true plus the mime-data
-- state when the headers are complete and nil plus an error message otherwise.
process_states['mime-headers'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Combine look-behind buffer with current chunk
	chunk = msg._mimebuffer .. chunk

	if not msg._mimeheaders then
		msg._mimeheaders = { }
	end

	-- gsub callback: remember the header and remove the line from the buffer
	local function __storehdr( k, v )
		msg._mimeheaders[k] = v
		return ""
	end

	-- Read all header lines, one anchored substitution per line
	local ok, count = 1, 0
	while ok > 0 do
		chunk, ok = chunk:gsub( "^([A-Z][A-Za-z0-9%-_]+): +([^\r\n]+)\r\n", __storehdr )
		count = count + ok
	end

	-- Headers processed, check for empty line
	chunk, ok = chunk:gsub( "^\r\n", "" )

	-- Store remaining buffer contents
	msg._mimebuffer = chunk

	-- End of headers not reached yet
	if ok == 0 then
		-- We made progress in this round or the buffer is still small
		if count > 0 or #chunk < 128 then
			-- Keep feeding me with chunks
			return true, nil
		end

		-- Buffer looks like garbage
		return nil, "Malformed MIME section header"
	end

	-- When no Content-Type header is given assume text/plain
	if not msg._mimeheaders['Content-Type'] then
		msg._mimeheaders['Content-Type'] = 'text/plain'
	end

	-- Content-Disposition is required, abort without
	local disposition = msg._mimeheaders['Content-Disposition']
	if not disposition then
		return nil, "Missing Content-Disposition MIME section header"
	end

	-- Check for "form-data" token
	if not disposition:match("^form%-data; ") then
		-- Unknown Content-Disposition, abort
		return nil, "Unexpected Content-Disposition MIME section header"
	end

	-- Check for field name, filename
	local field = disposition:match('name="(.-)"')
	local file  = disposition:match('filename="(.+)"$')

	-- A part without a field name cannot be stored; indexing msg.params with
	-- nil would raise a runtime error instead of a parser error.
	if not field then
		return nil, "Missing field name in Content-Disposition MIME section header"
	end

	-- Is a file field and we have a callback
	if file and filecb then
		msg.params[field] = file
		msg._mimecallback = function(chunk,eof)
			filecb( {
				name = field;
				file = file;
				headers = msg._mimeheaders
			}, chunk, eof )
		end

	-- Treat as form field
	else
		msg.params[field] = ""
		msg._mimecallback = function(chunk,eof)
			msg.params[field] = msg.params[field] .. chunk
		end
	end

	-- Header was valid, continue with mime-data
	return true, function( chunk )
		return process_states['mime-data']( msg, chunk, filecb )
	end
end
312
313
-- Read MIME part data.
-- Data is passed to msg._mimecallback( data, eof ). A part ends at the next
-- boundary line, the whole body ends at the closing boundary line
-- ("--" .. boundary .. "--").
-- Returns true while more data is expected, true plus the mime-headers state
-- when another part follows, false after the closing boundary and nil plus
-- an error message on end of input.
process_states['mime-data'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Combine look-behind buffer with current chunk
	local buffer = msg._mimebuffer .. chunk
	local delim  = "\r\n--" .. msg.mime_boundary

	-- Look for MIME boundary
	local spos, epos = buffer:find( delim .. "\r\n", 1, true )

	if spos then
		-- Content data
		msg._mimecallback( buffer:sub( 1, spos - 1 ), true )

		-- Headers belong to the finished part only; without this reset a
		-- following part would inherit them (including Content-Disposition)
		msg._mimeheaders = nil

		-- Store remainder
		msg._mimebuffer = buffer:sub( epos + 1 )

		-- Next state is mime-header processing
		return true, function( chunk )
			return process_states['mime-headers']( msg, chunk, filecb )
		end
	end

	-- Look for the closing boundary
	spos, epos = buffer:find( delim .. "--\r\n", 1, true )

	if spos then
		-- Content data
		msg._mimecallback( buffer:sub( 1, spos - 1 ), true )

		-- We processed the final MIME boundary, cleanup
		msg._mimebuffer   = nil
		msg._mimeheaders  = nil
		msg._mimecallback = nil

		-- We won't accept data anymore
		return false
	end

	-- We're somewhere within a data section. The buffer may end with the
	-- beginning of a boundary line, so hold back as many bytes as an
	-- incomplete closing boundary line can have and flush the rest. The size
	-- must not depend on the current chunk: the throttling source in
	-- mimedecode_message_body() feeds empty chunks. It is capped at 256 bytes
	-- because that source stops reading while more than 256 bytes are
	-- buffered.
	local keep = math.min( #delim + 3, 256 )

	if #buffer > keep then
		-- Flush buffered data
		msg._mimecallback( buffer:sub( 1, #buffer - keep ), false )

		-- Store look-behind tail
		msg._mimebuffer = buffer:sub( #buffer - keep + 1 )
	else
		-- Buffer is not full yet, append new data
		msg._mimebuffer = buffer
	end

	-- Keep feeding me
	return true
end
373
374
-- Init urldecoding stream.
-- Validates the Content-Length taken from msg.env, stores it in
-- msg.content_length and sets up the look-behind buffer with the first chunk.
-- Returns true plus the urldecode-key state on success and nil plus an error
-- message otherwise.
process_states['urldecode-init'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Check for Content-Length
	if not msg.env.CONTENT_LENGTH then
		return nil, "Missing Content-Length header"
	end

	-- The header value comes from the client; tonumber() yields nil for
	-- anything that is not a number and comparing nil would raise an error
	local length = tonumber( msg.env.CONTENT_LENGTH )
	if not length or length < 0 then
		return nil, "Invalid Content-Length header"
	end

	msg.content_length = length

	if length > HTTP_MAX_CONTENT then
		return nil, "Request exceeds maximum allowed size"
	end

	-- Initialize buffer
	msg._urldecbuffer = chunk
	msg._urldeclength = 0

	-- Switch to urldecode-key state
	return true, function(chunk)
		return process_states['urldecode-key']( msg, chunk, filecb )
	end
end
403
404
-- Process urldecoding stream, read and validate parameter key.
-- The key is everything in the buffer up to the first "=". It is urldecoded,
-- registered in msg.params with an empty value and a value callback is
-- installed in msg._urldeccallback.
-- Returns true plus the urldecode-value state on success and nil plus an
-- error message otherwise.
process_states['urldecode-key'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Prevent oversized requests
	if msg._urldeclength >= msg.content_length then
		return nil, "Request exceeds maximum allowed size"
	end

	-- Combine look-behind buffer with current chunk
	local buffer = msg._urldecbuffer .. chunk
	local spos, epos = buffer:find( "=", 1, true )

	-- No key/value separator in the buffer
	if not spos then
		return nil, "POST data exceeds maximum allowed length"
	end

	-- Check that key doesn't exceed maximum allowed key length
	if ( spos - 1 ) > HTTP_URLENC_MAXKEYLEN then
		return nil, "POST parameter exceeds maximum allowed length"
	end

	local key = urldecode( buffer:sub( 1, spos - 1 ) )

	-- Prepare buffers
	msg.params[key]   = ""
	msg._urldeclength = msg._urldeclength + epos
	msg._urldecbuffer = buffer:sub( epos + 1, #buffer )

	-- Use file callback or store values inside msg.params
	if filecb then
		msg._urldeccallback = function( chunk, eof )
			-- Hand over the parameter name; this used to pass the
			-- undefined variable "field", i.e. always nil
			filecb( key, chunk, eof )
		end
	else
		msg._urldeccallback = function( chunk, eof )
			msg.params[key] = msg.params[key] .. chunk

			-- FIXME: Use a filter
			if eof then
				msg.params[key] = urldecode( msg.params[key] )
			end
		end
	end

	-- Proceed with urldecode-value state
	return true, function( chunk )
		return process_states['urldecode-value']( msg, chunk, filecb )
	end
end
460
461
-- Process urldecoding stream, read parameter value.
-- Value data is passed to msg._urldeccallback( data, eof ); the value ends at
-- the next "&" or ";" or at the end of the input.
-- Returns true while the value continues, true plus the urldecode-key state
-- after a separator, false at the end of the input and nil plus an error
-- message when the processed length does not match the Content-Length.
process_states['urldecode-value'] = function( msg, chunk, filecb )

	-- End of input finishes the current value
	if chunk == nil then
		-- Send EOF
		msg._urldeccallback( "", true )
		return false
	end

	-- Combine look-behind buffer with current chunk
	local buffer = msg._urldecbuffer .. chunk

	-- Nothing buffered and nothing new: treat as end of data
	if #buffer == 0 then
		-- Compare processed length
		if msg._urldeclength ~= msg.content_length then
			return nil, "Content-Length mismatch"
		end

		-- The value is still open in this state; finish it so that the
		-- callback gets its eof notification like on every other exit path
		msg._urldeccallback( "", true )

		-- Cleanup
		msg._urldeclength   = nil
		msg._urldecbuffer   = nil
		msg._urldeccallback = nil

		-- We won't accept data anymore
		return false
	end

	-- Check for end of value
	local spos, epos = buffer:find("[&;]")

	if spos then
		-- Flush buffer, send eof
		msg._urldeccallback( buffer:sub( 1, spos - 1 ), true )
		msg._urldecbuffer = buffer:sub( epos + 1, #buffer )
		msg._urldeclength = msg._urldeclength + epos

		-- Back to urldecode-key state
		return true, function( chunk )
			return process_states['urldecode-key']( msg, chunk, filecb )
		end
	end

	-- We're somewhere within a value: flush what was held back so far and
	-- keep the new chunk as look-behind buffer
	local held = #buffer - #chunk

	if held > 0 then
		-- Flush buffered data
		msg._urldeccallback( buffer:sub( 1, held ), false )

		-- Store new data
		msg._urldeclength = msg._urldeclength + held
		msg._urldecbuffer = buffer:sub( held + 1, #buffer )
	else
		-- Buffer is not full yet, append new data
		msg._urldecbuffer = buffer
	end

	-- Keep feeding me
	return true
end
523
524
-- Decode MIME encoded data.
-- The boundary is taken from msg.env.CONTENT_TYPE when that is set, otherwise
-- a boundary already present in msg.mime_boundary is used. Form fields end up
-- in msg.params, file parts are streamed to filecb (see the mime-headers
-- state).
-- Returns true when the body was processed, nil plus an error message
-- otherwise.
function mimedecode_message_body( source, msg, filecb )

	-- Find mime boundary
	if msg and msg.env and msg.env.CONTENT_TYPE then

		local bound = msg.env.CONTENT_TYPE:match("^multipart/form%-data; boundary=(.+)")

		if bound then
			msg.mime_boundary = bound
		else
			return nil, "No MIME boundary found or invalid content type given"
		end
	end

	-- Without a boundary the parser states would fail with a runtime error
	if not ( msg and msg.mime_boundary ) then
		return nil, "No MIME boundary found or invalid content type given"
	end

	-- Create an initial LTN12 sink
	-- The whole MIME parsing process is implemented as a fancy sink; sinks
	-- replace themselves depending on the current processing state (init,
	-- header, data). Start with the initial state.
	local sink = ltn12.sink.simplify(
		function( chunk )
			return process_states['mime-init']( msg, chunk, filecb )
		end
	)

	-- Create a throttling LTN12 source
	-- Frequent state switching in the mime parsing process leads to unwanted
	-- buffer aggregation. This source checks whether there's still data in
	-- our internal read buffer and returns an empty string if there's already
	-- enough data in the processing queue. If the internal buffer runs empty
	-- we're calling the original source to get the next chunk of data.
	local tsrc = function()

		-- XXX: we should probably keep the maximum buffer size in sync with
		--      the blocksize of our original source... but doesn't really matter
		-- Compare against nil; "null" was an undefined global that only
		-- happened to be nil
		if msg._mimebuffer ~= nil and #msg._mimebuffer > 256 then
			return ""
		else
			return source()
		end
	end

	-- Pump input data...
	while true do
		-- get data
		local ok, err = ltn12.pump.step( tsrc, sink )

		-- error
		if not ok and err then
			return nil, err

		-- eof
		elseif not ok then
			return true
		end
	end
end
580
581
-- Decode urlencoded data.
-- Pumps the given source through the urldecode states; decoded parameters are
-- stored in msg.params.
-- Returns true when the body was processed, nil plus an error message
-- otherwise.
function urldecode_message_body( source, msg )

	-- Create an initial LTN12 sink
	-- Start with the initial state.
	local sink = ltn12.sink.simplify(
		function( chunk )
			return process_states['urldecode-init']( msg, chunk )
		end
	)

	-- Create a throttling LTN12 source
	-- As long as the look-behind buffer holds data, feed empty chunks so that
	-- the states work off the buffer before new input is read.
	local tsrc = function()
		-- Compare against nil; "null" was an undefined global that only
		-- happened to be nil
		if msg._urldecbuffer ~= nil and #msg._urldecbuffer > 0 then
			return ""
		else
			return source()
		end
	end

	-- Pump input data...
	while true do
		-- get data
		local ok, err = ltn12.pump.step( tsrc, sink )

		-- error
		if not ok and err then
			return nil, err

		-- eof
		elseif not ok then
			return true
		end
	end
end
618
619
-- Parse a http message.
-- Reads the header and then the body from "data".
-- Returns the message table. When the header cannot be parsed the result is
-- nil plus an error message; when only the body fails the message parsed so
-- far is returned together with the error message as second value.
-- NOTE(review): _linereader and HTTP_MAX_READBUF are not defined in the part
-- of the file visible here - confirm they are provided elsewhere.
function parse_message( data, filecb )

	local reader = _linereader( data, HTTP_MAX_READBUF )

	local message, err = parse_message_header( reader )
	if not message then
		return nil, err
	end

	-- Don't swallow body errors, report them next to the message
	local ok, body_err = parse_message_body( reader, message, filecb )
	if not ok then
		return message, body_err
	end

	return message
end
632
633
-- Parse a http message header.
-- Pumps the given source through the magic and header states and then fills
-- in msg.params (query string parameters of get/post requests) and msg.env
-- (CGI style environment variables).
-- For response messages the request related entries of msg.env are left
-- unset.
-- Returns the message table, or nil plus an error message.
function parse_message_header( source )

	local ok = true
	local err
	local msg = { }

	local sink = ltn12.sink.simplify(
		function( chunk )
			return process_states['magic']( msg, chunk )
		end
	)

	-- Pump input data...
	while ok do

		-- get data
		ok, err = ltn12.pump.step( source, sink )

		-- error
		if not ok and err then
			return nil, err

		-- eof
		elseif not ok then

			-- Both are nil when the message is a response
			local method = msg.request_method
			local uri    = msg.request_uri

			-- Process get parameters
			if ( method == "get" or method == "post" ) and uri:match("?") then
				msg.params = urldecode_params( uri )
			else
				msg.params = { }
			end

			-- Populate common environment variables
			msg.env = {
				CONTENT_LENGTH  = msg.headers['Content-Length'];
				CONTENT_TYPE    = msg.headers['Content-Type'];
				REQUEST_METHOD  = method and method:upper();
				REQUEST_URI     = uri;
				-- Parentheses drop the substitution count returned by gsub
				SCRIPT_NAME     = uri and ( uri:gsub("?.+$","") );
				SCRIPT_FILENAME = "";		-- XXX implement me
				-- http_version is a number; plain concatenation would turn
				-- version 1.0 into "HTTP/1"
				SERVER_PROTOCOL = string.format( "HTTP/%.1f", msg.http_version )
			}

			-- Populate HTTP_* environment variables
			for i, hdr in ipairs( {
				'Accept',
				'Accept-Charset',
				'Accept-Encoding',
				'Accept-Language',
				'Connection',
				'Cookie',
				'Host',
				'Referer',
				'User-Agent',
			} ) do
				local name = hdr:upper():gsub("%-","_")

				msg.env['HTTP_' .. name] = msg.headers[hdr]
			end
		end
	end

	return msg
end
701
702
-- Parse a http message body.
-- POST bodies of type multipart/form-data and
-- application/x-www-form-urlencoded are decoded into msg.params. Any other
-- body is fed to filecb chunk by chunk when filecb is a function, otherwise
-- it is collected in msg.content (at most HTTP_MAX_CONTENT bytes).
-- Returns true on success, nil plus an error message otherwise.
function parse_message_body( source, msg, filecb )

	-- Install an additional filter if we're operating on chunked transfer
	-- coding and client is HTTP/1.1 capable
	if msg.http_version == 1.1 and
	   msg.headers['Transfer-Encoding'] and
	   msg.headers['Transfer-Encoding']:find("chunked")
	then
		source = ltn12.source.chain(
			source, luci.http.protocol.filter.decode_chunked
		)
	end

	local is_post = ( msg.env.REQUEST_METHOD == "POST" )
	local ctype   = msg.env.CONTENT_TYPE

	-- Is it multipart/mime ?
	if is_post and ctype and ctype:match("^multipart/form%-data") then

		return mimedecode_message_body( source, msg, filecb )

	-- Is it application/x-www-form-urlencoded ?
	-- Only the media type is compared so that a parameter such as
	-- "; charset=UTF-8" does not disable decoding.
	elseif is_post and ctype and
	       ctype:match("^application/x%-www%-form%-urlencoded")
	then
		return urldecode_message_body( source, msg, filecb )
	end

	-- Unhandled encoding
	-- If a file callback is given then feed it chunk by chunk, else
	-- store whole buffer in message.content
	local sink

	-- If we have a file callback then feed it
	if type(filecb) == "function" then
		sink = filecb

	-- ... else append to .content
	else
		msg.content = ""
		msg.content_length = 0

		sink = function( chunk )
			-- The pump passes nil at the end of the input
			if chunk == nil then
				return true
			end

			if ( msg.content_length + #chunk ) <= HTTP_MAX_CONTENT then

				msg.content        = msg.content        .. chunk
				msg.content_length = msg.content_length + #chunk

				return true
			else
				return nil, "POST data exceeds maximum allowed length"
			end
		end
	end

	-- Pump data...
	while true do
		local ok, err = ltn12.pump.step( source, sink )

		-- error
		if not ok and err then
			return nil, err

		-- eof; a successful step must not end the loop, otherwise only the
		-- first chunk of the body would be read
		elseif not ok then
			return true
		end
	end
end
773
774
-- Push a response to a socket.
-- Writes the status line, the response headers and, when a source is given,
-- the response body to sinkout.
--   request   - message table, request.env.SERVER_PROTOCOL is used
--   response  - table with a "status" code and an optional "headers" table
--   sourceout - optional LTN12 source delivering the response body
--   sinkout   - function receiving the output strings
--   sinkerr   - not used by this function
function push_response(request, response, sourceout, sinkout, sinkerr)
	local code = response.status

	-- Codes missing from statusmsg must not abort the response with a
	-- concatenation error; numeric strings are looked up as numbers
	local reason = statusmsg[tonumber(code)] or "Unknown"

	sinkout(request.env.SERVER_PROTOCOL .. " " .. code .. " " .. reason .. "\r\n")

	-- Tolerate responses without a headers table
	response.headers = response.headers or { }

	-- FIXME: Add support for keep-alive
	response.headers["Connection"] = "close"

	for k,v in pairs(response.headers) do
		sinkout(k .. ": " .. v .. "\r\n")
	end

	-- Empty line ends the header section
	sinkout("\r\n")

	if sourceout then
		ltn12.pump.all(sourceout, sinkout)
	end
end
793
794
-- Status codes and their standard reason phrases
statusmsg = {
	[200] = "OK",
	[301] = "Moved Permanently",
	[302] = "Found",
	[304] = "Not Modified",
	[400] = "Bad Request",
	[403] = "Forbidden",
	[404] = "Not Found",
	[405] = "Method Not Allowed",
	[411] = "Length Required",
	[413] = "Request Entity Too Large",
	[500] = "Internal Server Error",
	[503] = "Service Unavailable",
}