* luci/libs: http.protocol: switch to blockwise reading in urlencoded post requests...
[project/luci.git] / libs / web / luasrc / http / protocol.lua
1 --[[
2
3 HTTP protocol implementation for LuCI
4 (c) 2008 Freifunk Leipzig / Jo-Philipp Wich <xm@leipzig.freifunk.net>
5
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12 $Id$
13
14 ]]--
15
16 module("luci.http.protocol", package.seeall)
17
18 require("luci.util")
19
20
-- Size limits used by the parsers in this module.
HTTP_MAX_CONTENT     = 1024 * 1024	-- maximum content size (1 MB)
HTTP_MAX_READBUF     = 1024		-- read buffer size (1 kB)

-- Defaults.
HTTP_DEFAULT_CTYPE   = "text/html"	-- default content type
HTTP_DEFAULT_VERSION = "1.0"		-- default HTTP version
26
27
-- Decode an urlencoded string: every "+" becomes a space and every
-- "%XX" hex escape becomes the byte it denotes.
-- Values that are not strings are handed back unchanged.
-- Returns the decoded value.
function urldecode( str )

	if type(str) ~= "string" then
		return str
	end

	-- "+" first, so that an escaped plus sign ("%2b") survives as "+"
	local spaced = str:gsub( "+", " " )

	local decoded = spaced:gsub(
		"%%([a-fA-F0-9][a-fA-F0-9])",
		function( hex )
			return string.char( tonumber( hex, 16 ) )
		end
	)

	return decoded
end
42
43
-- Extract and split urlencoded data pairs, separated by either "&" or ";",
-- from the given url. If the url contains a "?", only the part after the
-- last "?" is examined.
-- Returns a table with urldecoded values. A key that occurs more than once
-- maps to an array table holding all its values in order of appearance;
-- a key without a value maps to the empty string.
function urldecode_params( url )

	local params = { }

	-- reduce a full url to its query string
	if url:find("?") then
		url = url:gsub( "^.+%?([^?]+)", "%1" )
	end

	local pieces = luci.util.split( url, "[&;]+", nil, true )

	for _, piece in ipairs( pieces ) do

		local key = urldecode( piece:match("^([^=]+)") )
		local val = urldecode( piece:match("^[^=]+=(.+)$") )

		-- pairs without a usable key are ignored
		if type(key) == "string" and #key > 0 then

			if type(val) ~= "string" then
				val = ""
			end

			local known = params[key]

			if not known then
				-- first occurrence
				params[key] = val
			elseif type(known) == "table" then
				-- third or later occurrence
				table.insert( known, val )
			else
				-- second occurrence: promote to a list
				params[key] = { known, val }
			end
		end
	end

	return params
end
76
77
-- Encode given string in urlencoded format. Every byte that is not a
-- letter, a digit or one of $ _ - . + ! * ' ( ) , is replaced by "%xx"
-- with two lowercase hex digits.
-- Values that are not strings are handed back unchanged.
-- Returns the encoded string.
function urlencode( str )

	if type(str) ~= "string" then
		return str
	end

	local encoded = str:gsub(
		"([^a-zA-Z0-9$_%-%.+!*'(),])",
		function( chr )
			return ("%%%02x"):format( chr:byte() )
		end
	)

	return encoded
end
97
98
-- Encode given table to urlencoded string.
-- Each key/value pair becomes "key=value" (both passed through urlencode)
-- and the pairs are joined with "&". A value that is itself a table is
-- treated as a list and emitted as one "key=value" pair per list element,
-- which mirrors the way urldecode_params() stores repeated keys.
-- The order of the pairs follows pairs() and is therefore unspecified.
-- Returns the encoded string ("" for an empty table).
function urlencode_params( tbl )

	local parts = { }

	local function add( k, v )
		parts[#parts + 1] = urlencode(k) .. "=" .. urlencode(v)
	end

	for k, v in pairs(tbl) do
		if type(v) == "table" then
			for _, item in ipairs(v) do
				add( k, item )
			end
		else
			add( k, v )
		end
	end

	-- Joining once at the end puts "&" only between pairs; testing the
	-- accumulator string for truthiness cannot work because "" is true
	-- in Lua.
	return table.concat( parts, "&" )
end
112
113
-- Decode MIME encoded data (multipart form data).
--
-- data:     the input, handed to _linereader() (string, reader object or
--           iterator function).
-- boundary: the part boundary without the leading "--". When nil, it is
--           taken from the first line of the form "--<something>".
-- filecb:   optional function( field, filename, chunk, eof ). When given,
--           the content of file upload parts is passed to it chunk by
--           chunk instead of being collected; the last call for a part
--           has eof set to true.
--
-- Processing stops once HTTP_MAX_CONTENT bytes have been read; the part
-- collected so far is still stored.
--
-- Returns a table with decoded values, keyed by field name. The value is
-- the part content, or - for file parts handled by filecb - the filename.
function mimedecode( data, boundary, filecb )

	local params = { }

	-- create a line reader
	local reader = _linereader( data, HTTP_MAX_READBUF )

	-- state variables
	local in_part = false	-- currently inside the content of a valid part
	local in_file = false	-- the current part is a file upload
	local in_fbeg = false	-- the next content line is the first of a file part
	local in_size = true	-- false once HTTP_MAX_CONTENT has been reached

	local filename
	local buffer
	local field
	local clen = 0

	local has_filecb = ( type(filecb) == "function" )

	-- try to read all mime parts
	for line in reader do

		-- update content length
		clen = clen + line:len()

		if clen >= HTTP_MAX_CONTENT then
			in_size = false
		end

		-- when no boundary is given, try to find it
		if not boundary then
			boundary = line:match("^%-%-([^\r\n]+)\r?\n$")
		end

		-- Got a valid boundary line or reached max allowed size.
		if ( boundary and line:sub(1,2) == "--" and line:len() > #boundary + 2 and
			line:sub( 3, 2 + #boundary ) == boundary ) or not in_size
		then
			-- Flush the data of the previous mime part.
			-- When field and/or buffer are nil the previous section is
			-- discarded entirely due to format violations.
			if type(field) == "string" and field:len() > 0 and
				type(buffer) == "string"
			then
				-- According to the rfc the \r\n preceding a boundary
				-- is part of the boundary itself. Since we read line by
				-- line, this crlf sits at the end of our section content,
				-- so strip it before storing the buffer.
				buffer = buffer:gsub("\r?\n$","")

				-- If we're in a file part and a file callback has been
				-- provided then do a final call and send eof.
				if in_file and has_filecb then
					filecb( field, filename, buffer, true )
					params[field] = filename

				-- Store buffer.
				else
					params[field] = buffer
				end
			end

			-- Reset vars. in_part is cleared as well so that nothing is
			-- buffered until the next part turns out to be valid.
			buffer   = ""
			filename = nil
			field    = nil
			in_file  = false
			in_fbeg  = false
			in_part  = false

			-- Abort here if we reached maximum allowed size
			if not in_size then break end

			-- Did we get the closing boundary ("--" boundary "--")?
			-- The two dashes occupy positions #boundary + 3 and
			-- #boundary + 4 of the line.
			if line:len() >= #boundary + 4 and
				line:sub( #boundary + 3, #boundary + 4 ) == "--"
			then
				-- No more processing; in_part is already false.

			-- It's a middle boundary
			else

				-- Read headers of the part that starts here
				local _, headers = extract_headers( reader )

				-- Check for valid headers
				if headers['Content-Disposition'] then

					-- Got no content type header, assume "text/plain"
					if not headers['Content-Type'] then
						headers['Content-Type'] = 'text/plain'
					end

					-- Split the disposition into its "; " separated items
					local hdrvals = luci.util.split(
						headers['Content-Disposition'], '; '
					)

					-- Valid form data part? (hdrvals[2] may be missing)
					if hdrvals[1] == "form-data" and hdrvals[2] and
						hdrvals[2]:match("^name=")
					then
						-- Store field identifier
						field = hdrvals[2]:match('^name="(.+)"$')

						-- Do we have a file upload field?
						if #hdrvals == 3 and hdrvals[3]:match("^filename=") then
							in_file  = true
							in_fbeg  = true
							filename = hdrvals[3]:match('^filename="(.+)"$')
						end

						-- Entering next part processing
						in_part = true
					end
				end
			end

		-- Processing content
		elseif in_part then

			-- XXX: Would be really good to switch from line based to
			--      buffered reading here.

			-- If we're in a file part and a file callback has been
			-- provided then hand over the previously read line and reset
			-- the buffer. One line is always held back so that the crlf
			-- in front of the next boundary can be stripped.
			if in_file and has_filecb then

				-- Nothing has been buffered before the first line
				if in_fbeg then
					in_fbeg = false
				else
					filecb( field, filename, buffer, false )
					buffer = ""
				end
			end

			-- Append data to buffer
			buffer = buffer .. line
		end
	end

	return params
end
261
262
-- Extract "magic", the first line of a http message, from a line iterator.
-- Only the first line delivered by the reader is examined.
-- For a request line it returns the lowercased method, the requested uri
-- and nil. For a status line it returns "response", the numeric status
-- code and the human readable status description.
-- Returns nil if the line is neither.
function extract_magic( reader )

	for line in reader do

		-- Request line?
		local method, uri = line:match("^([A-Z]+) ([^ ]+) HTTP/[01]%.[019]\r?\n$")

		if method then
			return method:lower(), uri, nil
		end

		-- Status line?
		local code, message = line:match("^HTTP/[01]%.[019] ([0-9]+) ([^\r\n]+)\r?\n$")

		if code then
			return "response", code + 0, message
		end

		-- Can't handle it
		return nil
	end
end
293
294
-- Extract headers from a line iterator.
-- Lines of the form "Name: value" are stored into tbl (or a new table if
-- tbl is not given) until a blank line, a line that is not a header, or
-- the end of input is met. Lines following the terminating one are left
-- in the reader.
-- Returns the number of bytes consumed by the header lines (including the
-- terminating blank line, but not a terminating junk line) and the table
-- of headers.
function extract_headers( reader, tbl )

	local headers = tbl or { }
	local count = 0

	-- Iterate line by line
	for line in reader do

		local hdr, val = line:match( "^([A-Z][A-Za-z0-9%-_]+): +([^\r\n]+)\r?\n$" )

		local is_header =
			type(hdr) == "string" and hdr:len() > 0 and
			type(val) == "string" and val:len() > 0

		if not is_header then
			-- A blank line ends the header block and is counted;
			-- junk data ends it too but does not add to the length.
			if line:match("^\r?\n$") then
				count = count + line:len()
			end

			break
		end

		count = count + line:len()
		headers[hdr] = val
	end

	return count, headers
end
326
327
-- Parse a http message: first the header, then - if the header was
-- accepted - the body, both from the same line reader.
-- data:   the raw input, handed to _linereader()
-- filecb: optional callback passed on to parse_message_body()
-- Returns the message table built by parse_message_header(), or nil when
-- no valid header was found.
function parse_message( data, filecb )

	local reader  = _linereader( data, HTTP_MAX_READBUF )
	local message = parse_message_header( reader )

	-- nothing usable, hand the empty result back as is
	if not message then
		return message
	end

	parse_message_body( reader, message, filecb )

	return message
end
340
341
-- Parse a http message header.
-- data: the input, handed to _linereader() (string, reader object or
--       iterator function).
-- Returns a message table with the fields type ("request" or "response"),
-- request_method, request_uri, status_code, status_message, headers,
-- params (decoded query string parameters for get/post requests, else an
-- empty table) and env (CGI style environment variables).
-- Returns nothing when the first line is not a valid request or status
-- line, or when no valid header block follows it.
function parse_message_header( data )

	-- Create a line reader
	local reader = _linereader( data, HTTP_MAX_READBUF )
	local message = { }

	-- Try to extract magic
	local method, arg1, arg2 = extract_magic( reader )

	-- Does it look like a valid message?
	if method then

		-- For responses arg1/arg2 are status code and description;
		-- for requests arg1 is the uri and arg2 is nil.
		message.request_method = method
		message.status_code    = arg2 and arg1 or 200
		message.status_message = arg2 or nil
		message.request_uri    = arg2 and nil or arg1

		if method == "response" then
			message.type = "response"
		else
			message.type = "request"
		end

		local uri = message.request_uri

		-- Parse headers
		local hlen, hdrs = extract_headers( reader )

		-- Valid headers?
		if hlen > 2 and type(hdrs) == "table" then

			message.headers = hdrs

			-- Process get parameters
			if ( method == "get" or method == "post" ) and
				uri and uri:match("?")
			then
				message.params = urldecode_params( uri )
			else
				message.params = { }
			end

			-- The script name is the uri without its query string.
			-- Responses carry no uri, so there is nothing to derive.
			local script_name
			if uri then
				script_name = uri:gsub("?.+$","")
			end

			-- Populate common environment variables
			message.env = {
				CONTENT_LENGTH  = hdrs['Content-Length'];
				CONTENT_TYPE    = hdrs['Content-Type'];
				REQUEST_METHOD  = message.request_method;
				REQUEST_URI     = uri;
				SCRIPT_NAME     = script_name;
				SCRIPT_FILENAME = ""		-- XXX implement me
			}

			-- Populate HTTP_* environment variables
			for _, hdr in ipairs( {
				'Accept',
				'Accept-Charset',
				'Accept-Encoding',
				'Accept-Language',
				'Connection',
				'Cookie',
				'Host',
				'Referer',
				'User-Agent',
			} ) do
				local var = 'HTTP_' .. hdr:upper():gsub("%-","_")

				message.env[var] = hdrs[hdr]
			end

			return message
		end
	end
end
416
417
-- Parse a http message body.
-- reader:  line iterator positioned at the start of the body
-- message: message table as built by parse_message_header(); anything
--          that is not a table is ignored
-- filecb:  optional function( field, filename, chunk, eof ) receiving
--          file uploads (multipart) or the raw body (unhandled types,
--          with field "_post" and filename nil)
--
-- Only post requests that carry a content type are processed:
--  * multipart/form-data and application/x-www-form-urlencoded bodies
--    are decoded and merged into message.params
--  * any other body is fed to filecb, or stored in message.content when
--    no callback is given
-- Reading stops at HTTP_MAX_CONTENT bytes or at the announced content
-- length, whichever is reached first.
function parse_message_body( reader, message, filecb )

	if type(message) ~= "table" then
		return
	end

	local env = message.env

	-- A missing or non-numeric Content-Length falls back to the maximum.
	local clen = tonumber( env.CONTENT_LENGTH ) or HTTP_MAX_CONTENT

	-- Process post method only
	if env.REQUEST_METHOD:lower() ~= "post" or not env.CONTENT_TYPE then
		return
	end

	local ctype = env.CONTENT_TYPE
	local has_filecb = ( type(filecb) == "function" )

	-- Is it multipart/form-data ?
	if ctype:match("^multipart/form%-data") then

		-- Read multipart/mime data
		for k, v in pairs( mimedecode(
			reader,
			ctype:match("boundary=(.+)"),
			filecb
		) ) do
			message.params[k] = v
		end

	-- Is it x-www-form-urlencoded?
	elseif ctype:match('^application/x%-www%-form%-urlencoded') then

		-- Read post data block by block
		local chunks = { }
		local size = 0

		for chunk, eol in reader do

			chunks[#chunks + 1] = chunk
			size = size + #chunk

			-- Abort on eol or if maximum allowed size or content length
			-- is reached. ">=" matters: once the announced length has
			-- arrived no further read must be attempted.
			if eol or size >= HTTP_MAX_CONTENT or size >= clen then
				break
			end
		end

		-- Parse params
		for k, v in pairs( urldecode_params( table.concat( chunks ) ) ) do
			message.params[k] = v
		end

	-- Unhandled encoding
	-- If a file callback is given then feed it chunk by chunk, else
	-- store whole buffer in message.content
	else

		local len = 0

		for chunk in reader do

			len = len + #chunk

			-- We have a callback, feed it.
			if has_filecb then
				filecb( "_post", nil, chunk, false )

			-- Append to .content buffer.
			else
				message.content =
					type(message.content) == "string"
						and message.content .. chunk
						or  chunk
			end

			-- Abort if maximum allowed size or content length is reached
			if len >= HTTP_MAX_CONTENT or len >= clen then
				break
			end
		end

		-- Send eof to callback
		if has_filecb then
			filecb( "_post", nil, "", true )
		end
	end
end
500
501
-- Wrap given object into a line read iterator.
-- obj:   a string, a userdata object offering read() or receive()
--        (read() is preferred when both exist), or a function. A function
--        is returned as is; any other type yields an iterator that always
--        returns nil.
-- bufsz: block size used for reading; values below 256 (or nil) are
--        raised to 256.
-- Each call of the returned iterator reads one block, prepends whatever
-- was left over from the previous call and returns either
--   <text up to and including the first line break>, true   or
--   <the whole block>, false    when the block contains no line break.
-- After an empty block without line break has been returned, the
-- iterator returns nil.
function _linereader( obj, bufsz )

	if not ( bufsz and bufsz >= 256 ) then
		bufsz = 256
	end

	local offset   = 1	-- next position to read from (string input)
	local leftover = ""	-- data behind the last returned line
	local finished = nil	-- set once the input is exhausted

	local fetch
	local kind = type(obj)

	if kind == "string" then

		-- object is string: cut the next block out of it
		fetch = function()
			return obj:sub( offset, offset + bufsz - #leftover - 1 )
		end

	elseif kind == "userdata" and
		( type(obj.receive) == "function" or type(obj.read) == "function" )
	then

		-- object implements a receive() or read() function
		if type(obj.read) == "function" then
			fetch = function() return obj:read( bufsz ) end
		else
			fetch = function() return obj:receive( bufsz ) end
		end

	elseif kind == "function" then

		-- object is a function, assume it already is an iterator
		return obj

	else

		-- no usable data type: dummy iterator
		return function() return nil end
	end

	-- generic block to line algorithm
	return function()

		if finished then
			return nil
		end

		local block = fetch()

		-- anything but a non-empty string counts as "no data"
		if type(block) ~= "string" or #block == 0 then
			block = ""
		end

		offset = offset + #block

		local data = leftover .. block
		local first, last = data:find("\r?\n")

		if first then
			-- keep the part behind the line break for the next call
			leftover = data:sub( last + 1, #data )
			return data:sub( 1, last ), true
		end

		-- no line break: input is exhausted if there is no data at all
		finished = ( #data == 0 )
		leftover = ""

		return data, false
	end
end