--[[--------------------------------------------------------------------

  llex.lua: Lua 5.1 lexical analyzer in Lua
  This file is part of LuaSrcDiet, based on Yueliang material.

  Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net>
  The COPYRIGHT file describes the conditions
  under which this software may be distributed.

  See the ChangeLog for more information.

----------------------------------------------------------------------]]

--[[--------------------------------------------------------------------
-- NOTES:
-- * This is a version of the native 5.1.x lexer from Yueliang 0.4.0,
--   with significant modifications to handle LuaSrcDiet's needs:
--   (1) llex.error is an optional error function handler
--   (2) seminfo for strings includes their delimiters and no
--       translation operations are performed on them
-- * ADDED shbang handling to support executable scripts
-- * NO localized decimal point replacement magic
-- * NO limit to number of lines
-- * NO support for compatible long strings (LUA_COMPAT_LSTR)
-- * Please read technotes.txt for more technical details.
----------------------------------------------------------------------]]
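
--[[--------------------------------------------------------------------
-- Illustrative usage sketch (not part of the module; it assumes this
-- file is reachable on package.path under the name "llex"):
--
--   local llex = require "llex"
--   llex.init("local x = 1 -- demo", "=demo")
--   llex.llex()
--   for i = 1, #llex.tok do
--     print(llex.tokln[i], llex.tok[i], llex.seminfo[i])
--   end
--
-- init() primes the lexer state; llex() then fills the externally
-- visible tok, seminfo and tokln lists in a single pass.
----------------------------------------------------------------------]]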

local base = _G
local string = require "string"
module "llex"

local find = string.find
local match = string.match
local sub = string.sub

----------------------------------------------------------------------
-- initialize keyword list, variables
----------------------------------------------------------------------

local kw = {}
for v in string.gmatch([[
and break do else elseif end false for function if in
local nil not or repeat return then true until while]], "%S+") do
  kw[v] = true
end

-- NOTE: see init() for module variables (externally visible):
--       tok, seminfo, tokln

local z,                -- source stream
      sourceid,         -- name of source
      I,                -- position of lexer
      buff,             -- buffer for strings
      ln                -- line number

----------------------------------------------------------------------
-- add information to token listing
----------------------------------------------------------------------

local function addtoken(token, info)
  local i = #tok + 1
  tok[i] = token
  seminfo[i] = info
  tokln[i] = ln
end

----------------------------------------------------------------------
-- handles line number incrementation and end-of-line characters
----------------------------------------------------------------------

local function inclinenumber(i, is_tok)
  local sub = sub
  local old = sub(z, i, i)
  i = i + 1  -- skip '\n' or '\r'
  local c = sub(z, i, i)
  if (c == "\n" or c == "\r") and (c ~= old) then
    i = i + 1  -- skip '\n\r' or '\r\n'
    old = old..c
  end
  if is_tok then addtoken("TK_EOL", old) end
  ln = ln + 1
  I = i
  return i
end
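
-- A worked example of the pairing logic above: with z = "a\r\nb",
-- inclinenumber(2) consumes the two-character "\r\n" ending as a
-- single line break, increments ln once, and returns 4 (the index of
-- "b"). A bare "\n" or "\r" is likewise one break, as is "\n\r".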

----------------------------------------------------------------------
-- initialize lexer for given source _z and source name _sourceid
----------------------------------------------------------------------

function init(_z, _sourceid)
  z = _z                        -- source
  sourceid = _sourceid          -- name of source
  I = 1                         -- lexer's position in source
  ln = 1                        -- line number
  tok = {}                      -- lexed token list*
  seminfo = {}                  -- lexed semantic information list*
  tokln = {}                    -- line numbers for messages*
                                -- (*) externally visible thru' module
  --------------------------------------------------------------------
  -- initial processing (shbang handling)
  --------------------------------------------------------------------
  local p, _, q, r = find(z, "^(#[^\r\n]*)(\r?\n?)")
  if p then                             -- skip first line
    I = I + #q
    addtoken("TK_COMMENT", q)
    if #r > 0 then inclinenumber(I, true) end
  end
end
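
-- Example: for z = "#!/usr/bin/env lua\nprint(1)", init() records the
-- whole "#!..." line as a TK_COMMENT token, lets inclinenumber() add
-- the matching TK_EOL token, and leaves I at the "p" of "print".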

----------------------------------------------------------------------
-- returns a chunk name or id, no truncation for long names
----------------------------------------------------------------------

function chunkid()
  if sourceid and match(sourceid, "^[=@]") then
    return sub(sourceid, 2)  -- remove first char
  end
  return "[string]"
end
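
-- Example: a sourceid of "@llex.lua" or "=llex.lua" yields "llex.lua";
-- any other (or absent) sourceid yields the generic "[string]".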

----------------------------------------------------------------------
-- formats error message and throws error
-- * a simplified version, does not report what token was responsible
----------------------------------------------------------------------

function errorline(s, line)
  local e = error or base.error
  e(string.format("%s:%d: %s", chunkid(), line or ln, s))
end
local errorline = errorline

------------------------------------------------------------------------
-- count separators ("=") in a long string delimiter
------------------------------------------------------------------------

local function skip_sep(i)
  local sub = sub
  local s = sub(z, i, i)
  i = i + 1
  local count = #match(z, "=*", i)  -- note, take the length
  i = i + count
  I = i
  return (sub(z, i, i) == s) and count or (-count) - 1
end
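
-- Return value convention, by example: at "[[" skip_sep() returns 0,
-- at "[==[" it returns 2, and where no matching bracket follows (e.g.
-- "[=x") it returns -(count) - 1, here -2. Callers can thus tell a
-- plain "[" operator (-1) from a malformed long-bracket delimiter
-- (anything below -1).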

----------------------------------------------------------------------
-- reads a long string or long comment
----------------------------------------------------------------------

local function read_long_string(is_str, sep)
  local i = I + 1  -- skip 2nd '['
  local sub = sub
  local c = sub(z, i, i)
  if c == "\r" or c == "\n" then  -- string starts with a newline?
    i = inclinenumber(i)  -- skip it
  end
  local j = i
  while true do
    local p, q, r = find(z, "([\r\n%]])", i) -- (long range)
    if not p then
      errorline(is_str and "unfinished long string" or
                "unfinished long comment")
    end
    i = p
    if r == "]" then                    -- delimiter test
      if skip_sep(i) == sep then
        buff = sub(z, buff, I)
        I = I + 1  -- skip 2nd ']'
        return buff
      end
      i = I
    else                                -- newline
      buff = buff.."\n"
      i = inclinenumber(i)
    end
  end--while
end
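
-- Example: lexing "[=[hi]=]" calls read_long_string(true, 1), which
-- returns the full "[=[hi]=]" with delimiters intact, as promised in
-- the NOTES above; no escape or newline translation is applied.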

----------------------------------------------------------------------
-- reads a string
----------------------------------------------------------------------

local function read_string(del)
  local i = I
  local find = find
  local sub = sub
  while true do
    local p, q, r = find(z, "([\n\r\\\"\'])", i) -- (long range)
    if p then
      if r == "\n" or r == "\r" then
        errorline("unfinished string")
      end
      i = p
      if r == "\\" then                  -- handle escapes
        i = i + 1
        r = sub(z, i, i)
        if r == "" then break end -- (EOZ error)
        p = find("abfnrtv\n\r", r, 1, true)
        ------------------------------------------------------
        if p then                        -- special escapes
          if p > 7 then
            i = inclinenumber(i)
          else
            i = i + 1
          end
        ------------------------------------------------------
        elseif find(r, "%D") then        -- other non-digits
          i = i + 1
        ------------------------------------------------------
        else                             -- \xxx sequence
          local p, q, s = find(z, "^(%d%d?%d?)", i)
          i = q + 1
          if s + 1 > 256 then  -- UCHAR_MAX
            errorline("escape sequence too large")
          end
        ------------------------------------------------------
        end--if p
      else
        i = i + 1
        if r == del then                 -- ending delimiter
          I = i
          return sub(z, buff, i - 1)     -- return string
        end
      end--if r
    else
      break -- (error)
    end--if p
  end--while
  errorline("unfinished string")
end
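
-- Example: when llex() meets z = '"hi"' it sets buff = 1, I = 2 and
-- calls read_string('"'), which returns the quoted text '"hi"' with
-- both delimiters kept, again per the NOTES above.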

------------------------------------------------------------------------
-- main lexer function
------------------------------------------------------------------------

function llex()
  local find = find
  local match = match
  while true do--outer
    local i = I
    -- inner loop allows break to be used to nicely section tests
    while true do--inner
      ----------------------------------------------------------------
      local p, _, r = find(z, "^([_%a][_%w]*)", i)
      if p then
        I = i + #r
        if kw[r] then
          addtoken("TK_KEYWORD", r)     -- reserved word (keyword)
        else
          addtoken("TK_NAME", r)        -- identifier
        end
        break -- (continue)
      end
      ----------------------------------------------------------------
      local p, _, r = find(z, "^(%.?)%d", i)
      if p then                                 -- numeral
        if r == "." then i = i + 1 end
        local _, q, r = find(z, "^%d*[%.%d]*([eE]?)", i)
        i = q + 1
        if #r == 1 then                         -- optional exponent
          if match(z, "^[%+%-]", i) then        -- optional sign
            i = i + 1
          end
        end
        local _, q = find(z, "^[_%w]*", i)
        I = q + 1
        local v = sub(z, p, q)                  -- string equivalent
        if not base.tonumber(v) then            -- handles hex test also
          errorline("malformed number")
        end
        addtoken("TK_NUMBER", v)
        break -- (continue)
      end
      ----------------------------------------------------------------
      local p, q, r, t = find(z, "^((%s)[ \t\v\f]*)", i)
      if p then
        if t == "\n" or t == "\r" then          -- newline
          inclinenumber(i, true)
        else
          I = q + 1                             -- whitespace
          addtoken("TK_SPACE", r)
        end
        break -- (continue)
      end
      ----------------------------------------------------------------
      local r = match(z, "^%p", i)
      if r then
        buff = i
        local p = find("-[\"\'.=<>~", r, 1, true)
        if p then
          -- two-level if block for punctuation/symbols
          --------------------------------------------------------
          if p <= 2 then
            if p == 1 then                      -- minus
              local c = match(z, "^%-%-(%[?)", i)
              if c then
                i = i + 2
                local sep = -1
                if c == "[" then
                  sep = skip_sep(i)
                end
                if sep >= 0 then                -- long comment
                  addtoken("TK_LCOMMENT", read_long_string(false, sep))
                else                            -- short comment
                  I = find(z, "[\n\r]", i) or (#z + 1)
                  addtoken("TK_COMMENT", sub(z, buff, I - 1))
                end
                break -- (continue)
              end
              -- (fall through for "-")
            else                                -- [ or long string
              local sep = skip_sep(i)
              if sep >= 0 then
                addtoken("TK_LSTRING", read_long_string(true, sep))
              elseif sep == -1 then
                addtoken("TK_OP", "[")
              else
                errorline("invalid long string delimiter")
              end
              break -- (continue)
            end
          --------------------------------------------------------
          elseif p <= 5 then
            if p < 5 then                       -- strings
              I = i + 1
              addtoken("TK_STRING", read_string(r))
              break -- (continue)
            end
            r = match(z, "^%.%.?%.?", i)        -- .|..|... dots
            -- (fall through)
          --------------------------------------------------------
          else                                  -- relational
            r = match(z, "^%p=?", i)
            -- (fall through)
          end
        end
        I = i + #r
        addtoken("TK_OP", r)  -- for other symbols, fall through
        break -- (continue)
      end
      ----------------------------------------------------------------
      local r = sub(z, i, i)
      if r ~= "" then
        I = i + 1
        addtoken("TK_OP", r)  -- other single-char tokens
        break
      end
      addtoken("TK_EOS", "")  -- end of stream,
      return                  -- exit here
      ----------------------------------------------------------------
    end--while inner
  end--while outer
end
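
-- Expected output sketch: lexing the source "local x = 1" should
-- produce the token stream
--   TK_KEYWORD "local", TK_SPACE " ", TK_NAME "x", TK_SPACE " ",
--   TK_OP "=", TK_SPACE " ", TK_NUMBER "1", TK_EOS ""
-- with tokln holding line 1 for every entry.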

return base.getfenv()