contrib, build: bundle LuaSrcDiet and make it available in build targets
diff --git a/contrib/luasrcdiet/lua/llex.lua b/contrib/luasrcdiet/lua/llex.lua
new file mode 100644
index 0000000..a637f30
--- /dev/null
+++ b/contrib/luasrcdiet/lua/llex.lua
@@ -0,0 +1,398 @@
+--[[--------------------------------------------------------------------
+
+  llex.lua: Lua 5.1 lexical analyzer in Lua
+  This file is part of LuaSrcDiet, based on Yueliang material.
+
+  Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net>
+  The COPYRIGHT file describes the conditions
+  under which this software may be distributed.
+
+  See the ChangeLog for more information.
+
+----------------------------------------------------------------------]]
+
+--[[--------------------------------------------------------------------
+-- NOTES:
+-- * This is a version of the native 5.1.x lexer from Yueliang 0.4.0,
+--   with significant modifications to handle LuaSrcDiet's needs:
+--   (1) llex.error is an optional error function handler
+--   (2) seminfo for strings includes their delimiters and no
+--       translation operations are performed on them
+-- * ADDED shbang handling to support executable scripts
+-- * NO localized decimal point replacement magic
+-- * NO limit to number of lines
+-- * NO support for compatible long strings (LUA_COMPAT_LSTR)
+-- * Please read technotes.txt for more technical details.
+----------------------------------------------------------------------]]
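+
+--[[--------------------------------------------------------------------
+-- Typical usage (an illustrative sketch, not part of the module; the
+-- source text and "@example.lua" chunk name are made up, and the
+-- require path depends on how package.path is set up by the build):
+--
+--   local llex = require "llex"
+--   llex.init("local x = 1", "@example.lua")
+--   llex.llex()
+--   for i = 1, #llex.tok do
+--     print(llex.tok[i], llex.seminfo[i], llex.tokln[i])
+--   end
+----------------------------------------------------------------------]]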
+
+local base = _G
+local string = require "string"
+module "llex"
+
+local find = string.find
+local match = string.match
+local sub = string.sub
+
+----------------------------------------------------------------------
+-- initialize keyword list, variables
+----------------------------------------------------------------------
+
+local kw = {}
+for v in string.gmatch([[
+and break do else elseif end false for function if in
+local nil not or repeat return then true until while]], "%S+") do
+  kw[v] = true
+end
+
+-- NOTE: see init() for module variables (externally visible):
+--       tok, seminfo, tokln
+
+local z,                -- source stream
+      sourceid,         -- name of source
+      I,                -- position of lexer
+      buff,             -- buffer for strings
+      ln                -- line number
+
+----------------------------------------------------------------------
+-- add information to token listing
+----------------------------------------------------------------------
+
+local function addtoken(token, info)
+  local i = #tok + 1
+  tok[i] = token
+  seminfo[i] = info
+  tokln[i] = ln
+end
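+-- e.g. after lexing "x = 1", tok[3] is "TK_OP", seminfo[3] is "=" and
+-- tokln[3] is 1: the three lists are parallel (values illustrative)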
+
+----------------------------------------------------------------------
+-- increments the line number and handles end-of-line characters
+----------------------------------------------------------------------
+
+local function inclinenumber(i, is_tok)
+  local sub = sub
+  local old = sub(z, i, i)
+  i = i + 1  -- skip '\n' or '\r'
+  local c = sub(z, i, i)
+  if (c == "\n" or c == "\r") and (c ~= old) then
+    i = i + 1  -- skip '\n\r' or '\r\n'
+    old = old..c
+  end
+  if is_tok then addtoken("TK_EOL", old) end
+  ln = ln + 1
+  I = i
+  return i
+end
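+-- e.g. a "\r\n" or "\n\r" pair is consumed as one line ending, so mixed
+-- DOS/Unix sources still produce accurate line numbers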
+
+----------------------------------------------------------------------
+-- initialize lexer for given source _z and source name _sourceid
+----------------------------------------------------------------------
+
+function init(_z, _sourceid)
+  z = _z                        -- source
+  sourceid = _sourceid          -- name of source
+  I = 1                         -- lexer's position in source
+  ln = 1                        -- line number
+  tok = {}                      -- lexed token list*
+  seminfo = {}                  -- lexed semantic information list*
+  tokln = {}                    -- line numbers for messages*
+                                -- (*) externally visible through the module
+  --------------------------------------------------------------------
+  -- initial processing (shbang handling)
+  --------------------------------------------------------------------
+  local p, _, q, r = find(z, "^(#[^\r\n]*)(\r?\n?)")
+  if p then                             -- skip first line
+    I = I + #q
+    addtoken("TK_COMMENT", q)
+    if #r > 0 then inclinenumber(I, true) end
+  end
+end
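+-- e.g. (illustrative) init('#!/usr/bin/lua\nprint("hi")', "@script")
+-- records the shbang line as TK_COMMENT "#!/usr/bin/lua" followed by a
+-- TK_EOL token before normal lexing starts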
+
+----------------------------------------------------------------------
+-- returns a chunk name or id, no truncation for long names
+----------------------------------------------------------------------
+
+function chunkid()
+  if sourceid and match(sourceid, "^[=@]") then
+    return sub(sourceid, 2)  -- remove first char
+  end
+  return "[string]"
+end
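+-- e.g. chunkid() with sourceid "@sample.lua" returns "sample.lua";
+-- without a "=" or "@" prefix it returns "[string]" (name illustrative)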
+
+----------------------------------------------------------------------
+-- formats error message and throws error
+-- * a simplified version, does not report what token was responsible
+----------------------------------------------------------------------
+
+function errorline(s, line)
+  local e = error or base.error
+  e(string.format("%s:%d: %s", chunkid(), line or ln, s))
+end
+local errorline = errorline
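+-- e.g. with sourceid "@sample.lua" and ln == 3, errorline("unfinished
+-- string") raises "sample.lua:3: unfinished string" (illustrative)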
+
+------------------------------------------------------------------------
+-- count separators ("=") in a long string delimiter
+------------------------------------------------------------------------
+
+local function skip_sep(i)
+  local sub = sub
+  local s = sub(z, i, i)
+  i = i + 1
+  local count = #match(z, "=*", i)  -- note, take the length
+  i = i + count
+  I = i
+  return (sub(z, i, i) == s) and count or (-count) - 1
+end
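+-- e.g. called at the first bracket, "[[" gives 0 and "[=[" gives 1,
+-- while a lone "[" gives -1 and "[=" gives -2 (positive = valid level)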
+
+----------------------------------------------------------------------
+-- reads a long string or long comment
+----------------------------------------------------------------------
+
+local function read_long_string(is_str, sep)
+  local i = I + 1  -- skip 2nd '['
+  local sub = sub
+  local c = sub(z, i, i)
+  if c == "\r" or c == "\n" then  -- string starts with a newline?
+    i = inclinenumber(i)  -- skip it
+  end
+  local j = i
+  while true do
+    local p, q, r = find(z, "([\r\n%]])", i) -- (long range)
+    if not p then
+      errorline(is_str and "unfinished long string" or
+                "unfinished long comment")
+    end
+    i = p
+    if r == "]" then                    -- delimiter test
+      if skip_sep(i) == sep then
+        buff = sub(z, buff, I)
+        I = I + 1  -- skip 2nd ']'
+        return buff
+      end
+      i = I
+    else                                -- newline
+      buff = buff.."\n"
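+      -- note: buff still holds the numeric start index here, and the
+      -- sub() call in the delimiter branch coerces it back via tonumber,
+      -- so the appended "\n" is effectively a no-op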
+      i = inclinenumber(i)
+    end
+  end--while
+end
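+-- e.g. (illustrative) a source containing [==[hi]==] is returned whole,
+-- "[==[hi]==]", with delimiters kept, per the NOTES at the top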
+
+----------------------------------------------------------------------
+-- reads a string
+----------------------------------------------------------------------
+
+local function read_string(del)
+  local i = I
+  local find = find
+  local sub = sub
+  while true do
+    local p, q, r = find(z, "([\n\r\\\"\'])", i) -- (long range)
+    if p then
+      if r == "\n" or r == "\r" then
+        errorline("unfinished string")
+      end
+      i = p
+      if r == "\\" then                         -- handle escapes
+        i = i + 1
+        r = sub(z, i, i)
+        if r == "" then break end -- (EOZ error)
+        p = find("abfnrtv\n\r", r, 1, true)
+        ------------------------------------------------------
+        if p then                               -- special escapes
+          if p > 7 then
+            i = inclinenumber(i)
+          else
+            i = i + 1
+          end
+        ------------------------------------------------------
+        elseif find(r, "%D") then               -- other non-digits
+          i = i + 1
+        ------------------------------------------------------
+        else                                    -- \xxx sequence
+          local p, q, s = find(z, "^(%d%d?%d?)", i)
+          i = q + 1
+          if s + 1 > 256 then -- UCHAR_MAX
+            errorline("escape sequence too large")
+          end
+        ------------------------------------------------------
+        end--if p
+      else
+        i = i + 1
+        if r == del then                        -- ending delimiter
+          I = i
+          return sub(z, buff, i - 1)            -- return string
+        end
+      end--if r
+    else
+      break -- (error)
+    end--if p
+  end--while
+  errorline("unfinished string")
+end
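+-- e.g. (illustrative) reading 'a\tb' returns the raw text 'a\tb' with
+-- quotes kept and the escape untranslated, per the NOTES at the top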
+
+------------------------------------------------------------------------
+-- main lexer function
+------------------------------------------------------------------------
+
+function llex()
+  local find = find
+  local match = match
+  while true do--outer
+    local i = I
+    -- inner loop allows break to be used to nicely section tests
+    while true do--inner
+      ----------------------------------------------------------------
+      local p, _, r = find(z, "^([_%a][_%w]*)", i)
+      if p then
+        I = i + #r
+        if kw[r] then
+          addtoken("TK_KEYWORD", r)             -- reserved word (keyword)
+        else
+          addtoken("TK_NAME", r)                -- identifier
+        end
+        break -- (continue)
+      end
+      ----------------------------------------------------------------
+      local p, _, r = find(z, "^(%.?)%d", i)
+      if p then                                 -- numeral
+        if r == "." then i = i + 1 end
+        local _, q, r = find(z, "^%d*[%.%d]*([eE]?)", i)
+        i = q + 1
+        if #r == 1 then                         -- optional exponent
+          if match(z, "^[%+%-]", i) then        -- optional sign
+            i = i + 1
+          end
+        end
+        local _, q = find(z, "^[_%w]*", i)
+        I = q + 1
+        local v = sub(z, p, q)                  -- string equivalent
+        if not base.tonumber(v) then            -- handles hex test also
+          errorline("malformed number")
+        end
+        addtoken("TK_NUMBER", v)
+        break -- (continue)
+      end
+      ----------------------------------------------------------------
+      local p, q, r, t = find(z, "^((%s)[ \t\v\f]*)", i)
+      if p then
+        if t == "\n" or t == "\r" then          -- newline
+          inclinenumber(i, true)
+        else
+          I = q + 1                             -- whitespace
+          addtoken("TK_SPACE", r)
+        end
+        break -- (continue)
+      end
+      ----------------------------------------------------------------
+      local r = match(z, "^%p", i)
+      if r then
+        buff = i
+        local p = find("-[\"\'.=<>~", r, 1, true)
+        if p then
+          -- two-level if block for punctuation/symbols
+          --------------------------------------------------------
+          if p <= 2 then
+            if p == 1 then                      -- minus
+              local c = match(z, "^%-%-(%[?)", i)
+              if c then
+                i = i + 2
+                local sep = -1
+                if c == "[" then
+                  sep = skip_sep(i)
+                end
+                if sep >= 0 then                -- long comment
+                  addtoken("TK_LCOMMENT", read_long_string(false, sep))
+                else                            -- short comment
+                  I = find(z, "[\n\r]", i) or (#z + 1)
+                  addtoken("TK_COMMENT", sub(z, buff, I - 1))
+                end
+                break -- (continue)
+              end
+              -- (fall through for "-")
+            else                                -- [ or long string
+              local sep = skip_sep(i)
+              if sep >= 0 then
+                addtoken("TK_LSTRING", read_long_string(true, sep))
+              elseif sep == -1 then
+                addtoken("TK_OP", "[")
+              else
+                errorline("invalid long string delimiter")
+              end
+              break -- (continue)
+            end
+          --------------------------------------------------------
+          elseif p <= 5 then
+            if p < 5 then                       -- strings
+              I = i + 1
+              addtoken("TK_STRING", read_string(r))
+              break -- (continue)
+            end
+            r = match(z, "^%.%.?%.?", i)        -- .|..|... dots
+            -- (fall through)
+          --------------------------------------------------------
+          else                                  -- relational
+            r = match(z, "^%p=?", i)
+            -- (fall through)
+          end
+        end
+        I = i + #r
+        addtoken("TK_OP", r)  -- for other symbols, fall through
+        break -- (continue)
+      end
+      ----------------------------------------------------------------
+      local r = sub(z, i, i)
+      if r ~= "" then
+        I = i + 1
+        addtoken("TK_OP", r)                    -- other single-char tokens
+        break
+      end
+      addtoken("TK_EOS", "")                    -- end of stream,
+      return                                    -- exit here
+      ----------------------------------------------------------------
+    end--while inner
+  end--while outer
+end
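+
+--[[--------------------------------------------------------------------
+-- Worked example (illustrative): lexing the source
+--   local x = 1 -- hi
+-- produces the token stream
+--   TK_KEYWORD "local", TK_SPACE " ", TK_NAME "x", TK_SPACE " ",
+--   TK_OP "=", TK_SPACE " ", TK_NUMBER "1", TK_SPACE " ",
+--   TK_COMMENT "-- hi", TK_EOS ""
+-- with tokln[i] == 1 for every token.
+----------------------------------------------------------------------]]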
+
+return base.getfenv()