--[[--------------------------------------------------------------------

  optlex.lua: does lexer-based optimizations
  This file is part of LuaSrcDiet.

  Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net>
  The COPYRIGHT file describes the conditions
  under which this software may be distributed.

  See the ChangeLog for more information.

----------------------------------------------------------------------]]

--[[--------------------------------------------------------------------
-- NOTES:
-- * For more lexer-based optimization ideas, see the TODO items or
--   look at technotes.txt.
-- * TODO: general string delimiter conversion optimizer
-- * TODO: (numbers) warn if overly significant digit
----------------------------------------------------------------------]]

local base = _G
local string = require "string"
module "optlex"
local match = string.match
local sub = string.sub
local find = string.find
local rep = string.rep
local print

------------------------------------------------------------------------
-- variables and data structures
------------------------------------------------------------------------

-- error function; can be overridden by assigning a custom function to the module
error = base.error

warn = {}                       -- table for warning flags

local stoks, sinfos, stoklns    -- source lists

local is_realtoken = {          -- significant (grammar) tokens
  TK_KEYWORD = true,
  TK_NAME = true,
  TK_NUMBER = true,
  TK_STRING = true,
  TK_LSTRING = true,
  TK_OP = true,
  TK_EOS = true,
}
local is_faketoken = {          -- whitespace (non-grammar) tokens
  TK_COMMENT = true,
  TK_LCOMMENT = true,
  TK_EOL = true,
  TK_SPACE = true,
}

local opt_details               -- for extra information

------------------------------------------------------------------------
-- true if current token is at the start of a line
-- * skips over deleted tokens via recursion
------------------------------------------------------------------------

local function atlinestart(i)
  local tok = stoks[i - 1]
  if i <= 1 or tok == "TK_EOL" then
    return true
  elseif tok == "" then
    return atlinestart(i - 1)
  end
  return false
end

------------------------------------------------------------------------
-- true if current token is at the end of a line
-- * skips over deleted tokens via recursion
------------------------------------------------------------------------

local function atlineend(i)
  local tok = stoks[i + 1]
  if i >= #stoks or tok == "TK_EOL" or tok == "TK_EOS" then
    return true
  elseif tok == "" then
    return atlineend(i + 1)
  end
  return false
end

------------------------------------------------------------------------
-- counts comment EOLs inside a long comment
-- * in order to keep line numbering, EOLs need to be reinserted
------------------------------------------------------------------------

local function commenteols(lcomment)
  local sep = #match(lcomment, "^%-%-%[=*%[")
  local z = sub(lcomment, sep + 1, -(sep - 1))  -- remove delims
  local i, c = 1, 0
  while true do
    local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i)
    if not p then break end     -- if no matches, done
    i = p + 1
    c = c + 1
    if #s > 0 and r ~= s then   -- skip CRLF or LFCR
      i = i + 1
    end
  end
  return c
end

------------------------------------------------------------------------
-- compares two tokens (i, j) and returns the whitespace required
-- * important! see technotes.txt for more information
-- * only two grammar/real tokens are being considered
-- * if "", no separation is needed
-- * if " ", then at least one whitespace (or EOL) is required
------------------------------------------------------------------------

local function checkpair(i, j)
  local match = match
  local t1, t2 = stoks[i], stoks[j]
  --------------------------------------------------------------------
  if t1 == "TK_STRING" or t1 == "TK_LSTRING" or
     t2 == "TK_STRING" or t2 == "TK_LSTRING" then
    return ""
  --------------------------------------------------------------------
  elseif t1 == "TK_OP" or t2 == "TK_OP" then
    if (t1 == "TK_OP" and (t2 == "TK_KEYWORD" or t2 == "TK_NAME")) or
       (t2 == "TK_OP" and (t1 == "TK_KEYWORD" or t1 == "TK_NAME")) then
      return ""
    end
    if t1 == "TK_OP" and t2 == "TK_OP" then
      -- for TK_OP/TK_OP pairs, see notes in technotes.txt
      local op, op2 = sinfos[i], sinfos[j]
      if (match(op, "^%.%.?$") and match(op2, "^%.")) or
         (match(op, "^[~=<>]$") and op2 == "=") or
         (op == "[" and (op2 == "[" or op2 == "=")) then
        return " "
      end
      return ""
    end
    -- "TK_OP" + "TK_NUMBER" case
    local op = sinfos[i]
    if t2 == "TK_OP" then op = sinfos[j] end
    if match(op, "^%.%.?%.?$") then
      return " "
    end
    return ""
  --------------------------------------------------------------------
  else  -- "TK_KEYWORD" | "TK_NAME" | "TK_NUMBER"
    return " "
  --------------------------------------------------------------------
  end
end
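
------------------------------------------------------------------------
-- illustrative examples for checkpair (added for clarity; not part of
-- the original module) -- pairs that must keep one space per the
-- patterns above, everything else may abut directly:
-- * a dot op followed by a dot-initial op, e.g. ".." then "...":
--   the dots would otherwise fuse into a different token
-- * "~" "=" "<" ">" followed by "=": would fuse into "~=", "==", etc.
-- * "[" followed by "[" or "=": would mis-lex as a long bracket,
--   e.g. t[ [[s]] ] must not become t[[[s]]]
-- * "." ".." "..." next to a TK_NUMBER, e.g. 1 .. 2, since "1..2"
--   lexes as one malformed number
-- * any keyword/name/number pair, e.g. "local x", "return 1"
------------------------------------------------------------------------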

------------------------------------------------------------------------
-- repack tokens, removing deletions caused by optimization process
------------------------------------------------------------------------

local function repack_tokens()
  local dtoks, dinfos, dtoklns = {}, {}, {}
  local j = 1
  for i = 1, #stoks do
    local tok = stoks[i]
    if tok ~= "" then
      dtoks[j], dinfos[j], dtoklns[j] = tok, sinfos[i], stoklns[i]
      j = j + 1
    end
  end
  stoks, sinfos, stoklns = dtoks, dinfos, dtoklns
end

------------------------------------------------------------------------
-- number optimization
-- * optimization using string formatting functions is one way of doing
--   this, but here, we consider all cases and handle them separately
--   (possibly an idiotic approach...)
-- * scientific notation being generated is not in canonical form, this
--   may or may not be a bad thing, feedback welcome
-- * note: intermediate portions need to fit into a normal number range
-- * optimizations can be divided based on number patterns:
-- * hexadecimal:
--   (1) no need to remove leading zeros, just skip to (2)
--   (2) convert to integer if size equal or smaller
--       * change if equal size -> lose the 'x' to reduce entropy
--   (3) number is then processed as an integer
--   (4) note: does not make 0[xX] consistent
-- * integer:
--   (1) note: includes anything with trailing ".", ".0", ...
--   (2) remove useless fractional part, if present, e.g. 123.000
--   (3) remove leading zeros, e.g. 000123
--   (4) switch to scientific if shorter, e.g. 123000 -> 123e3
-- * with fraction:
--   (1) split into digits dot digits
--   (2) if no integer portion, take as zero (can omit later)
--   (3) handle degenerate .000 case, after which the fractional part
--       must be non-zero (if zero, it's matched as an integer)
--   (4) remove trailing zeros for fractional portion
--   (5) p.q where p > 0 and q > 0 cannot be shortened any more
--   (6) otherwise p == 0 and the form is .q, e.g. .000123
--   (7) if scientific shorter, convert, e.g. .000123 -> 123e-6
-- * scientific:
--   (1) split into (digits dot digits) [eE] ([+-] digits)
--   (2) if significand has ".", shift it out so it becomes an integer
--   (3) if significand is zero, just use zero
--   (4) remove leading zeros for significand
--   (5) shift out trailing zeros for significand
--   (6) examine exponent and determine which format is best:
--       integer, with fraction, scientific
------------------------------------------------------------------------

local function do_number(i)
  local before = sinfos[i]              -- 'before'
  local z = before                      -- working representation
  local y                               -- 'after', if better
  --------------------------------------------------------------------
  if match(z, "^0[xX]") then            -- hexadecimal number
    local v = base.tostring(base.tonumber(z))
    if #v <= #z then
      z = v                             -- change to integer, AND continue
    else
      return                            -- no change; stick to hex
    end
  end
  --------------------------------------------------------------------
  if match(z, "^%d+%.?0*$") then        -- integer or has useless frac
    z = match(z, "^(%d+)%.?0*$")        -- int portion only
    if z + 0 > 0 then
      z = match(z, "^0*([1-9]%d*)$")    -- remove leading zeros
      local v = #match(z, "0*$")
      local nv = base.tostring(v)
      if v > #nv + 1 then               -- scientific is shorter
        z = sub(z, 1, #z - v).."e"..nv
      end
      y = z
    else
      y = "0"                           -- basic zero
    end
  --------------------------------------------------------------------
  elseif not match(z, "[eE]") then      -- number with fraction part
    local p, q = match(z, "^(%d*)%.(%d+)$")     -- split
    if p == "" then p = 0 end                   -- int part zero
    if q + 0 == 0 and p == 0 then
      y = "0"                           -- degenerate .000 case
    else
      -- now, q > 0 holds and p is a number
      local v = #match(q, "0*$")        -- remove trailing zeros
      if v > 0 then
        q = sub(q, 1, #q - v)
      end
      -- if p > 0, nothing else we can do to simplify p.q case
      if p + 0 > 0 then
        y = p.."."..q
      else
        y = "."..q                      -- tentative, e.g. .000123
        local v = #match(q, "^0*")      -- # leading zeros
        local w = #q - v                -- # significant digits
        local nv = base.tostring(#q)
        -- e.g. compare 123e-6 versus .000123
        if w + 2 + #nv < 1 + #q then
          y = sub(q, -w).."e-"..nv
        end
      end
    end
  --------------------------------------------------------------------
  else                                  -- scientific number
    local sig, ex = match(z, "^([^eE]+)[eE]([%+%-]?%d+)$")
    ex = base.tonumber(ex)
    -- if got ".", shift out fractional portion of significand
    local p, q = match(sig, "^(%d*)%.(%d*)$")
    if p then
      ex = ex - #q
      sig = p..q
    end
    if sig + 0 == 0 then
      y = "0"                           -- basic zero
    else
      local v = #match(sig, "^0*")      -- remove leading zeros
      sig = sub(sig, v + 1)
      v = #match(sig, "0*$")            -- shift out trailing zeros
      if v > 0 then
        sig = sub(sig, 1, #sig - v)
        ex = ex + v
      end
      -- examine exponent and determine which format is best
      local nex = base.tostring(ex)
      if ex == 0 then                   -- it's just an integer
        y = sig
      elseif ex > 0 and (ex <= 1 + #nex) then   -- a number
        y = sig..rep("0", ex)
      elseif ex < 0 and (ex >= -#sig) then      -- fraction, e.g. .123
        v = #sig + ex
        y = sub(sig, 1, v).."."..sub(sig, v + 1)
      elseif ex < 0 and (#nex >= -ex - #sig) then
        -- e.g. compare 1234e-5 versus .01234
        -- gives: #sig + 1 + #nex >= 1 + (-ex - #sig) + #sig
        -- -> #nex >= -ex - #sig
        v = -ex - #sig
        y = "."..rep("0", v)..sig
      else                              -- non-canonical scientific representation
        y = sig.."e"..ex
      end
    end--if sig
  end
  --------------------------------------------------------------------
  if y and y ~= sinfos[i] then
    if opt_details then
      print("<number> (line "..stoklns[i]..") "..sinfos[i].." -> "..y)
      opt_details = opt_details + 1
    end
    sinfos[i] = y
  end
end
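
------------------------------------------------------------------------
-- worked examples for do_number (added for clarity; not part of the
-- original module) -- sinfos[i] before -> after:
--   0x1F     -> 31       decimal form is not longer, so hex is dropped
--   123.000  -> 123      useless fractional part removed
--   000123   -> 123      leading zeros removed
--   123000   -> 123e3    v = 3 trailing zeros > #"3" + 1, so scientific
--   .000123  -> 123e-6   w = 3, #q = 6: w + 2 + #nv < 1 + #q holds
--   1234e-5  -> .01234   #nex = 2 >= -ex - #sig = 1, fraction is shorter
------------------------------------------------------------------------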

------------------------------------------------------------------------
-- string optimization
-- * note: works on well-formed strings only!
-- * optimizations on characters can be summarized as follows:
--   \a\b\f\n\r\t\v -- no change
--   \\ -- no change
--   \"\' -- depends on delim, other can remove \
--   \[\] -- remove \
--   \<char> -- general escape, remove \
--   \<eol> -- normalize the EOL only
--   \ddd -- if \a\b\f\n\r\t\v, change to latter
--     if other < ascii 32, keep ddd but zap leading zeros
--     if >= ascii 32, translate it into the literal, then also
--     do escapes for \\,\",\' cases
--   <other> -- no change
-- * switch delimiters if string becomes shorter
------------------------------------------------------------------------

local function do_string(I)
  local info = sinfos[I]
  local delim = sub(info, 1, 1)                 -- delimiter used
  local ndelim = (delim == "'") and '"' or "'"  -- opposite " <-> '
  local z = sub(info, 2, -2)                    -- actual string
  local i = 1
  local c_delim, c_ndelim = 0, 0                -- "/' counts
  --------------------------------------------------------------------
  while i <= #z do
    local c = sub(z, i, i)
    ----------------------------------------------------------------
    if c == "\\" then                   -- escaped stuff
      local j = i + 1
      local d = sub(z, j, j)
      local p = find("abfnrtv\\\n\r\"\'0123456789", d, 1, true)
      ------------------------------------------------------------
      if not p then                     -- \<char> -- remove \
        z = sub(z, 1, i - 1)..sub(z, j)
        i = i + 1
      ------------------------------------------------------------
      elseif p <= 8 then                -- \a\b\f\n\r\t\v\\
        i = i + 2                       -- no change
      ------------------------------------------------------------
      elseif p <= 10 then               -- \<eol> -- normalize EOL
        local eol = sub(z, j, j + 1)
        if eol == "\r\n" or eol == "\n\r" then
          z = sub(z, 1, i).."\n"..sub(z, j + 2)
        elseif p == 10 then             -- \r case
          z = sub(z, 1, i).."\n"..sub(z, j + 1)
        end
        i = i + 2
      ------------------------------------------------------------
      elseif p <= 12 then               -- \"\' -- remove \ for ndelim
        if d == delim then
          c_delim = c_delim + 1
          i = i + 2
        else
          c_ndelim = c_ndelim + 1
          z = sub(z, 1, i - 1)..sub(z, j)
          i = i + 1
        end
      ------------------------------------------------------------
      else                              -- \ddd -- various steps
        local s = match(z, "^(%d%d?%d?)", j)
        j = i + 1 + #s                  -- skip to location
        local cv = s + 0
        local cc = string.char(cv)
        local p = find("\a\b\f\n\r\t\v", cc, 1, true)
        if p then                       -- special escapes
          s = "\\"..sub("abfnrtv", p, p)
        elseif cv < 32 then             -- normalized \ddd
          s = "\\"..cv
        elseif cc == delim then         -- \<delim>
          s = "\\"..cc
          c_delim = c_delim + 1
        elseif cc == "\\" then          -- \\
          s = "\\\\"
        else                            -- literal character
          s = cc
          if cc == ndelim then
            c_ndelim = c_ndelim + 1
          end
        end
        z = sub(z, 1, i - 1)..s..sub(z, j)
        i = i + #s
      ------------------------------------------------------------
      end--if p
    ----------------------------------------------------------------
    else  -- c ~= "\\" -- <other> -- no change
      i = i + 1
      if c == ndelim then       -- count ndelim, for switching delimiters
        c_ndelim = c_ndelim + 1
      end
    ----------------------------------------------------------------
    end--if c
  end--while
  --------------------------------------------------------------------
  -- switching delimiters, a long-winded derivation:
  -- (1) delim takes 2+2*c_delim bytes, ndelim takes c_ndelim bytes
  -- (2) delim becomes c_delim bytes, ndelim becomes 2+2*c_ndelim bytes
  -- simplifying the condition (1)>(2) --> c_delim > c_ndelim
  if c_delim > c_ndelim then
    i = 1
    while i <= #z do
      local p, q, r = find(z, "([\'\"])", i)
      if not p then break end
      if r == delim then                -- \<delim> -> <delim>
        z = sub(z, 1, p - 2)..sub(z, p)
        i = p
      else  -- r == ndelim -- <ndelim> -> \<ndelim>
        z = sub(z, 1, p - 1).."\\"..sub(z, p)
        i = p + 2
      end
    end--while
    delim = ndelim                      -- actually change delimiters
  end
  --------------------------------------------------------------------
  z = delim..z..delim
  if z ~= sinfos[I] then
    if opt_details then
      print("<string> (line "..stoklns[I]..") "..sinfos[I].." -> "..z)
      opt_details = opt_details + 1
    end
    sinfos[I] = z
  end
end
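
------------------------------------------------------------------------
-- illustrative examples for do_string (added for clarity; not part of
-- the original module):
--   "\q"             -> "q"     general escape, backslash dropped
--   "\65"            -> "A"     \ddd >= ascii 32 becomes the literal
--   "\009"           -> "\t"    \ddd mapping to a special escape
--   "say \"hi\" now" -> 'say "hi" now'
--   (c_delim = 2 > c_ndelim = 0, so switching delimiters saves the
--   two backslashes)
------------------------------------------------------------------------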

------------------------------------------------------------------------
-- long string optimization
-- * note: warning flagged if trailing whitespace found, not trimmed
-- * remove first optional newline
-- * normalize embedded newlines
-- * reduce '=' separators in delimiters if possible
------------------------------------------------------------------------

local function do_lstring(I)
  local info = sinfos[I]
  local delim1 = match(info, "^%[=*%[")         -- cut out delimiters
  local sep = #delim1
  local delim2 = sub(info, -sep, -1)
  local z = sub(info, sep + 1, -(sep + 1))      -- lstring without delims
  local y = ""
  local i = 1
  --------------------------------------------------------------------
  while true do
    local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i)
    -- deal with a single line
    local ln
    if not p then
      ln = sub(z, i)
    elseif p >= i then
      ln = sub(z, i, p - 1)
    end
    if ln ~= "" then
      -- flag a warning if there are trailing spaces, won't optimize!
      if match(ln, "%s+$") then
        warn.lstring = "trailing whitespace in long string near line "..stoklns[I]
      end
      y = y..ln
    end
    if not p then       -- done if no more EOLs
      break
    end
    -- deal with line endings, normalize them
    i = p + 1
    if p then
      if #s > 0 and r ~= s then         -- skip CRLF or LFCR
        i = i + 1
      end
      -- skip first newline, which can be safely deleted
      if not(i == 1 and i == p) then
        y = y.."\n"
      end
    end
  end--while
  --------------------------------------------------------------------
  -- handle possible deletion of one or more '=' separators
  if sep >= 3 then
    local chk, okay = sep - 1
    -- loop to test ending delimiter with fewer '=' down to zero
    while chk >= 2 do
      local delim = "%]"..rep("=", chk - 2).."%]"
      if not match(y, delim) then okay = chk end
      chk = chk - 1
    end
    if okay then        -- change delimiters
      sep = rep("=", okay - 2)
      delim1, delim2 = "["..sep.."[", "]"..sep.."]"
    end
  end
  --------------------------------------------------------------------
  sinfos[I] = delim1..y..delim2
end
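
------------------------------------------------------------------------
-- illustrative example for do_lstring (added for clarity; not part of
-- the original module): for [==[one]]two]==] the body contains "]]",
-- so only the level-1 closer "]=]" is absent; okay = 3 and the token
-- becomes [=[one]]two]=], saving one '=' per side. A body without any
-- "]]" reduces all the way to [[ ... ]].
------------------------------------------------------------------------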

------------------------------------------------------------------------
-- long comment optimization
-- * note: does not remove first optional newline
-- * trim trailing whitespace
-- * normalize embedded newlines
-- * reduce '=' separators in delimiters if possible
------------------------------------------------------------------------

local function do_lcomment(I)
  local info = sinfos[I]
  local delim1 = match(info, "^%-%-%[=*%[")     -- cut out delimiters
  local sep = #delim1
  local delim2 = sub(info, -(sep - 2), -1)
  local z = sub(info, sep + 1, -(sep - 1))      -- comment without delims
  local y = ""
  local i = 1
  --------------------------------------------------------------------
  while true do
    local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i)
    -- deal with a single line, extract and check trailing whitespace
    local ln
    if not p then
      ln = sub(z, i)
    elseif p >= i then
      ln = sub(z, i, p - 1)
    end
    if ln ~= "" then
      -- trim trailing whitespace if non-empty line
      local ws = match(ln, "%s*$")
      if #ws > 0 then ln = sub(ln, 1, -(#ws + 1)) end
      y = y..ln
    end
    if not p then       -- done if no more EOLs
      break
    end
    -- deal with line endings, normalize them
    i = p + 1
    if p then
      if #s > 0 and r ~= s then         -- skip CRLF or LFCR
        i = i + 1
      end
      y = y.."\n"
    end
  end--while
  --------------------------------------------------------------------
  -- handle possible deletion of one or more '=' separators
  sep = sep - 2
  if sep >= 3 then
    local chk, okay = sep - 1
    -- loop to test ending delimiter with fewer '=' down to zero
    while chk >= 2 do
      local delim = "%]"..rep("=", chk - 2).."%]"
      if not match(y, delim) then okay = chk end
      chk = chk - 1
    end
    if okay then        -- change delimiters
      sep = rep("=", okay - 2)
      delim1, delim2 = "--["..sep.."[", "]"..sep.."]"
    end
  end
  --------------------------------------------------------------------
  sinfos[I] = delim1..y..delim2
end

------------------------------------------------------------------------
-- short comment optimization
-- * trim trailing whitespace
------------------------------------------------------------------------

local function do_comment(i)
  local info = sinfos[i]
  local ws = match(info, "%s*$")        -- just look from end of string
  if #ws > 0 then
    info = sub(info, 1, -(#ws + 1))     -- trim trailing whitespace
  end
  sinfos[i] = info
end

------------------------------------------------------------------------
-- returns true if string found in long comment
-- * this is a feature to keep copyright or license texts
------------------------------------------------------------------------

local function keep_lcomment(opt_keep, info)
  if not opt_keep then return false end -- option not set
  local delim1 = match(info, "^%-%-%[=*%[")     -- cut out delimiters
  local sep = #delim1
  local delim2 = sub(info, -sep, -1)
  local z = sub(info, sep + 1, -(sep - 1))      -- comment without delims
  if find(z, opt_keep, 1, true) then    -- try to match
    return true
  end
end
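
------------------------------------------------------------------------
-- illustrative example for keep_lcomment (added for clarity; not part
-- of the original module): with option.KEEP = "Copyright", a comment
-- like --[[ Copyright (c) 2008 Kein-Hong Man ]] matches via the plain
-- (non-pattern) find and is preserved even when comment removal is on
------------------------------------------------------------------------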

------------------------------------------------------------------------
-- main entry point
-- * currently, lexer processing has 2 passes
-- * processing is done on a line-oriented basis, which is easier to
--   grok due to the next point...
-- * since there are various options that can be enabled or disabled,
--   processing is a little messy or convoluted
------------------------------------------------------------------------

function optimize(option, toklist, semlist, toklnlist)
  --------------------------------------------------------------------
  -- set option flags
  --------------------------------------------------------------------
  local opt_comments = option["opt-comments"]
  local opt_whitespace = option["opt-whitespace"]
  local opt_emptylines = option["opt-emptylines"]
  local opt_eols = option["opt-eols"]
  local opt_strings = option["opt-strings"]
  local opt_numbers = option["opt-numbers"]
  local opt_keep = option.KEEP
  opt_details = option.DETAILS and 0    -- upvalues for details display
  print = print or base.print
  if opt_eols then      -- forced settings, otherwise won't work properly
    opt_comments = true
    opt_whitespace = true
    opt_emptylines = true
  end
  --------------------------------------------------------------------
  -- variable initialization
  --------------------------------------------------------------------
  stoks, sinfos, stoklns                -- set source lists
    = toklist, semlist, toklnlist
  local i = 1                           -- token position
  local tok, info                       -- current token
  local prev                            -- position of last grammar token
                                        -- on same line (for TK_SPACE stuff)
  --------------------------------------------------------------------
  -- changes a token, info pair
  --------------------------------------------------------------------
  local function settoken(tok, info, I)
    I = I or i
    stoks[I] = tok or ""
    sinfos[I] = info or ""
  end
  --------------------------------------------------------------------
  -- processing loop (PASS 1)
  --------------------------------------------------------------------
  while true do
    tok, info = stoks[i], sinfos[i]
    ----------------------------------------------------------------
    local atstart = atlinestart(i)      -- set line begin flag
    if atstart then prev = nil end
    ----------------------------------------------------------------
    if tok == "TK_EOS" then             -- end of stream/pass
      break
    ----------------------------------------------------------------
    elseif tok == "TK_KEYWORD" or       -- keywords, identifiers,
           tok == "TK_NAME" or          -- operators
           tok == "TK_OP" then
      -- TK_KEYWORD and TK_OP can't be optimized without a big
      -- optimization framework; it would be more of an optimizing
      -- compiler, not a source code compressor
      -- TK_NAME tokens that are locals need the parser to analyze/optimize
      prev = i
    ----------------------------------------------------------------
    elseif tok == "TK_NUMBER" then      -- numbers
      if opt_numbers then
        do_number(i)                    -- optimize
      end
      prev = i
    ----------------------------------------------------------------
    elseif tok == "TK_STRING" or        -- strings, long strings
           tok == "TK_LSTRING" then
      if opt_strings then
        if tok == "TK_STRING" then
          do_string(i)                  -- optimize
        else
          do_lstring(i)                 -- optimize
        end
      end
      prev = i
    ----------------------------------------------------------------
    elseif tok == "TK_COMMENT" then     -- short comments
      if opt_comments then
        if i == 1 and sub(info, 1, 1) == "#" then
          -- keep shbang comment, trim whitespace
          do_comment(i)
        else
          -- safe to delete, as a TK_EOL (or TK_EOS) always follows
          settoken()                    -- remove entirely
        end
      elseif opt_whitespace then        -- trim whitespace only
        do_comment(i)
      end
    ----------------------------------------------------------------
    elseif tok == "TK_LCOMMENT" then    -- long comments
      if keep_lcomment(opt_keep, info) then
        ------------------------------------------------------------
        -- if --keep, we keep a long comment if <msg> is found;
        -- this is a feature to keep copyright or license texts
        if opt_whitespace then          -- trim whitespace only
          do_lcomment(i)
        end
        prev = i
      elseif opt_comments then
        local eols = commenteols(info)
        ------------------------------------------------------------
        -- prepare opt_emptylines case first, if a disposable token
        -- follows, current one is safe to dump, else keep a space;
        -- it is implied that the operation is safe for '-', because
        -- current is a TK_LCOMMENT, and must be separate from a '-'
        if is_faketoken[stoks[i + 1]] then
          settoken()                    -- remove entirely
          tok = ""
        else
          settoken("TK_SPACE", " ")
        end
        ------------------------------------------------------------
        -- if there are embedded EOLs to keep and opt_emptylines is
        -- disabled, then switch the token into one or more EOLs
        if not opt_emptylines and eols > 0 then
          settoken("TK_EOL", rep("\n", eols))
        end
        ------------------------------------------------------------
        -- if optimizing whitespaces, force reinterpretation of the
        -- token to give a chance for the space to be optimized away
        if opt_whitespace and tok ~= "" then
          i = i - 1                     -- to reinterpret
        end
        ------------------------------------------------------------
      else                              -- disabled case
        if opt_whitespace then          -- trim whitespace only
          do_lcomment(i)
        end
        prev = i
      end
    ----------------------------------------------------------------
    elseif tok == "TK_EOL" then         -- line endings
      if atstart and opt_emptylines then
        settoken()                      -- remove entirely
      elseif info == "\r\n" or info == "\n\r" then
        -- normalize the rest of the EOLs for CRLF/LFCR only
        -- (note that TK_LCOMMENT can change into several EOLs)
        settoken("TK_EOL", "\n")
      end
    ----------------------------------------------------------------
    elseif tok == "TK_SPACE" then       -- whitespace
      if opt_whitespace then
        if atstart or atlineend(i) then
          -- delete leading and trailing whitespace
          settoken()                    -- remove entirely
        else
          ------------------------------------------------------------
          -- at this point, since leading whitespace has been removed,
          -- there should be either a real token or a TK_LCOMMENT
          -- prior to hitting this whitespace; the TK_LCOMMENT case
          -- only happens if opt_comments is disabled; so prev ~= nil
          local ptok = stoks[prev]
          if ptok == "TK_LCOMMENT" then
            -- previous TK_LCOMMENT can abut with anything
            settoken()                  -- remove entirely
          else
            -- prev must be a grammar token; consecutive TK_SPACE
            -- tokens are impossible when optimizing whitespace
            local ntok = stoks[i + 1]
            if is_faketoken[ntok] then
              -- handle special case where a '-' cannot abut with
              -- either a short comment or a long comment
              if (ntok == "TK_COMMENT" or ntok == "TK_LCOMMENT") and
                 ptok == "TK_OP" and sinfos[prev] == "-" then
                -- keep token
              else
                settoken()              -- remove entirely
              end
            else  -- is_realtoken
              -- check a pair of grammar tokens, if can abut, then
              -- delete space token entirely, otherwise keep one space
              local s = checkpair(prev, i + 1)
              if s == "" then
                settoken()              -- remove entirely
              else
                settoken("TK_SPACE", " ")
              end
            end
          end
          ------------------------------------------------------------
        end
      end
    ----------------------------------------------------------------
    else
      error("unidentified token encountered")
    end
    ----------------------------------------------------------------
    i = i + 1
  end--while
  repack_tokens()
  --------------------------------------------------------------------
  -- processing loop (PASS 2)
  --------------------------------------------------------------------
  if opt_eols then
    i = 1
    -- aggressive EOL removal only works with most non-grammar tokens
    -- optimized away because it is a rather simple scheme -- basically
    -- it just checks 'real' token pairs around EOLs
    if stoks[1] == "TK_COMMENT" then
      -- first comment still existing must be shbang, skip whole line
      i = 3
    end
    while true do
      tok, info = stoks[i], sinfos[i]
      --------------------------------------------------------------
      if tok == "TK_EOS" then           -- end of stream/pass
        break
      --------------------------------------------------------------
      elseif tok == "TK_EOL" then       -- consider each TK_EOL
        local t1, t2 = stoks[i - 1], stoks[i + 1]
        if is_realtoken[t1] and is_realtoken[t2] then   -- sanity check
          local s = checkpair(i - 1, i + 1)
          if s == "" then
            settoken()                  -- remove entirely
          end
        end
      end--if tok
      --------------------------------------------------------------
      i = i + 1
    end--while
    repack_tokens()
  end
  --------------------------------------------------------------------
  if opt_details and opt_details > 0 then print() end   -- spacing
  return stoks, sinfos, stoklns
end
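
------------------------------------------------------------------------
-- usage sketch (added for illustration; not part of the original
-- module): the three parallel lists come from LuaSrcDiet's llex lexer;
-- the llex field names and call signature below are assumptions based
-- on that lexer, so treat this as a sketch, not a reference:
--
--   local llex = require "llex"
--   local optlex = require "optlex"
--   llex.init(source)                  -- assumed signature
--   llex.llex()
--   local option = {
--     ["opt-comments"] = true, ["opt-whitespace"] = true,
--     ["opt-emptylines"] = true, ["opt-numbers"] = true,
--     ["opt-strings"] = true, ["opt-eols"] = false,
--   }
--   local toks, infos, toklns =
--     optlex.optimize(option, llex.tok, llex.seminfo, llex.tokln)
--   -- each sinfos entry holds the token's text; concatenating them
--   -- in order reproduces the optimized source
--   io.write(table.concat(infos))
------------------------------------------------------------------------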