libs/web: prepare template parser, dispatcher and i18n class for upcoming po format...
[project/luci.git] / libs / web / src / template_parser.c
1 /*
2 * LuCI Template - Parser implementation
3 *
4 * Copyright (C) 2009 Jo-Philipp Wich <xm@subsignal.org>
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19 #include "template_parser.h"
20
21
/*
 * Lua code fragments wrapped around a chunk, indexed by chunk type
 * (generate_expression() looks rows up via data->type).
 * Column 0 is emitted before the chunk, column 1 after it; a NULL entry
 * means nothing is emitted on that side.
 *
 * NOTE(review): the row order presumably mirrors the T_TYPE_* constants
 * declared in template_parser.h - confirm before reordering.
 */
const char * gen_code[7][2] = {
	/* chunk becomes a quoted string literal handed to write() */
	[0] = { "write(\"", "\")" },

	/* nothing is emitted around the chunk */
	[1] = { NULL, NULL },

	/* chunk is evaluated, stringified (nil becomes "") and written */
	[2] = { "write(tostring(", " or \"\"))" },

	/* chunk becomes the quoted argument of include() */
	[3] = { "include(\"", "\")" },

	/* chunk is a translation key; result passes through pcdata() */
	[4] = { "write(pcdata(translate(\"", "\")))" },

	/* chunk is a translation key; result is written as is */
	[5] = { "write(translate(\"", "\"))" },

	/* chunk is passed through verbatim, followed by a single space */
	[6] = { NULL, " " }
};
32
/*
 * Length-bounded substring search, similar to strstr() but operating on
 * explicit lengths so neither buffer needs to be NUL-terminated.
 *
 * haystack/hslen: buffer to search and the number of valid bytes in it.
 * needle/ndlen:   byte sequence to look for and its length.
 *
 * Returns a pointer to the first occurrence of needle that lies completely
 * within the first hslen bytes of haystack, or NULL if there is none.
 * NULL is also returned for NULL pointers, a non-positive ndlen or a needle
 * longer than the haystack, so needle[0] is never read for an empty needle.
 */
static char *strfind(char *haystack, int hslen, const char *needle, int ndlen)
{
	int last;
	int i, j;

	if( (haystack == NULL) || (needle == NULL) || (ndlen <= 0) || (ndlen > hslen) )
		return NULL;

	/* Last start offset at which the whole needle still fits */
	last = hslen - ndlen;

	for( i = 0; i <= last; i++ )
	{
		for( j = 0; (j < ndlen) && (haystack[i+j] == needle[j]); j++ )
			;

		if( j == ndlen )
			return &haystack[i];
	}

	return NULL;
}
61
62 /*
63 * Inspect current read buffer and find the number of "vague" characters at the end
64 * which could indicate an opening token. Returns the number of "vague" chars.
65 * The last continuous sequence of whitespace, optionally followed by a "<" is
66 * treated as "vague" because whitespace may be discarded if the upcoming opening
67 * token indicates pre-whitespace-removal ("<%-"). A single remaining "<" char
68 * can't be differentiated from an opening token ("<%"), so it's kept to be processed
69 * in the next cycle.
70 */
71 static int stokscan(struct template_parser *data, int off, int no_whitespace)
72 {
73 int i;
74 int skip = 0;
75 int tokoff = data->bufsize - 1;
76
77 for( i = tokoff; i >= off; i-- )
78 {
79 if( data->buf[i] == T_TOK_START[0] )
80 {
81 skip = tokoff - i + 1;
82 tokoff = i - 1;
83 break;
84 }
85 }
86
87 if( !no_whitespace )
88 {
89 for( i = tokoff; i >= off; i-- )
90 {
91 if( isspace(data->buf[i]) )
92 skip++;
93 else
94 break;
95 }
96 }
97
98 return skip;
99 }
100
101 /*
102 * Similar to stokscan() but looking for closing token indicators.
103 * Matches "-", optionally followed by a "%" char.
104 */
105 static int etokscan(struct template_parser *data)
106 {
107 int skip = 0;
108
109 if( (data->bufsize > 0) && (data->buf[data->bufsize-1] == T_TOK_END[0]) )
110 skip++;
111
112 if( (data->bufsize > skip) && (data->buf[data->bufsize-skip-1] == T_TOK_SKIPWS[0]) )
113 skip++;
114
115 return skip;
116 }
117
118 /*
119 * Generate Lua expressions from the given raw code, write it into the
120 * output buffer and set the lua_Reader specific size pointer.
121 * Takes parser-state, lua_Reader's size pointer and generator flags
122 * as parameter. The given flags indicate whether leading or trailing
123 * code should be added. Returns a pointer to the output buffer.
124 */
125 static const char * generate_expression(struct template_parser *data, size_t *sz, int what)
126 {
127 char tmp[T_OUTBUFSZ];
128 int i;
129 int size = 0;
130 int start = 0;
131 int whitespace = 0;
132
133 memset(tmp, 0, T_OUTBUFSZ);
134
135 /* Inject leading expression code (if any) */
136 if( (what & T_GEN_START) && (gen_code[data->type][0] != NULL) )
137 {
138 memcpy(tmp, gen_code[data->type][0], strlen(gen_code[data->type][0]));
139 size += strlen(gen_code[data->type][0]);
140 }
141
142 /* Parse source buffer */
143 for( i = 0; i < data->outsize; i++ )
144 {
145 /* Skip leading whitespace for non-raw and non-expr chunks */
146 if( !start && isspace(data->out[i]) && (data->type == T_TYPE_I18N ||
147 data->type == T_TYPE_I18N_RAW || data->type == T_TYPE_INCLUDE) )
148 continue;
149 else if( !start )
150 start = 1;
151
152 /* Found whitespace after i18n key */
153 if( data->type == T_TYPE_I18N || data->type == T_TYPE_I18N_RAW )
154 {
155 /* Is initial whitespace, insert space */
156 if( !whitespace && isspace(data->out[i]) )
157 {
158 tmp[size++] = ' ';
159 whitespace = 1;
160 }
161
162 /* Suppress subsequent whitespace, escape special chars */
163 else if( !isspace(data->out[i]) )
164 {
165 if( data->out[i] == '\\' || data->out[i] == '"' )
166 tmp[size++] = '\\';
167
168 tmp[size++] = data->out[i];
169 whitespace = 0;
170 }
171 }
172
173 /* Escape quotes, backslashes and newlines for plain and include expressions */
174 else if( (data->type == T_TYPE_TEXT || data->type == T_TYPE_INCLUDE) &&
175 (data->out[i] == '\\' || data->out[i] == '"' || data->out[i] == '\n' || data->out[i] == '\t') )
176 {
177 tmp[size++] = '\\';
178
179 switch(data->out[i])
180 {
181 case '\n':
182 tmp[size++] = 'n';
183 break;
184
185 case '\t':
186 tmp[size++] = 't';
187 break;
188
189 default:
190 tmp[size++] = data->out[i];
191 }
192 }
193
194 /* Normal char */
195 else
196 {
197 tmp[size++] = data->out[i];
198 }
199 }
200
201 /* Inject trailing expression code (if any) */
202 if( (what & T_GEN_END) && (gen_code[data->type][1] != NULL) )
203 {
204 /* Strip trailing space for i18n expressions */
205 if( data->type == T_TYPE_I18N || data->type == T_TYPE_I18N_RAW )
206 if( (size > 0) && (tmp[size-1] == ' ') )
207 size--;
208
209 memcpy(&tmp[size], gen_code[data->type][1], strlen(gen_code[data->type][1]));
210 size += strlen(gen_code[data->type][1]);
211 }
212
213 *sz = data->outsize = size;
214 memset(data->out, 0, T_OUTBUFSZ);
215 memcpy(data->out, tmp, size);
216
217 //printf("<<<%i|%i|%i|%s>>>\n", what, data->type, *sz, data->out);
218
219 return data->out;
220 }
221
222 /*
223 * Move the number of bytes specified in data->bufsize from the
224 * given source pointer to the beginning of the read buffer.
225 */
226 static void bufmove(struct template_parser *data, const char *src)
227 {
228 if( data->bufsize > 0 )
229 memmove(data->buf, src, data->bufsize);
230 else if( data->bufsize < 0 )
231 data->bufsize = 0;
232
233 data->buf[data->bufsize] = 0;
234 }
235
236 /*
237 * Move the given amount of bytes from the given source pointer
238 * to the output buffer and set data->outputsize.
239 */
240 static void bufout(struct template_parser *data, const char *src, int len)
241 {
242 if( len >= 0 )
243 {
244 memset(data->out, 0, T_OUTBUFSZ);
245 memcpy(data->out, src, len);
246 data->outsize = len;
247 }
248 else
249 {
250 data->outsize = 0;
251 }
252 }
253
254 /*
255 * lua_Reader compatible function that parses template code on demand from
256 * the given file handle.
257 */
258 const char *template_reader(lua_State *L, void *ud, size_t *sz)
259 {
260 struct template_parser *data = ud;
261 char *match = NULL;
262 int off = 0;
263 int ignore = 0;
264 int genflags = 0;
265 int readlen = 0;
266 int vague = 0;
267
268 while( !(data->flags & T_FLAG_EOF) || (data->bufsize > 0) )
269 {
270 /* Fill buffer */
271 if( !(data->flags & T_FLAG_EOF) && (data->bufsize < T_READBUFSZ) )
272 {
273 if( (readlen = read(data->fd, &data->buf[data->bufsize], T_READBUFSZ - data->bufsize)) > 0 )
274 data->bufsize += readlen;
275 else if( readlen == 0 )
276 data->flags |= T_FLAG_EOF;
277 else
278 return NULL;
279 }
280
281 /* Evaluate state */
282 switch(data->state)
283 {
284 /* Plain text chunk (before "<%") */
285 case T_STATE_TEXT_INIT:
286 case T_STATE_TEXT_NEXT:
287 off = 0; ignore = 0; *sz = 0;
288 data->type = T_TYPE_TEXT;
289
290 /* Skip leading whitespace if requested */
291 if( data->flags & T_FLAG_SKIPWS )
292 {
293 data->flags &= ~T_FLAG_SKIPWS;
294 while( (off < data->bufsize) && isspace(data->buf[off]) )
295 off++;
296 }
297
298 /* Found "<%" */
299 if( (match = strfind(&data->buf[off], data->bufsize - off - 1, T_TOK_START, strlen(T_TOK_START))) != NULL )
300 {
301 readlen = (int)(match - &data->buf[off]);
302 data->bufsize -= (readlen + strlen(T_TOK_START) + off);
303 match += strlen(T_TOK_START);
304
305 /* Check for leading '-' */
306 if( match[0] == T_TOK_SKIPWS[0] )
307 {
308 data->bufsize--;
309 match++;
310
311 while( (readlen > 1) && isspace(data->buf[off+readlen-1]) )
312 {
313 readlen--;
314 }
315 }
316
317 bufout(data, &data->buf[off], readlen);
318 bufmove(data, match);
319 data->state = T_STATE_CODE_INIT;
320 }
321
322 /* Maybe plain chunk */
323 else
324 {
325 /* Preserve trailing "<" or white space, maybe a start token */
326 vague = stokscan(data, off, 0);
327
328 /* We can process some bytes ... */
329 if( vague < data->bufsize )
330 {
331 readlen = data->bufsize - vague - off;
332 }
333
334 /* No bytes to process, so try to remove at least whitespace ... */
335 else
336 {
337 /* ... but try to preserve trailing "<" ... */
338 vague = stokscan(data, off, 1);
339
340 if( vague < data->bufsize )
341 {
342 readlen = data->bufsize - vague - off;
343 }
344
345 /* ... no chance, push out buffer */
346 else
347 {
348 readlen = vague - off;
349 vague = 0;
350 }
351 }
352
353 bufout(data, &data->buf[off], readlen);
354
355 data->state = T_STATE_TEXT_NEXT;
356 data->bufsize = vague;
357 bufmove(data, &data->buf[off+readlen]);
358 }
359
360 if( ignore || data->outsize == 0 )
361 continue;
362 else
363 return generate_expression(data, sz, T_GEN_START | T_GEN_END);
364
365 break;
366
367 /* Ignored chunk (inside "<%# ... %>") */
368 case T_STATE_SKIP:
369 ignore = 1;
370
371 /* Initial code chunk ("<% ...") */
372 case T_STATE_CODE_INIT:
373 off = 0;
374
375 /* Check for leading '-' */
376 if( data->buf[off] == T_TOK_SKIPWS[0] )
377 off++;
378
379 /* Determine code type */
380 switch(data->buf[off])
381 {
382 case '#':
383 ignore = 1;
384 off++;
385 data->type = T_TYPE_COMMENT;
386 break;
387
388 case '=':
389 off++;
390 data->type = T_TYPE_EXPR;
391 break;
392
393 case '+':
394 off++;
395 data->type = T_TYPE_INCLUDE;
396 break;
397
398 case ':':
399 off++;
400 data->type = T_TYPE_I18N;
401 break;
402
403 case '_':
404 off++;
405 data->type = T_TYPE_I18N_RAW;
406 break;
407
408 default:
409 data->type = T_TYPE_CODE;
410 break;
411 }
412
413 /* Subsequent code chunk ("..." or "... %>") */
414 case T_STATE_CODE_NEXT:
415 /* Found "%>" */
416 if( (match = strfind(&data->buf[off], data->bufsize - off, T_TOK_END, strlen(T_TOK_END))) != NULL )
417 {
418 genflags = ( data->state == T_STATE_CODE_INIT )
419 ? (T_GEN_START | T_GEN_END) : T_GEN_END;
420
421 readlen = (int)(match - &data->buf[off]);
422
423 /* Check for trailing '-' */
424 if( (match > data->buf) && (*(match-1) == T_TOK_SKIPWS[0]) )
425 {
426 readlen--;
427 data->flags |= T_FLAG_SKIPWS;
428 }
429
430 bufout(data, &data->buf[off], readlen);
431
432 data->state = T_STATE_TEXT_INIT;
433 data->bufsize -= ((int)(match - &data->buf[off]) + strlen(T_TOK_END) + off);
434 bufmove(data, &match[strlen(T_TOK_END)]);
435 }
436
437 /* Code chunk */
438 else
439 {
440 genflags = ( data->state == T_STATE_CODE_INIT ) ? T_GEN_START : 0;
441
442 /* Preserve trailing "%" and "-", maybe an end token */
443 vague = etokscan(data);
444 readlen = data->bufsize - off - vague;
445 bufout(data, &data->buf[off], readlen);
446
447 data->state = T_STATE_CODE_NEXT;
448 data->bufsize = vague;
449 bufmove(data, &data->buf[readlen+off]);
450 }
451
452 if( ignore || (data->outsize == 0 && !genflags) )
453 continue;
454 else
455 return generate_expression(data, sz, genflags);
456
457 break;
458 }
459 }
460
461 *sz = 0;
462 return NULL;
463 }
464
465