build: introduce luci-base
[project/luci.git] / modules / base / src / template_utils.c
1 /*
2 * LuCI Template - Utility functions
3 *
4 * Copyright (C) 2010 Jo-Philipp Wich <xm@subsignal.org>
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19 #include "template_utils.h"
20 #include "template_lmo.h"
21
22 /* initialize a buffer object */
23 struct template_buffer * buf_init(int size)
24 {
25 struct template_buffer *buf;
26
27 if (size <= 0)
28 size = 1024;
29
30 buf = (struct template_buffer *)malloc(sizeof(struct template_buffer));
31
32 if (buf != NULL)
33 {
34 buf->fill = 0;
35 buf->size = size;
36 buf->data = malloc(buf->size);
37
38 if (buf->data != NULL)
39 {
40 buf->dptr = buf->data;
41 buf->data[0] = 0;
42
43 return buf;
44 }
45
46 free(buf);
47 }
48
49 return NULL;
50 }
51
52 /* grow buffer */
53 int buf_grow(struct template_buffer *buf, int size)
54 {
55 unsigned int off = (buf->dptr - buf->data);
56 char *data;
57
58 if (size <= 0)
59 size = 1024;
60
61 data = realloc(buf->data, buf->size + size);
62
63 if (data != NULL)
64 {
65 buf->data = data;
66 buf->dptr = data + off;
67 buf->size += size;
68
69 return buf->size;
70 }
71
72 return 0;
73 }
74
75 /* put one char into buffer object */
76 int buf_putchar(struct template_buffer *buf, char c)
77 {
78 if( ((buf->fill + 1) >= buf->size) && !buf_grow(buf, 0) )
79 return 0;
80
81 *(buf->dptr++) = c;
82 *(buf->dptr) = 0;
83
84 buf->fill++;
85 return 1;
86 }
87
88 /* append data to buffer */
89 int buf_append(struct template_buffer *buf, const char *s, int len)
90 {
91 if ((buf->fill + len + 1) >= buf->size)
92 {
93 if (!buf_grow(buf, len + 1))
94 return 0;
95 }
96
97 memcpy(buf->dptr, s, len);
98 buf->fill += len;
99 buf->dptr += len;
100
101 *(buf->dptr) = 0;
102
103 return len;
104 }
105
106 /* read buffer length */
107 int buf_length(struct template_buffer *buf)
108 {
109 return buf->fill;
110 }
111
112 /* destroy buffer object and return pointer to data */
113 char * buf_destroy(struct template_buffer *buf)
114 {
115 char *data = buf->data;
116
117 free(buf);
118 return data;
119 }
120
121
/*
 * Determine the total length in bytes of the sequence introduced by the
 * given lead byte, judged by its high-bit pattern:
 *
 *   110xxxxx -> 2    11110xxx -> 4    1111110x -> 6
 *   1110xxxx -> 3    111110xx -> 5
 *
 * Every other byte (ASCII, continuation bytes, 0xFE, 0xFF) yields 1.
 */
static inline int mb_num_chars(unsigned char c)
{
	static const unsigned char mask[] = { 0xE0, 0xF0, 0xF8, 0xFC, 0xFE };
	static const unsigned char lead[] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
	int i;

	/* entry i describes the lead byte of an (i + 2) byte sequence */
	for (i = 0; i < 5; i++)
	{
		if ((c & mask[i]) == lead[i])
			return i + 2;
	}

	return 1;
}
138
/*
 * Test whether the given byte has the continuation byte pattern
 * 10xxxxxx, i.e. lies in the range 0x80..0xBF.
 * Returns 1 if so, 0 otherwise.
 */
static inline int mb_is_cont(unsigned char c)
{
	return ((c & 0xC0) == 0x80);
}
144
/*
 * Test whether the byte sequence at the given pointer with the given
 * length is the shortest possible representation of its code point.
 *
 * s  points at the lead byte; at least n bytes must be readable
 * n  length of the sequence in bytes
 *
 * Returns 0 if the sequence matches the overlong pattern for its length
 * (shown in the comment of each case), 1 otherwise. Lengths outside
 * 2..6 are always reported as shortest.
 */
static inline int mb_is_shortest(unsigned char *s, int n)
{
	switch (n)
	{
		case 2:
			/* 1100000x (10xxxxxx) */
			return !(((*s >> 1) == 0x60) &&
			         ((*(s+1) >> 6) == 0x02));

		case 3:
			/* 11100000 100xxxxx (10xxxxxx) */
			return !((*s == 0xE0) &&
			         ((*(s+1) >> 5) == 0x04) &&
			         ((*(s+2) >> 6) == 0x02));

		case 4:
			/* 11110000 1000xxxx (10xxxxxx 10xxxxxx) */
			return !((*s == 0xF0) &&
			         ((*(s+1) >> 4) == 0x08) &&
			         ((*(s+2) >> 6) == 0x02) &&
			         ((*(s+3) >> 6) == 0x02));

		case 5:
			/* 11111000 10000xxx (10xxxxxx 10xxxxxx 10xxxxxx) */
			return !((*s == 0xF8) &&
			         ((*(s+1) >> 3) == 0x10) &&
			         ((*(s+2) >> 6) == 0x02) &&
			         ((*(s+3) >> 6) == 0x02) &&
			         ((*(s+4) >> 6) == 0x02));

		case 6:
			/* 11111100 100000xx (10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx)
			 * the lead byte of a six byte sequence is 0xFC, not 0xF8 */
			return !((*s == 0xFC) &&
			         ((*(s+1) >> 2) == 0x20) &&
			         ((*(s+2) >> 6) == 0x02) &&
			         ((*(s+3) >> 6) == 0x02) &&
			         ((*(s+4) >> 6) == 0x02) &&
			         ((*(s+5) >> 6) == 0x02));
	}

	return 1;
}
189
/*
 * Test whether the byte sequence at the given pointer with the given
 * length encodes a UTF-16 surrogate: a three byte sequence whose lead
 * byte is 0xED and whose second byte lies in 0xA0..0xBF.
 * Returns 1 if so, 0 otherwise. The bytes are only read when n is 3.
 */
static inline int mb_is_surrogate(unsigned char *s, int n)
{
	if (n != 3)
		return 0;

	if (s[0] != 0xED)
		return 0;

	/* 0xA0..0xBF is exactly the bit pattern 101xxxxx */
	return ((s[1] & 0xE0) == 0xA0);
}
196
/*
 * Test whether the byte sequence at the given pointer with the given
 * length is one of the two three byte sequences EF BF BE or EF BF BF,
 * which this module treats as illegal code points.
 * Returns 1 if so, 0 otherwise. The bytes are only read when n is 3.
 */
static inline int mb_is_illegal(unsigned char *s, int n)
{
	if (n != 3)
		return 0;

	return ((s[0] == 0xEF) && (s[1] == 0xBF) &&
	        ((s[2] == 0xBE) || (s[2] == 0xBF)));
}
204
205
/*
 * Scan the source string at *s, validate one UTF-8 sequence and store
 * the result in the given buffer object.
 *
 * s    in/out; points at the first byte to examine and is advanced past
 *      the consumed bytes on success
 * l    number of bytes available at *s
 * buf  destination buffer
 *
 * A non-NUL ASCII byte, or a two to four byte sequence that is complete,
 * shortest-form, not a surrogate and not EF BF BE / EF BF BF, is copied
 * unchanged. Anything else (NUL, a byte that cannot start a sequence,
 * a truncated or otherwise rejected sequence, any five or six byte
 * form) is replaced by a single '?'.
 *
 * Returns the number of input bytes consumed, or 0 if writing to the
 * buffer failed; in that case *s is left unchanged.
 */
static int _validate_utf8(unsigned char **s, int l, struct template_buffer *buf)
{
	unsigned char *ptr = *s;
	unsigned int o = 0, v, n;

	/* ascii byte without null */
	if ((*(ptr+0) >= 0x01) && (*(ptr+0) <= 0x7F))
	{
		if (!buf_putchar(buf, *ptr++))
			return 0;

		o = 1;
	}

	/* multi byte sequence */
	else if ((n = mb_num_chars(*ptr)) > 1)
	{
		/* count the lead byte plus the continuation bytes following it.
		 * Counting stops at the expected length n, so that a byte which
		 * merely follows a complete sequence is neither swallowed nor
		 * able to invalidate it; it is examined on the next call. */
		for (v = 1; (v < n) && (v < (unsigned int)l) && mb_is_cont(*(ptr+v)); v++);

		switch (n)
		{
			case 6:
			case 5:
				/* five and six byte sequences are always invalid */
				if (!buf_putchar(buf, '?'))
					return 0;

				break;

			default:
				/* if the number of valid bytes matches the expected
				 * number and if the sequence is legal, copy the bytes
				 * to the destination buffer */
				if ((v == n) && mb_is_shortest(ptr, n) &&
				    !mb_is_surrogate(ptr, n) && !mb_is_illegal(ptr, n))
				{
					/* copy sequence */
					if (!buf_append(buf, (char *)ptr, n))
						return 0;
				}

				/* the found sequence is illegal, skip it */
				else
				{
					/* invalid sequence */
					if (!buf_putchar(buf, '?'))
						return 0;
				}

				break;
		}

		/* advance beyond the last byte counted as part of the sequence */
		o = v;
		ptr += v;
	}

	/* invalid byte (0x00, stray continuation byte, 0xFE, 0xFF) */
	else
	{
		if (!buf_putchar(buf, '?')) /* or 0xEF, 0xBF, 0xBD */
			return 0;

		o = 1;
		ptr++;
	}

	*s = ptr;
	return o;
}
279
280 /* sanitize given string and replace all invalid UTF-8 sequences with "?" */
281 char * utf8(const char *s, unsigned int l)
282 {
283 struct template_buffer *buf = buf_init(l);
284 unsigned char *ptr = (unsigned char *)s;
285 unsigned int v, o;
286
287 if (!buf)
288 return NULL;
289
290 for (o = 0; o < l; o++)
291 {
292 /* ascii char */
293 if ((*ptr >= 0x01) && (*ptr <= 0x7F))
294 {
295 if (!buf_putchar(buf, (char)*ptr++))
296 break;
297 }
298
299 /* invalid byte or multi byte sequence */
300 else
301 {
302 if (!(v = _validate_utf8(&ptr, l - o, buf)))
303 break;
304
305 o += (v - 1);
306 }
307 }
308
309 return buf_destroy(buf);
310 }
311
/*
 * Sanitize the l bytes at s for use as XML character data:
 *
 *  - bytes 0x00-0x08, 0x0B, 0x0C, 0x0E-0x1F and 0x7F are dropped
 *  - & ' " < > are written as decimal character references ("&#NN;")
 *  - all remaining ASCII bytes are copied unchanged
 *  - bytes >= 0x80 are handed to _validate_utf8()
 *
 * Returns the data pointer of the result buffer (as handed out by
 * buf_destroy), or NULL if the buffer could not be created. If writing
 * to the buffer fails midway, processing stops and the output produced
 * so far is returned.
 */
char * pcdata(const char *s, unsigned int l)
{
	struct template_buffer *buf = buf_init(l);
	unsigned char *ptr = (unsigned char *)s;
	unsigned int o, v;
	char esq[8];
	int esl;

	if (!buf)
		return NULL;

	for (o = 0; o < l; o++)
	{
		/* Invalid XML bytes; *ptr is unsigned, so no lower bound is
		 * needed for the first range */
		if ((*ptr <= 0x08) ||
		    ((*ptr >= 0x0B) && (*ptr <= 0x0C)) ||
		    ((*ptr >= 0x0E) && (*ptr <= 0x1F)) ||
		    (*ptr == 0x7F))
		{
			ptr++;
		}

		/* Escapes */
		else if ((*ptr == 0x26) ||
		         (*ptr == 0x27) ||
		         (*ptr == 0x22) ||
		         (*ptr == 0x3C) ||
		         (*ptr == 0x3E))
		{
			esl = snprintf(esq, sizeof(esq), "&#%i;", *ptr);

			if (!buf_append(buf, esq, esl))
				break;

			ptr++;
		}

		/* ascii char */
		else if (*ptr <= 0x7F)
		{
			/* stop on failure like the other branches instead of
			 * silently losing the character and carrying on */
			if (!buf_putchar(buf, (char)*ptr++))
				break;
		}

		/* multi byte sequence */
		else
		{
			if (!(v = _validate_utf8(&ptr, l - o, buf)))
				break;

			/* the loop header accounts for one of the v bytes */
			o += (v - 1);
		}
	}

	return buf_destroy(buf);
}
370
/*
 * Remove markup tags from the l bytes at s, collapse whitespace and
 * escape the remaining text.
 *
 *  - A '<' that is followed by '/' or a letter, with at least two more
 *    bytes after it, is treated as the start of a tag. Everything up to
 *    and including the next '>' is removed; a single space is written
 *    in its place unless the previous character was whitespace.
 *    If no '>' follows, only the '<' itself is dropped.
 *  - Of a run of whitespace only the first character is kept; leading
 *    whitespace and whitespace directly after a tag are dropped.
 *  - " ' < > & are written as decimal character references ("&#NN;").
 *  - Every other byte is copied unchanged.
 *
 * Returns the data pointer of the result buffer (as handed out by
 * buf_destroy), or NULL if the buffer could not be created. The return
 * values of the individual buffer writes are not checked.
 */
char * striptags(const char *s, unsigned int l)
{
	struct template_buffer *buf = buf_init(l);
	unsigned char *ptr = (unsigned char *)s;
	unsigned char *end = ptr + l;
	unsigned char *tag;
	unsigned char prev;
	char esq[8];
	int esl;

	/* every buffer call below dereferences buf */
	if (!buf)
		return NULL;

	for (prev = ' '; ptr < end; ptr++)
	{
		if ((*ptr == '<') && ((ptr + 2) < end) &&
		    ((*(ptr + 1) == '/') || isalpha(*(ptr + 1))))
		{
			for (tag = ptr; tag < end; tag++)
			{
				if (*tag == '>')
				{
					if (!isspace(prev))
						buf_putchar(buf, ' ');

					/* continue scanning after the closing '>' */
					ptr = tag;
					prev = ' ';
					break;
				}
			}
		}
		else if (isspace(*ptr))
		{
			if (!isspace(prev))
				buf_putchar(buf, *ptr);

			prev = *ptr;
		}
		else
		{
			switch(*ptr)
			{
				case '"':
				case '\'':
				case '<':
				case '>':
				case '&':
					esl = snprintf(esq, sizeof(esq), "&#%i;", *ptr);
					buf_append(buf, esq, esl);
					break;

				default:
					buf_putchar(buf, *ptr);
					break;
			}

			prev = *ptr;
		}
	}

	return buf_destroy(buf);
}
430
/*
 * Append the l bytes at s to the buffer out in escaped form
 * (judging by the name, for embedding in a Lua string literal;
 * confirm against the callers):
 *
 *  - a backslash is doubled
 *  - a double quote becomes "&#34;" if escape_xml is set, otherwise a
 *    backslash followed by the quote
 *  - a newline becomes a backslash followed by the letter n
 *  - ' & < > become decimal character references ("&#NN;") if
 *    escape_xml is set, otherwise they are copied unchanged
 *  - every other byte is copied unchanged
 *
 * The return values of the buffer writes are not checked.
 */
void luastr_escape(struct template_buffer *out, const char *s, unsigned int l,
                   int escape_xml)
{
	char ref[8];
	int reflen;
	unsigned int i;

	for (i = 0; i < l; i++)
	{
		const char c = s[i];

		if (c == '\\')
		{
			buf_append(out, "\\\\", 2);
		}
		else if (c == '"')
		{
			if (escape_xml)
				buf_append(out, "&#34;", 5);
			else
				buf_append(out, "\\\"", 2);
		}
		else if (c == '\n')
		{
			buf_append(out, "\\n", 2);
		}
		else if (escape_xml &&
		         ((c == '\'') || (c == '&') || (c == '<') || (c == '>')))
		{
			reflen = snprintf(ref, sizeof(ref), "&#%i;", c);
			buf_append(out, ref, reflen);
		}
		else
		{
			/* ordinary byte, or an XML special char with escaping off */
			buf_putchar(out, c);
		}
	}
}
473
/*
 * Look up the l bytes at s via lmo_translate() and append the escaped
 * result to the buffer out:
 *
 *  - result 0:  the string returned by lmo_translate() is escaped and
 *               appended
 *  - result -1: the original string is escaped and appended
 *  - any other result (no catalog loaded): nothing is appended
 *
 * escape_xml is passed through to luastr_escape().
 */
void luastr_translate(struct template_buffer *out, const char *s, unsigned int l,
                      int escape_xml)
{
	char *tr;
	int trlen;
	int rc;

	rc = lmo_translate(s, l, &tr, &trlen);

	if (rc == 0)
	{
		luastr_escape(out, tr, trlen, escape_xml);
	}
	else if (rc == -1)
	{
		luastr_escape(out, s, l, escape_xml);
	}

	/* otherwise: no catalog loaded, nothing to emit */
}