/*
 * LuCI Template - Utility functions
 *
 *   Copyright (C) 2010 Jo-Philipp Wich <xm@subsignal.org>
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
19 #include "template_utils.h"
21 /* initialize a buffer object */
22 static struct template_buffer
* buf_init(void)
24 struct template_buffer
*buf
;
26 buf
= (struct template_buffer
*)malloc(sizeof(struct template_buffer
));
32 buf
->data
= (unsigned char *)malloc(buf
->size
);
34 if (buf
->data
!= NULL
)
36 buf
->dptr
= buf
->data
;
49 static int buf_grow(struct template_buffer
*buf
)
51 unsigned int off
= (buf
->dptr
- buf
->data
);
53 (unsigned char *)realloc(buf
->data
, buf
->size
+ 1024);
58 buf
->dptr
= data
+ off
;
67 /* put one char into buffer object */
68 static int buf_putchar(struct template_buffer
*buf
, unsigned char c
)
70 if( ((buf
->fill
+ 1) >= buf
->size
) && !buf_grow(buf
) )
80 /* append data to buffer */
81 static int buf_append(struct template_buffer
*buf
, unsigned char *s
, int len
)
83 while ((buf
->fill
+ len
+ 1) >= buf
->size
)
89 memcpy(buf
->dptr
, s
, len
);
98 /* destroy buffer object and return pointer to data */
99 static char * buf_destroy(struct template_buffer
*buf
)
101 unsigned char *data
= buf
->data
;
/*
 * Calculate the total number of bytes in the UTF-8 sequence introduced by
 * the lead byte c.
 *
 * Returns 2 to 6 for a multi byte lead byte (110xxxxx ... 1111110x) and 1
 * for every other byte, i.e. ASCII, continuation bytes, 0xFE and 0xFF.
 */
static inline int mb_num_chars(unsigned char c)
{
	if ((c & 0xE0) == 0xC0)
		return 2;
	else if ((c & 0xF0) == 0xE0)
		return 3;
	else if ((c & 0xF8) == 0xF0)
		return 4;
	else if ((c & 0xFC) == 0xF8)
		return 5;
	else if ((c & 0xFE) == 0xFC)
		return 6;

	return 1;
}
/*
 * Test whether the given byte is a valid continuation char, i.e. has the
 * form 10xxxxxx (0x80 to 0xBF).
 *
 * Returns 1 if it is, 0 otherwise.
 */
static inline int mb_is_cont(unsigned char c)
{
	return ((c >= 0x80) && (c <= 0xBF));
}
/*
 * Test whether the byte sequence at the given pointer with the given
 * length is the shortest possible representation of the code point.
 *
 * s must point to at least n readable bytes. Each case matches the bit
 * pattern of an overlong encoding of the respective length.
 *
 * Returns 0 if the sequence is an overlong form, 1 otherwise (including
 * lengths other than 2 to 6).
 */
static inline int mb_is_shortest(unsigned char *s, int n)
{
	switch (n)
	{
		case 2:
			/* 1100000x (10xxxxxx) */
			return !(((*s >> 1) == 0x60) &&
					 ((*(s+1) >> 6) == 0x02));

		case 3:
			/* 11100000 100xxxxx (10xxxxxx) */
			return !((*s == 0xE0) &&
					 ((*(s+1) >> 5) == 0x04) &&
					 ((*(s+2) >> 6) == 0x02));

		case 4:
			/* 11110000 1000xxxx (10xxxxxx 10xxxxxx) */
			return !((*s == 0xF0) &&
					 ((*(s+1) >> 4) == 0x08) &&
					 ((*(s+2) >> 6) == 0x02) &&
					 ((*(s+3) >> 6) == 0x02));

		case 5:
			/* 11111000 10000xxx (10xxxxxx 10xxxxxx 10xxxxxx) */
			return !((*s == 0xF8) &&
					 ((*(s+1) >> 3) == 0x10) &&
					 ((*(s+2) >> 6) == 0x02) &&
					 ((*(s+3) >> 6) == 0x02) &&
					 ((*(s+4) >> 6) == 0x02));

		case 6:
			/* 11111100 100000xx (10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx);
			 * the lead byte of a six byte sequence is 0xFC, not 0xF8 */
			return !((*s == 0xFC) &&
					 ((*(s+1) >> 2) == 0x20) &&
					 ((*(s+2) >> 6) == 0x02) &&
					 ((*(s+3) >> 6) == 0x02) &&
					 ((*(s+4) >> 6) == 0x02) &&
					 ((*(s+5) >> 6) == 0x02));
	}

	return 1;
}
/*
 * Test whether the byte sequence at the given pointer with the given
 * length is an UTF-16 surrogate.
 *
 * Only three byte sequences starting with 0xED whose second byte lies in
 * 0xA0 to 0xBF match. s is only dereferenced when n equals 3.
 *
 * Returns 1 for a surrogate, 0 otherwise.
 */
static inline int mb_is_surrogate(unsigned char *s, int n)
{
	return ((n == 3) && (*s == 0xED) && (*(s+1) >= 0xA0) && (*(s+1) <= 0xBF));
}
/*
 * Test whether the byte sequence at the given pointer with the given
 * length is an illegal UTF-8 code point.
 *
 * Matches exactly the three byte sequences EF BF BE and EF BF BF. s is
 * only dereferenced when n equals 3.
 *
 * Returns 1 for such a sequence, 0 otherwise.
 */
static inline int mb_is_illegal(unsigned char *s, int n)
{
	return ((n == 3) && (*s == 0xEF) && (*(s+1) == 0xBF) &&
			(*(s+2) >= 0xBE) && (*(s+2) <= 0xBF));
}
/*
 * Scan the given source string, validate one UTF-8 sequence and store the
 * result in the given buffer object.
 *
 * s   - in/out pointer to the current read position; advanced past the
 *       consumed bytes on success
 * l   - number of bytes remaining at *s
 * buf - destination buffer
 *
 * A valid sequence is copied verbatim; anything else is replaced by a
 * single "?".
 *
 * Returns the number of consumed source bytes, or 0 if writing to the
 * buffer failed (in which case *s is not updated).
 */
static int _validate_utf8(unsigned char **s, int l, struct template_buffer *buf)
{
	unsigned char *ptr = *s;
	unsigned int o = 0, v, n;

	/* ascii byte without null */
	if ((*(ptr+0) >= 0x01) && (*(ptr+0) <= 0x7F))
	{
		if (!buf_putchar(buf, *ptr++))
			return 0;

		o = 1;
	}

	/* multi byte sequence */
	else if ((n = mb_num_chars(*ptr)) > 1)
	{
		/* count valid chars; v ends up as 1 + the number of continuation
		 * bytes found, never reading beyond the l available bytes.
		 * NOTE(review): the bound is v <= n, so a complete sequence that
		 * is directly followed by a stray continuation byte yields
		 * v == n + 1 and is rejected as a whole - confirm this is
		 * intended. */
		for (v = 1; (v <= n) && ((o+v) < l) && mb_is_cont(*(ptr+v)); v++);

		if (n >= 5)
		{
			/* five and six byte sequences are always invalid */
			if (!buf_putchar(buf, '?'))
				return 0;
		}

		/* if the number of valid continuation bytes matches the
		 * expected number and if the sequence is legal, copy
		 * the bytes to the destination buffer */
		else if ((v == n) && mb_is_shortest(ptr, n) &&
				 !mb_is_surrogate(ptr, n) && !mb_is_illegal(ptr, n))
		{
			if (!buf_append(buf, ptr, n))
				return 0;
		}

		/* the found sequence is illegal, skip it */
		else
		{
			/* invalid sequence */
			if (!buf_putchar(buf, '?'))
				return 0;
		}

		/* advance beyond the last found valid continuation char */
		o = v;
		ptr += v;
	}

	/* invalid byte (0x00) */
	else
	{
		if (!buf_putchar(buf, '?')) /* or 0xEF, 0xBF, 0xBD */
			return 0;

		o = 1;
		ptr++;
	}

	*s = ptr;
	return o;
}
266 /* sanitize given string and replace all invalid UTF-8 sequences with "?" */
267 char * sanitize_utf8(const char *s
, unsigned int l
)
269 struct template_buffer
*buf
= buf_init();
270 unsigned char *ptr
= (unsigned char *)s
;
276 for (o
= 0; o
< l
; o
++)
279 if ((*ptr
>= 0x01) && (*ptr
<= 0x7F))
281 if (!buf_putchar(buf
, *ptr
++))
285 /* invalid byte or multi byte sequence */
288 if (!(v
= _validate_utf8(&ptr
, l
- o
, buf
)))
295 return buf_destroy(buf
);
/*
 * Sanitize the given string and strip all invalid XML bytes,
 * validate UTF-8 sequences and escape XML control chars.
 *
 * s - source bytes
 * l - number of bytes to process at s
 *
 * Returns the data pointer handed back by buf_destroy(), or NULL if the
 * buffer could not be created. When writing to the buffer fails midway,
 * processing stops and the data collected so far is returned.
 */
char * sanitize_pcdata(const char *s, unsigned int l)
{
	struct template_buffer *buf = buf_init();
	unsigned char *ptr = (unsigned char *)s;
	unsigned int o, v;
	char esq[8];
	int esl;

	if (!buf)
		return NULL;

	for (o = 0; o < l; o++)
	{
		/* Invalid XML bytes: drop them; *ptr is unsigned, so no lower
		 * bound check is needed for the first range */
		if ((*ptr <= 0x08) ||
		    ((*ptr >= 0x0B) && (*ptr <= 0x0C)) ||
		    ((*ptr >= 0x0E) && (*ptr <= 0x1F)) ||
		    (*ptr == 0x7F))
		{
			ptr++;
		}

		/* Escapes: emit a numeric character reference.
		 * NOTE(review): set assumed to be & ' " < > - confirm */
		else if ((*ptr == 0x26) ||
		         (*ptr == 0x27) ||
		         (*ptr == 0x22) ||
		         (*ptr == 0x3C) ||
		         (*ptr == 0x3E))
		{
			esl = snprintf(esq, sizeof(esq), "&#%i;", *ptr);

			if (!buf_append(buf, (unsigned char *)esq, esl))
				break;

			ptr++;
		}

		/* ascii char */
		else if (*ptr <= 0x7F)
		{
			/* stop on a failed write instead of silently losing the char */
			if (!buf_putchar(buf, *ptr++))
				break;
		}

		/* multi byte sequence */
		else
		{
			if (!(v = _validate_utf8(&ptr, l - o, buf)))
				break;

			/* the loop increment accounts for one of the v consumed bytes */
			o += (v - 1);
		}
	}

	return buf_destroy(buf);
}