*/
#include "template_utils.h"
+#include "template_lmo.h"
/* initialize a buffer object */
-static struct template_buffer * buf_init(void)
+struct template_buffer * buf_init(int size)
{
struct template_buffer *buf;
+ if (size <= 0)
+ size = 1024;
+
buf = (struct template_buffer *)malloc(sizeof(struct template_buffer));
if (buf != NULL)
{
buf->fill = 0;
- buf->size = 1024;
- buf->data = (unsigned char *)malloc(buf->size);
+ buf->size = size;
+ buf->data = malloc(buf->size);
if (buf->data != NULL)
{
}
/* grow buffer */
-static int buf_grow(struct template_buffer *buf)
+int buf_grow(struct template_buffer *buf, int size)
{
unsigned int off = (buf->dptr - buf->data);
- unsigned char *data =
- (unsigned char *)realloc(buf->data, buf->size + 1024);
+ char *data;
+
+ if (size <= 0)
+ size = 1024;
+
+ data = realloc(buf->data, buf->size + size);
if (data != NULL)
{
buf->data = data;
buf->dptr = data + off;
- buf->size += 1024;
+ buf->size += size;
return buf->size;
}
}
/* put one char into buffer object */
-static int buf_putchar(struct template_buffer *buf, unsigned char c)
+int buf_putchar(struct template_buffer *buf, char c)
{
- if( ((buf->fill + 1) >= buf->size) && !buf_grow(buf) )
+ if( ((buf->fill + 1) >= buf->size) && !buf_grow(buf, 0) )
return 0;
*(buf->dptr++) = c;
}
/* append data to buffer */
-static int buf_append(struct template_buffer *buf, unsigned char *s, int len)
+int buf_append(struct template_buffer *buf, const char *s, int len)
{
- while ((buf->fill + len + 1) >= buf->size)
+ if ((buf->fill + len + 1) >= buf->size)
{
- if (!buf_grow(buf))
+ if (!buf_grow(buf, len + 1))
return 0;
}
return len;
}
+/* return the buffer's current fill counter */
+int buf_length(struct template_buffer *buf)
+{
+	const int used = buf->fill;
+
+	return used;
+}
+
/* Destroy the buffer object and return the pointer to its data.
 * Only the template_buffer struct itself is freed here; the data block is
 * not freed and is handed back to the caller (presumably the caller is then
 * responsible for free()ing it - confirm against callers). */
-static char * buf_destroy(struct template_buffer *buf)
+char * buf_destroy(struct template_buffer *buf)
{
- unsigned char *data = buf->data;
+ char *data = buf->data;
free(buf);
- return (char *)data;
+ return data;
}
{
case 2:
/* 1100000x (10xxxxxx) */
- return ((*s & 0x1E) > 0);
+ return !(((*s >> 1) == 0x60) &&
+ ((*(s+1) >> 6) == 0x02));
case 3:
/* 11100000 100xxxxx (10xxxxxx) */
- return ((*s & 0x1F) > 0) && ((*(s+1) & 0x60) > 0);
+ return !((*s == 0xE0) &&
+ ((*(s+1) >> 5) == 0x04) &&
+ ((*(s+2) >> 6) == 0x02));
case 4:
/* 11110000 1000xxxx (10xxxxxx 10xxxxxx) */
- return ((*s & 0x0F) > 0) && ((*(s+1) & 0x70) > 0);
+ return !((*s == 0xF0) &&
+ ((*(s+1) >> 4) == 0x08) &&
+ ((*(s+2) >> 6) == 0x02) &&
+ ((*(s+3) >> 6) == 0x02));
case 5:
/* 11111000 10000xxx (10xxxxxx 10xxxxxx 10xxxxxx) */
- return ((*s & 0x07) > 0) && ((*(s+1) & 0x78) > 0);
+ return !((*s == 0xF8) &&
+ ((*(s+1) >> 3) == 0x10) &&
+ ((*(s+2) >> 6) == 0x02) &&
+ ((*(s+3) >> 6) == 0x02) &&
+ ((*(s+4) >> 6) == 0x02));
case 6:
/* 11111100 100000xx (10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx) */
- return ((*s & 0x03) > 0) && ((*(s+1) & 0x7C) > 0);
+ return !((*s == 0xF8) &&
+ ((*(s+1) >> 2) == 0x20) &&
+ ((*(s+2) >> 6) == 0x02) &&
+ ((*(s+3) >> 6) == 0x02) &&
+ ((*(s+4) >> 6) == 0x02) &&
+ ((*(s+5) >> 6) == 0x02));
}
return 1;
unsigned char *ptr = *s;
unsigned int o = 0, v, n;
- //for (o = 0; o < l; o++)
+ /* ascii byte without null */
+ if ((*(ptr+0) >= 0x01) && (*(ptr+0) <= 0x7F))
{
- /* ascii byte without null */
- if ((*(ptr+0) >= 0x01) && (*(ptr+0) <= 0x7F))
- {
- if (!buf_putchar(buf, *ptr++))
- return 0;
+ if (!buf_putchar(buf, *ptr++))
+ return 0;
- o = 1;
- }
+ o = 1;
+ }
- /* multi byte sequence */
- else if ((n = mb_num_chars(*ptr)) > 1)
- {
- /* count valid chars */
- for (v = 1; (v <= n) && ((o+v) < l) && mb_is_cont(*(ptr+v)); v++);
+ /* multi byte sequence */
+ else if ((n = mb_num_chars(*ptr)) > 1)
+ {
+ /* count valid chars */
+ for (v = 1; (v <= n) && ((o+v) < l) && mb_is_cont(*(ptr+v)); v++);
- switch (n)
- {
- case 6:
- case 5:
- /* five and six byte sequences are always invalid */
- if (!buf_putchar(buf, '?'))
- return 0;
+ switch (n)
+ {
+ case 6:
+ case 5:
+ /* five and six byte sequences are always invalid */
+ if (!buf_putchar(buf, '?'))
+ return 0;
- break;
+ break;
- default:
- /* if the number of valid continuation bytes matches the
- * expected number and if the sequence is legal, copy
- * the bytes to the destination buffer */
- if ((v == n) && mb_is_shortest(ptr, n) &&
- !mb_is_surrogate(ptr, n) && !mb_is_illegal(ptr, n))
- {
- /* copy sequence */
- if (!buf_append(buf, ptr, n))
- return 0;
- }
-
- /* the found sequence is illegal, skip it */
- else
- {
- /* invalid sequence */
- if (!buf_putchar(buf, '?'))
- return 0;
- }
+ default:
+ /* if the number of valid continuation bytes matches the
+ * expected number and if the sequence is legal, copy
+ * the bytes to the destination buffer */
+ if ((v == n) && mb_is_shortest(ptr, n) &&
+ !mb_is_surrogate(ptr, n) && !mb_is_illegal(ptr, n))
+ {
+ /* copy sequence */
+ if (!buf_append(buf, (char *)ptr, n))
+ return 0;
+ }
- break;
- }
+ /* the found sequence is illegal, skip it */
+ else
+ {
+ /* invalid sequence */
+ if (!buf_putchar(buf, '?'))
+ return 0;
+ }
- /* advance beyound the last found valid continuation char */
- o = v;
- ptr += v;
+ break;
}
- /* invalid byte (0x00) */
- else
- {
- if (!buf_putchar(buf, '?')) /* or 0xEF, 0xBF, 0xBD */
- return 0;
+ /* advance beyond the last found valid continuation char */
+ o = v;
+ ptr += v;
+ }
- o = 1;
- ptr++;
- }
+ /* invalid byte (0x00) */
+ else
+ {
+ if (!buf_putchar(buf, '?')) /* or 0xEF, 0xBF, 0xBD */
+ return 0;
+
+ o = 1;
+ ptr++;
}
*s = ptr;
}
/* sanitize given string and replace all invalid UTF-8 sequences with "?" */
-char * sanitize_utf8(const char *s, unsigned int l)
+char * utf8(const char *s, unsigned int l)
{
- struct template_buffer *buf = buf_init();
+ struct template_buffer *buf = buf_init(l);
unsigned char *ptr = (unsigned char *)s;
+ unsigned int v, o;
if (!buf)
return NULL;
- if (!_validate_utf8(&ptr, l, buf))
+ for (o = 0; o < l; o++)
{
- free(buf->data);
- free(buf);
- return NULL;
+ /* ascii char */
+ if ((*ptr >= 0x01) && (*ptr <= 0x7F))
+ {
+ if (!buf_putchar(buf, (char)*ptr++))
+ break;
+ }
+
+ /* invalid byte or multi byte sequence */
+ else
+ {
+ if (!(v = _validate_utf8(&ptr, l - o, buf)))
+ break;
+
+ o += (v - 1);
+ }
}
return buf_destroy(buf);
/* Sanitize given string and strip all invalid XML bytes
* Validate UTF-8 sequences
* Escape XML control chars */
-char * sanitize_pcdata(const char *s, unsigned int l)
+char * pcdata(const char *s, unsigned int l)
{
- struct template_buffer *buf = buf_init();
+ struct template_buffer *buf = buf_init(l);
unsigned char *ptr = (unsigned char *)s;
unsigned int o, v;
char esq[8];
{
esl = snprintf(esq, sizeof(esq), "&#%i;", *ptr);
- if (!buf_append(buf, (unsigned char *)esq, esl))
+ if (!buf_append(buf, esq, esl))
break;
ptr++;
/* ascii char */
else if (*ptr <= 0x7F)
{
- buf_putchar(buf, *ptr++);
+ buf_putchar(buf, (char)*ptr++);
}
/* multi byte sequence */
return buf_destroy(buf);
}
+
+/*
+ * Remove markup tags from s[0..l) and escape what is left.
+ *
+ *  - A '<' followed by '/' or a letter starts a tag; everything up to and
+ *    including the next '>' is dropped and replaced by a single blank
+ *    (unless the previous emitted character was already whitespace).
+ *    If no closing '>' is found, only the '<' itself is dropped.
+ *  - Runs of whitespace are collapsed to their first character.
+ *  - The characters " ' < > & are written as numeric entities (&#NN;).
+ *
+ * Returns the data pointer obtained from buf_destroy(), or NULL if the
+ * output buffer could not be allocated. If a write into the buffer fails,
+ * processing stops and the text gathered so far is returned.
+ */
+char * striptags(const char *s, unsigned int l)
+{
+	struct template_buffer *buf = buf_init(l);
+	unsigned char *ptr = (unsigned char *)s;
+	unsigned char *end = ptr + l;
+	unsigned char *tag;
+	unsigned char prev;
+	char esq[8];
+	int esl;
+
+	/* same guard as utf8(): without it a failed allocation is dereferenced */
+	if (!buf)
+		return NULL;
+
+	/* prev starts as a blank so leading whitespace is suppressed */
+	for (prev = ' '; ptr < end; ptr++)
+	{
+		if ((*ptr == '<') && ((ptr + 2) < end) &&
+		    ((*(ptr + 1) == '/') || isalpha(*(ptr + 1))))
+		{
+			/* look for the end of the tag */
+			for (tag = ptr; tag < end; tag++)
+			{
+				if (*tag != '>')
+					continue;
+
+				/* separate the text on both sides of the tag */
+				if (!isspace(prev) && !buf_putchar(buf, ' '))
+					goto out;
+
+				/* the outer loop's ptr++ steps past the '>' */
+				ptr = tag;
+				prev = ' ';
+				break;
+			}
+		}
+		else if (isspace(*ptr))
+		{
+			/* keep only the first character of a whitespace run */
+			if (!isspace(prev) && !buf_putchar(buf, *ptr))
+				goto out;
+
+			prev = *ptr;
+		}
+		else
+		{
+			switch (*ptr)
+			{
+			case '"':
+			case '\'':
+			case '<':
+			case '>':
+			case '&':
+				/* at most "&#NN;" plus NUL, which fits into esq[8] */
+				esl = snprintf(esq, sizeof(esq), "&#%i;", *ptr);
+				if (!buf_append(buf, esq, esl))
+					goto out;
+				break;
+
+			default:
+				if (!buf_putchar(buf, *ptr))
+					goto out;
+				break;
+			}
+
+			prev = *ptr;
+		}
+	}
+
+out:
+	return buf_destroy(buf);
+}
+
+/*
+ * Append s[0..l) to `out` with the following substitutions (presumably so
+ * the text can be embedded in a quoted Lua string - confirm with callers):
+ *
+ *   backslash  ->  two backslashes
+ *   newline    ->  backslash followed by 'n'
+ *   "          ->  &#34; if escape_xml is set, otherwise backslash + "
+ *   ' & < >    ->  numeric entity (&#NN;) if escape_xml is set,
+ *                  otherwise copied unchanged
+ *
+ * Every other byte is copied unchanged. Return values of the buffer
+ * writes are not checked; the function itself returns nothing.
+ */
+void luastr_escape(struct template_buffer *out, const char *s, unsigned int l,
+	int escape_xml)
+{
+	int esl;
+	char esq[8];
+	const char *ptr;
+	const char *end = s + l;
+
+	for (ptr = s; ptr < end; ptr++)
+	{
+		switch (*ptr)
+		{
+		case '\\':
+			buf_append(out, "\\\\", 2);
+			break;
+
+		case '"':
+			if (escape_xml)
+				buf_append(out, "&#34;", 5);
+			else
+				buf_append(out, "\\\"", 2);
+			break;
+
+		case '\n':
+			buf_append(out, "\\n", 2);
+			break;
+
+		case '\'':
+		case '&':
+		case '<':
+		case '>':
+			if (escape_xml)
+			{
+				/* at most "&#NN;" plus NUL, which fits into esq[8] */
+				esl = snprintf(esq, sizeof(esq), "&#%i;", *ptr);
+				buf_append(out, esq, esl);
+				break;
+			}
+			/* fall through: without XML escaping these are plain chars */
+
+		default:
+			buf_putchar(out, *ptr);
+			break;
+		}
+	}
+}
+
+/*
+ * Pass s[0..l) to lmo_translate() and append an escaped string to `out`,
+ * depending on the result:
+ *
+ *    0     - the string/length stored by lmo_translate() is escaped
+ *   -1     - the input string itself is escaped
+ *   other  - nothing is appended (no catalog loaded)
+ *
+ * escape_xml is handed through to luastr_escape() unchanged.
+ */
+void luastr_translate(struct template_buffer *out, const char *s, unsigned int l,
+	int escape_xml)
+{
+	char *translated;
+	int translated_len;
+	const int rc = lmo_translate(s, l, &translated, &translated_len);
+
+	if (rc == 0)
+		luastr_escape(out, translated, translated_len, escape_xml);
+	else if (rc == -1)
+		luastr_escape(out, s, l, escape_xml);
+
+	/* any other result: no catalog loaded, nothing to append */
+}