64009f249f9d033542b782c4c4dbf892420bbda0
16 /* builtin charmaps */
19 /* only 0-7 are valid as dest charset */
29 /* additional charsets with algorithmic conversion */
34 /* some programs like php need this */
35 int _libiconv_version
= _LIBICONV_VERSION
;
37 /* these must match the constants above */
38 static const unsigned char charsets
[] =
41 "\000" "UTF-16BE" "\0"
42 "\001" "UTF-16LE" "\0"
43 "\002" "UTF-32BE" "\0"
44 "\003" "UTF-32LE" "\0"
45 "\006" "US-ASCII" "\0"
46 "\007" "ISO-8859-1" "\0"
48 "\010" "ISO-8859-15""\0"
50 "\011" "ISO-8859-11""\0"
52 "\012" "JIS-0201" "\0"
55 /* separate identifiers for sbcs/dbcs/etc map type */
64 /* FIXME: these are not implemented yet
66 // GBK: 81-FE 40-7E,80-FE
67 // Big5: A1-FE 40-7E,A1-FE
70 static const unsigned short maplen
[] = {
71 [UCS2_8BIT
] = 4+ 2* 128,
72 [UCS3_8BIT
] = 4+ 3* 128,
74 [SHIFT_JIS
] = 4+ 2* 94*94,
75 [BIG5
] = 4+ 2* 94*157,
76 [GBK
] = 4+ 2* 126*190,
77 [EUC_TW
] = 4+ 2* 2*94*94,
80 static int find_charmap(const char *name
)
83 for (i
= 0; i
< (sizeof(charmaps
) / sizeof(charmaps
[0])); i
++)
84 if (!strcasecmp(charmaps
[i
].name
, name
))
89 static int find_charset(const char *name
)
91 const unsigned char *s
;
92 for (s
=charsets
; *s
<0xff && strcasecmp(s
+1, name
); s
+=strlen(s
)+1);
96 iconv_t
iconv_open(const char *to
, const char *from
)
101 if ((t
= find_charset(to
)) >= 8)
104 if ((f
= find_charset(from
)) < 255)
105 return 0 | (t
<<1) | (f
<<4);
107 if ((m
= find_charmap(from
)) > -1)
108 return 1 | (t
<<1) | (m
<<4);
113 int iconv_close(iconv_t cd
)
118 static inline wchar_t get_16(const unsigned char *s
, int endian
)
121 return s
[endian
]<<8 | s
[endian
^1];
124 static inline void put_16(unsigned char *s
, wchar_t c
, int endian
)
131 size_t iconv(iconv_t cd
, char **in
, size_t *inb
, char **out
, size_t *outb
)
134 unsigned char to
= (cd
>>1)&7;
135 unsigned char from
= 255;
136 const unsigned char *map
= 0;
138 char tmp
[MB_LEN_MAX
];
143 if (!in
|| !*in
|| !*inb
) return 0;
146 map
= charmaps
[cd
>>4].map
;
150 for (; *inb
; *in
+=l
, *inb
-=l
) {
151 c
= *(unsigned char *)*in
;
153 if (from
>= UTF_8
&& c
< 0x80) goto charok
;
157 if (*inb
< l
) goto starved
;
161 l
= mbrtowc(&c
, *in
, *inb
, &st
);
163 else if (l
== (size_t)-1) goto ilseq
;
164 else if (l
== (size_t)-2) goto starved
;
169 if ((unsigned)c
- 0xa4 <= 0xbe - 0xa4) {
170 static const unsigned char map
[] = {
171 0, 0x60, 0, 0x61, 0, 0, 0, 0, 0, 0, 0,
172 0, 0, 0, 0, 0x7d, 0, 0, 0, 0x7e, 0, 0, 0,
175 if (c
== 0xa4) c
= 0x20ac;
176 else if (map
[c
-0xa5]) c
= 0x100 | map
[c
-0xa5];
181 if (c
>= 0xa1) c
+= 0x0e01-0xa1;
185 if (c
<= 0xdf) c
+= 0xff61-0xa1;
192 if (*inb
< 2) goto starved
;
193 c
= get_16(*in
, from
);
194 if ((unsigned)(c
-0xdc00) < 0x400) goto ilseq
;
195 if ((unsigned)(c
-0xd800) < 0x400) {
197 if (*inb
< 4) goto starved
;
198 d
= get_16(*in
+ 2, from
);
199 if ((unsigned)(c
-0xdc00) >= 0x400) goto ilseq
;
200 c
= ((c
-0xd800)<<10) | (d
-0xdc00);
206 if (*inb
< 4) goto starved
;
208 // c = get_32(*in, from);
211 /* only support ascii supersets */
218 if ((unsigned)c
- 0xa1 >= 94) goto ilseq
;
219 if ((unsigned)in
[0][1] - 0xa1 >= 94) goto ilseq
;
220 c
= (c
-0xa1)*94 + (in
[0][1]-0xa1);
224 if ((unsigned)c
- 0xa1 <= 0xdf-0xa1) {
234 c
= get_16(map
+ 4 + 2*c
, 0);
235 if (c
== 0xffff) goto ilseq
;
239 if ((unsigned)c
- 0xd800 < 0x800 || (unsigned)c
>= 0x110000)
244 if (*outb
< sizeof(wchar_t)) goto toobig
;
245 *(wchar_t *)*out
= c
;
246 *out
+= sizeof(wchar_t);
247 *outb
-= sizeof(wchar_t);
252 if (*outb
< k
) goto toobig
;
253 memcpy(*out
, tmp
, k
);
254 } else k
= wctomb(*out
, c
);
259 if (c
> 0x7f) c
= 0xfffd;
260 /* fall thru and count replacement in latin1 case */
262 if (!*outb
) goto toobig
;
263 if (c
< 0x100) **out
= c
;
264 else x
++, **out
= '*'; //FIXME: translit?
271 if (*outb
< 2) goto toobig
;
277 if (*outb
< 4) goto toobig
;
278 put_16(*out
, (c
>>10)|0xd800, to
);
279 put_16(*out
+ 2, (c
&0x3ff)|0xdc00, to
);