8b2bd97d99d3842255a7239e7dd150acd36955c4
16 /* builtin charmaps */
19 /* only 0-7 are valid as dest charset */
29 /* additional charsets with algorithmic conversion */
34 /* some programs like php need this */
35 int _libiconv_version
= _LIBICONV_VERSION
;
37 /* these must match the constants above */
38 static const unsigned char charsets
[] =
41 "\000" "UTF-16BE" "\0"
42 "\001" "UTF-16LE" "\0"
43 "\002" "UTF-32BE" "\0"
44 "\003" "UTF-32LE" "\0"
46 "\006" "US-ASCII" "\0"
47 "\007" "ISO-8859-1" "\0"
49 "\010" "ISO-8859-15""\0"
51 "\011" "ISO-8859-11""\0"
53 "\012" "JIS-0201" "\0"
56 /* separate identifiers for sbcs/dbcs/etc map type */
65 /* FIXME: these are not implemented yet
67 // GBK: 81-FE 40-7E,80-FE
68 // Big5: A1-FE 40-7E,A1-FE
71 static const unsigned short maplen
[] = {
72 [UCS2_8BIT
] = 4+ 2* 128,
73 [UCS3_8BIT
] = 4+ 3* 128,
75 [SHIFT_JIS
] = 4+ 2* 94*94,
76 [BIG5
] = 4+ 2* 94*157,
77 [GBK
] = 4+ 2* 126*190,
78 [EUC_TW
] = 4+ 2* 2*94*94,
81 static int find_charmap(const char *name
)
84 for (i
= 0; i
< (sizeof(charmaps
) / sizeof(charmaps
[0])); i
++)
85 if (!strcasecmp(charmaps
[i
].name
, name
))
90 static int find_charset(const char *name
)
92 const unsigned char *s
;
93 for (s
=charsets
; *s
<0xff && strcasecmp(s
+1, name
); s
+=strlen(s
)+1);
97 iconv_t
iconv_open(const char *to
, const char *from
)
102 if ((t
= find_charset(to
)) >= 8)
105 if ((f
= find_charset(from
)) < 255)
106 return 0 | (t
<<1) | (f
<<4);
108 if ((m
= find_charmap(from
)) > -1)
109 return 1 | (t
<<1) | (m
<<4);
114 int iconv_close(iconv_t cd
)
119 static inline wchar_t get_16(const unsigned char *s
, int endian
)
122 return s
[endian
]<<8 | s
[endian
^1];
125 static inline void put_16(unsigned char *s
, wchar_t c
, int endian
)
132 size_t iconv(iconv_t cd
, char **in
, size_t *inb
, char **out
, size_t *outb
)
135 unsigned char to
= (cd
>>1)&7;
136 unsigned char from
= 255;
137 const unsigned char *map
= 0;
139 char tmp
[MB_LEN_MAX
];
144 if (!in
|| !*in
|| !*inb
) return 0;
147 map
= charmaps
[cd
>>4].map
;
151 for (; *inb
; *in
+=l
, *inb
-=l
) {
152 c
= *(unsigned char *)*in
;
154 if (from
>= UTF_8
&& c
< 0x80) goto charok
;
158 if (*inb
< l
) goto starved
;
162 l
= mbrtowc(&c
, *in
, *inb
, &st
);
164 else if (l
== (size_t)-1) goto ilseq
;
165 else if (l
== (size_t)-2) goto starved
;
170 if ((unsigned)c
- 0xa4 <= 0xbe - 0xa4) {
171 static const unsigned char map
[] = {
172 0, 0x60, 0, 0x61, 0, 0, 0, 0, 0, 0, 0,
173 0, 0, 0, 0, 0x7d, 0, 0, 0, 0x7e, 0, 0, 0,
176 if (c
== 0xa4) c
= 0x20ac;
177 else if (map
[c
-0xa5]) c
= 0x100 | map
[c
-0xa5];
182 if (c
>= 0xa1) c
+= 0x0e01-0xa1;
186 if (c
<= 0xdf) c
+= 0xff61-0xa1;
193 if (*inb
< 2) goto starved
;
194 c
= get_16(*in
, from
);
195 if ((unsigned)(c
-0xdc00) < 0x400) goto ilseq
;
196 if ((unsigned)(c
-0xd800) < 0x400) {
198 if (*inb
< 4) goto starved
;
199 d
= get_16(*in
+ 2, from
);
200 if ((unsigned)(c
-0xdc00) >= 0x400) goto ilseq
;
201 c
= ((c
-0xd800)<<10) | (d
-0xdc00);
207 if (*inb
< 4) goto starved
;
209 // c = get_32(*in, from);
212 /* only support ascii supersets */
219 if ((unsigned)c
- 0xa1 >= 94) goto ilseq
;
220 if ((unsigned)in
[0][1] - 0xa1 >= 94) goto ilseq
;
221 c
= (c
-0xa1)*94 + (in
[0][1]-0xa1);
225 if ((unsigned)c
- 0xa1 <= 0xdf-0xa1) {
235 c
= get_16(map
+ 4 + 2*c
, 0);
236 if (c
== 0xffff) goto ilseq
;
240 if ((unsigned)c
- 0xd800 < 0x800 || (unsigned)c
>= 0x110000)
245 if (*outb
< sizeof(wchar_t)) goto toobig
;
246 *(wchar_t *)*out
= c
;
247 *out
+= sizeof(wchar_t);
248 *outb
-= sizeof(wchar_t);
253 if (*outb
< k
) goto toobig
;
254 memcpy(*out
, tmp
, k
);
255 } else k
= wctomb(*out
, c
);
260 if (c
> 0x7f) c
= 0xfffd;
261 /* fall thru and count replacement in latin1 case */
263 if (!*outb
) goto toobig
;
264 if (c
< 0x100) **out
= c
;
265 else x
++, **out
= '*'; //FIXME: translit?
272 if (*outb
< 2) goto toobig
;
278 if (*outb
< 4) goto toobig
;
279 put_16(*out
, (c
>>10)|0xd800, to
);
280 put_16(*out
+ 2, (c
&0x3ff)|0xdc00, to
);