Annotation of src/external/gpl3/binutils.old/dist/binutils/winduni.c, Revision 1.1.1.3
1.1 christos 1: /* winduni.c -- unicode support for the windres program.
1.1.1.3 ! christos 2: Copyright (C) 1997-2016 Free Software Foundation, Inc.
1.1 christos 3: Written by Ian Lance Taylor, Cygnus Support.
4: Rewritten by Kai Tietz, Onevision.
5:
6: This file is part of GNU Binutils.
7:
8: This program is free software; you can redistribute it and/or modify
9: it under the terms of the GNU General Public License as published by
10: the Free Software Foundation; either version 3 of the License, or
11: (at your option) any later version.
12:
13: This program is distributed in the hope that it will be useful,
14: but WITHOUT ANY WARRANTY; without even the implied warranty of
15: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16: GNU General Public License for more details.
17:
18: You should have received a copy of the GNU General Public License
19: along with this program; if not, write to the Free Software
20: Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
21: 02110-1301, USA. */
22:
23:
24: /* This file contains unicode support routines for the windres
25: program. Ideally, we would have generic unicode support which
26: would work on all systems. However, we don't. Instead, on a
27: Windows host, we are prepared to call some Windows routines. This
28: means that we will generate different output on Windows and Unix
29: hosts, but that seems better than not really supporting unicode at
30: all. */
31:
32: #include "sysdep.h"
33: #include "bfd.h"
34: #include "libiberty.h" /* for xstrdup */
35: #include "bucomm.h"
36: /* Must be included before windows.h and winnls.h. */
37: #if defined (_WIN32) || defined (__CYGWIN__)
38: #include <windows.h>
39: #include <winnls.h>
40: #endif
41: #include "winduni.h"
42: #include "safe-ctype.h"
43:
44: #if HAVE_ICONV
45: #include <iconv.h>
46: #endif
47:
48: static rc_uint_type wind_WideCharToMultiByte (rc_uint_type, const unichar *, char *, rc_uint_type);
49: static rc_uint_type wind_MultiByteToWideChar (rc_uint_type, const char *, unichar *, rc_uint_type);
50: static int unichar_isascii (const unichar *, rc_uint_type);
51:
52: /* Convert an ASCII string to a unicode string. We just copy it,
53: expanding chars to shorts, rather than doing something intelligent. */
1.1.1.2 christos 54:
1.1 christos 55: #if !defined (_WIN32) && !defined (__CYGWIN__)
56:
57: /* Codepages mapped. */
58: static local_iconv_map codepages[] =
59: {
60: { 0, "MS-ANSI" },
61: { 1, "WINDOWS-1252" },
62: { 437, "MS-ANSI" },
63: { 737, "MS-GREEK" },
64: { 775, "WINBALTRIM" },
65: { 850, "MS-ANSI" },
66: { 852, "MS-EE" },
67: { 857, "MS-TURK" },
68: { 862, "CP862" },
69: { 864, "CP864" },
70: { 866, "MS-CYRL" },
71: { 874, "WINDOWS-874" },
72: { 932, "CP932" },
73: { 936, "CP936" },
74: { 949, "CP949" },
75: { 950, "CP950" },
76: { 1250, "WINDOWS-1250" },
77: { 1251, "WINDOWS-1251" },
78: { 1252, "WINDOWS-1252" },
79: { 1253, "WINDOWS-1253" },
80: { 1254, "WINDOWS-1254" },
81: { 1255, "WINDOWS-1255" },
82: { 1256, "WINDOWS-1256" },
83: { 1257, "WINDOWS-1257" },
84: { 1258, "WINDOWS-1258" },
85: { CP_UTF7, "UTF-7" },
86: { CP_UTF8, "UTF-8" },
1.1.1.2 christos 87: { CP_UTF16, "UTF-16LE" },
1.1 christos 88: { (rc_uint_type) -1, NULL }
89: };
90:
91: /* Languages supported. */
92: static const wind_language_t languages[] =
93: {
94: { 0x0000, 437, 1252, "Neutral", "Neutral" },
95: { 0x0401, 864, 1256, "Arabic", "Saudi Arabia" }, { 0x0402, 866, 1251, "Bulgarian", "Bulgaria" },
96: { 0x0403, 850, 1252, "Catalan", "Spain" }, { 0x0404, 950, 950, "Chinese", "Taiwan" },
97: { 0x0405, 852, 1250, "Czech", "Czech Republic" }, { 0x0406, 850, 1252, "Danish", "Denmark" },
98: { 0x0407, 850, 1252, "German", "Germany" }, { 0x0408, 737, 1253, "Greek", "Greece" },
99: { 0x0409, 437, 1252, "English", "United States" }, { 0x040A, 850, 1252, "Spanish - Traditional Sort", "Spain" },
100: { 0x040B, 850, 1252, "Finnish", "Finland" }, { 0x040C, 850, 1252, "French", "France" },
101: { 0x040D, 862, 1255, "Hebrew", "Israel" }, { 0x040E, 852, 1250, "Hungarian", "Hungary" },
102: { 0x040F, 850, 1252, "Icelandic", "Iceland" }, { 0x0410, 850, 1252, "Italian", "Italy" },
103: { 0x0411, 932, 932, "Japanese", "Japan" }, { 0x0412, 949, 949, "Korean", "Korea (south)" },
1.1.1.2 christos 104: { 0x0413, 850, 1252, "Dutch", "Netherlands" }, { 0x0414, 850, 1252, "Norwegian (Bokm\345l)", "Norway" },
1.1 christos 105: { 0x0415, 852, 1250, "Polish", "Poland" }, { 0x0416, 850, 1252, "Portuguese", "Brazil" },
106: { 0x0418, 852, 1250, "Romanian", "Romania" }, { 0x0419, 866, 1251, "Russian", "Russia" },
107: { 0x041A, 852, 1250, "Croatian", "Croatia" }, { 0x041B, 852, 1250, "Slovak", "Slovakia" },
108: { 0x041C, 852, 1250, "Albanian", "Albania" }, { 0x041D, 850, 1252, "Swedish", "Sweden" },
109: { 0x041E, 874, 874, "Thai", "Thailand" }, { 0x041F, 857, 1254, "Turkish", "Turkey" },
110: { 0x0421, 850, 1252, "Indonesian", "Indonesia" }, { 0x0422, 866, 1251, "Ukrainian", "Ukraine" },
111: { 0x0423, 866, 1251, "Belarusian", "Belarus" }, { 0x0424, 852, 1250, "Slovene", "Slovenia" },
112: { 0x0425, 775, 1257, "Estonian", "Estonia" }, { 0x0426, 775, 1257, "Latvian", "Latvia" },
113: { 0x0427, 775, 1257, "Lithuanian", "Lithuania" },
114: { 0x0429, 864, 1256, "Arabic", "Farsi" }, { 0x042A,1258, 1258, "Vietnamese", "Vietnam" },
115: { 0x042D, 850, 1252, "Basque", "Spain" },
116: { 0x042F, 866, 1251, "Macedonian", "Former Yugoslav Republic of Macedonia" },
117: { 0x0436, 850, 1252, "Afrikaans", "South Africa" },
118: { 0x0438, 850, 1252, "Faroese", "Faroe Islands" },
119: { 0x043C, 437, 1252, "Irish", "Ireland" },
120: { 0x043E, 850, 1252, "Malay", "Malaysia" },
121: { 0x0801, 864, 1256, "Arabic", "Iraq" },
122: { 0x0804, 936, 936, "Chinese (People's republic of China)", "People's republic of China" },
123: { 0x0807, 850, 1252, "German", "Switzerland" },
124: { 0x0809, 850, 1252, "English", "United Kingdom" }, { 0x080A, 850, 1252, "Spanish", "Mexico" },
125: { 0x080C, 850, 1252, "French", "Belgium" },
126: { 0x0810, 850, 1252, "Italian", "Switzerland" },
127: { 0x0813, 850, 1252, "Dutch", "Belgium" }, { 0x0814, 850, 1252, "Norwegian (Nynorsk)", "Norway" },
128: { 0x0816, 850, 1252, "Portuguese", "Portugal" },
129: { 0x081A, 852, 1252, "Serbian (latin)", "Yugoslavia" },
130: { 0x081D, 850, 1252, "Swedish (Finland)", "Finland" },
131: { 0x0C01, 864, 1256, "Arabic", "Egypt" },
132: { 0x0C04, 950, 950, "Chinese", "Hong Kong" },
133: { 0x0C07, 850, 1252, "German", "Austria" },
134: { 0x0C09, 850, 1252, "English", "Australia" }, { 0x0C0A, 850, 1252, "Spanish - International Sort", "Spain" },
135: { 0x0C0C, 850, 1252, "French", "Canada"},
136: { 0x0C1A, 855, 1251, "Serbian (Cyrillic)", "Serbia" },
137: { 0x1001, 864, 1256, "Arabic", "Libya" },
138: { 0x1004, 936, 936, "Chinese", "Singapore" },
139: { 0x1007, 850, 1252, "German", "Luxembourg" },
140: { 0x1009, 850, 1252, "English", "Canada" },
141: { 0x100A, 850, 1252, "Spanish", "Guatemala" },
142: { 0x100C, 850, 1252, "French", "Switzerland" },
143: { 0x1401, 864, 1256, "Arabic", "Algeria" },
144: { 0x1407, 850, 1252, "German", "Liechtenstein" },
145: { 0x1409, 850, 1252, "English", "New Zealand" }, { 0x140A, 850, 1252, "Spanish", "Costa Rica" },
146: { 0x140C, 850, 1252, "French", "Luxembourg" },
147: { 0x1801, 864, 1256, "Arabic", "Morocco" },
148: { 0x1809, 850, 1252, "English", "Ireland" }, { 0x180A, 850, 1252, "Spanish", "Panama" },
149: { 0x180C, 850, 1252, "French", "Monaco" },
150: { 0x1C01, 864, 1256, "Arabic", "Tunisia" },
151: { 0x1C09, 437, 1252, "English", "South Africa" }, { 0x1C0A, 850, 1252, "Spanish", "Dominican Republic" },
152: { 0x2001, 864, 1256, "Arabic", "Oman" },
153: { 0x2009, 850, 1252, "English", "Jamaica" }, { 0x200A, 850, 1252, "Spanish", "Venezuela" },
154: { 0x2401, 864, 1256, "Arabic", "Yemen" },
155: { 0x2409, 850, 1252, "English", "Caribbean" }, { 0x240A, 850, 1252, "Spanish", "Colombia" },
156: { 0x2801, 864, 1256, "Arabic", "Syria" },
157: { 0x2809, 850, 1252, "English", "Belize" }, { 0x280A, 850, 1252, "Spanish", "Peru" },
158: { 0x2C01, 864, 1256, "Arabic", "Jordan" },
159: { 0x2C09, 437, 1252, "English", "Trinidad & Tobago" },{ 0x2C0A, 850, 1252, "Spanish", "Argentina" },
160: { 0x3001, 864, 1256, "Arabic", "Lebanon" },
161: { 0x3009, 437, 1252, "English", "Zimbabwe" }, { 0x300A, 850, 1252, "Spanish", "Ecuador" },
162: { 0x3401, 864, 1256, "Arabic", "Kuwait" },
163: { 0x3409, 437, 1252, "English", "Philippines" }, { 0x340A, 850, 1252, "Spanish", "Chile" },
164: { 0x3801, 864, 1256, "Arabic", "United Arab Emirates" },
165: { 0x380A, 850, 1252, "Spanish", "Uruguay" },
166: { 0x3C01, 864, 1256, "Arabic", "Bahrain" },
167: { 0x3C0A, 850, 1252, "Spanish", "Paraguay" },
168: { 0x4001, 864, 1256, "Arabic", "Qatar" },
169: { 0x400A, 850, 1252, "Spanish", "Bolivia" },
170: { 0x440A, 850, 1252, "Spanish", "El Salvador" },
171: { 0x480A, 850, 1252, "Spanish", "Honduras" },
172: { 0x4C0A, 850, 1252, "Spanish", "Nicaragua" },
173: { 0x500A, 850, 1252, "Spanish", "Puerto Rico" },
174: { (unsigned) -1, 0, 0, NULL, NULL }
175: };
176:
177: #endif
178:
/* Specifies the default codepage to be used for unicode
   transformations.  By default this is CP_ACP.  This variable is not
   read anywhere in this file; it has external linkage so other files can
   use it.  */
rc_uint_type wind_default_codepage = CP_ACP;

/* Specifies the currently used codepage for unicode
   transformations.  By default this is CP_ACP.  This is the codepage
   that unicode_from_ascii, unicode_from_ascii_len and ascii_from_unicode
   convert with.  */
rc_uint_type wind_current_codepage = CP_ACP;
186:
187: /* Convert an ASCII string to a unicode string. We just copy it,
188: expanding chars to shorts, rather than doing something intelligent. */
189:
190: void
191: unicode_from_ascii (rc_uint_type *length, unichar **unicode, const char *ascii)
192: {
193: unicode_from_codepage (length, unicode, ascii, wind_current_codepage);
194: }
195:
196: /* Convert an ASCII string with length A_LENGTH to a unicode string. We just
197: copy it, expanding chars to shorts, rather than doing something intelligent.
198: This routine converts also \0 within a string. */
199:
200: void
201: unicode_from_ascii_len (rc_uint_type *length, unichar **unicode, const char *ascii, rc_uint_type a_length)
202: {
203: char *tmp, *p;
204: rc_uint_type tlen, elen, idx = 0;
205:
206: *unicode = NULL;
207:
208: if (!a_length)
209: {
210: if (length)
211: *length = 0;
212: return;
213: }
214:
215: /* Make sure we have zero terminated string. */
1.1.1.3 ! christos 216: p = tmp = (char *) xmalloc (a_length + 1);
1.1 christos 217: memcpy (tmp, ascii, a_length);
218: tmp[a_length] = 0;
219:
220: while (a_length > 0)
221: {
222: unichar *utmp, *up;
223:
224: tlen = strlen (p);
225:
226: if (tlen > a_length)
227: tlen = a_length;
228: if (*p == 0)
229: {
230: /* Make room for one more character. */
231: utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
232: if (idx > 0)
233: {
234: memcpy (utmp, *unicode, idx * sizeof (unichar));
235: }
236: *unicode = utmp;
237: utmp[idx++] = 0;
238: --a_length;
239: p++;
240: continue;
241: }
242: utmp = NULL;
243: elen = 0;
244: elen = wind_MultiByteToWideChar (wind_current_codepage, p, NULL, 0);
245: if (elen)
246: {
247: utmp = ((unichar *) res_alloc (elen + sizeof (unichar) * 2));
248: wind_MultiByteToWideChar (wind_current_codepage, p, utmp, elen);
249: elen /= sizeof (unichar);
250: elen --;
251: }
252: else
253: {
254: /* Make room for one more character. */
255: utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
256: if (idx > 0)
257: {
258: memcpy (utmp, *unicode, idx * sizeof (unichar));
259: }
260: *unicode = utmp;
261: utmp[idx++] = ((unichar) *p) & 0xff;
262: --a_length;
263: p++;
264: continue;
265: }
266: p += tlen;
267: a_length -= tlen;
268:
269: up = (unichar *) res_alloc (sizeof (unichar) * (idx + elen));
270: if (idx > 0)
271: memcpy (up, *unicode, idx * sizeof (unichar));
272:
273: *unicode = up;
274: if (elen)
275: memcpy (&up[idx], utmp, sizeof (unichar) * elen);
276:
277: idx += elen;
278: }
279:
280: if (length)
281: *length = idx;
1.1.1.3 ! christos 282:
! 283: free (tmp);
1.1 christos 284: }
285:
286: /* Convert an unicode string to an ASCII string. We just copy it,
287: shrink shorts to chars, rather than doing something intelligent.
288: Shorts with not within the char range are replaced by '_'. */
289:
290: void
291: ascii_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii)
292: {
293: codepage_from_unicode (length, unicode, ascii, wind_current_codepage);
294: }
295:
296: /* Print the unicode string UNICODE to the file E. LENGTH is the
297: number of characters to print, or -1 if we should print until the
298: end of the string. FIXME: On a Windows host, we should be calling
299: some Windows function, probably WideCharToMultiByte. */
300:
301: void
302: unicode_print (FILE *e, const unichar *unicode, rc_uint_type length)
303: {
304: while (1)
305: {
306: unichar ch;
307:
308: if (length == 0)
309: return;
310: if ((bfd_signed_vma) length > 0)
311: --length;
312:
313: ch = *unicode;
314:
315: if (ch == 0 && (bfd_signed_vma) length < 0)
316: return;
317:
318: ++unicode;
319:
320: if ((ch & 0x7f) == ch)
321: {
322: if (ch == '\\')
323: fputs ("\\\\", e);
324: else if (ch == '"')
325: fputs ("\"\"", e);
326: else if (ISPRINT (ch))
327: putc (ch, e);
328: else
329: {
330: switch (ch)
331: {
332: case ESCAPE_A:
333: fputs ("\\a", e);
334: break;
335:
336: case ESCAPE_B:
337: fputs ("\\b", e);
338: break;
339:
340: case ESCAPE_F:
341: fputs ("\\f", e);
342: break;
343:
344: case ESCAPE_N:
345: fputs ("\\n", e);
346: break;
347:
348: case ESCAPE_R:
349: fputs ("\\r", e);
350: break;
351:
352: case ESCAPE_T:
353: fputs ("\\t", e);
354: break;
355:
356: case ESCAPE_V:
357: fputs ("\\v", e);
358: break;
359:
360: default:
361: fprintf (e, "\\%03o", (unsigned int) ch);
362: break;
363: }
364: }
365: }
366: else if ((ch & 0xff) == ch)
367: fprintf (e, "\\%03o", (unsigned int) ch);
368: else
369: fprintf (e, "\\x%04x", (unsigned int) ch);
370: }
371: }
372:
373: /* Print a unicode string to a file. */
374:
375: void
376: ascii_print (FILE *e, const char *s, rc_uint_type length)
377: {
378: while (1)
379: {
380: char ch;
381:
382: if (length == 0)
383: return;
384: if ((bfd_signed_vma) length > 0)
385: --length;
386:
387: ch = *s;
388:
389: if (ch == 0 && (bfd_signed_vma) length < 0)
390: return;
391:
392: ++s;
393:
394: if ((ch & 0x7f) == ch)
395: {
396: if (ch == '\\')
397: fputs ("\\\\", e);
398: else if (ch == '"')
399: fputs ("\"\"", e);
400: else if (ISPRINT (ch))
401: putc (ch, e);
402: else
403: {
404: switch (ch)
405: {
406: case ESCAPE_A:
407: fputs ("\\a", e);
408: break;
409:
410: case ESCAPE_B:
411: fputs ("\\b", e);
412: break;
413:
414: case ESCAPE_F:
415: fputs ("\\f", e);
416: break;
417:
418: case ESCAPE_N:
419: fputs ("\\n", e);
420: break;
421:
422: case ESCAPE_R:
423: fputs ("\\r", e);
424: break;
425:
426: case ESCAPE_T:
427: fputs ("\\t", e);
428: break;
429:
430: case ESCAPE_V:
431: fputs ("\\v", e);
432: break;
433:
434: default:
435: fprintf (e, "\\%03o", (unsigned int) ch);
436: break;
437: }
438: }
439: }
440: else
441: fprintf (e, "\\%03o", (unsigned int) ch & 0xff);
442: }
443: }
444:
445: rc_uint_type
446: unichar_len (const unichar *unicode)
447: {
448: rc_uint_type r = 0;
449:
450: if (unicode)
451: while (unicode[r] != 0)
452: r++;
453: else
454: --r;
455: return r;
456: }
457:
458: unichar *
459: unichar_dup (const unichar *unicode)
460: {
461: unichar *r;
462: int len;
463:
464: if (! unicode)
465: return NULL;
466: for (len = 0; unicode[len] != 0; ++len)
467: ;
468: ++len;
469: r = ((unichar *) res_alloc (len * sizeof (unichar)));
470: memcpy (r, unicode, len * sizeof (unichar));
471: return r;
472: }
473:
474: unichar *
475: unichar_dup_uppercase (const unichar *u)
476: {
477: unichar *r = unichar_dup (u);
478: int i;
479:
480: if (! r)
481: return NULL;
482:
483: for (i = 0; r[i] != 0; ++i)
484: {
485: if (r[i] >= 'a' && r[i] <= 'z')
486: r[i] &= 0xdf;
487: }
488: return r;
489: }
490:
491: static int
492: unichar_isascii (const unichar *u, rc_uint_type len)
493: {
494: rc_uint_type i;
495:
496: if ((bfd_signed_vma) len < 0)
497: {
498: if (u)
499: len = (rc_uint_type) unichar_len (u);
500: else
501: len = 0;
502: }
503:
504: for (i = 0; i < len; i++)
505: if ((u[i] & 0xff80) != 0)
506: return 0;
507: return 1;
508: }
509:
510: void
511: unicode_print_quoted (FILE *e, const unichar *u, rc_uint_type len)
512: {
513: if (! unichar_isascii (u, len))
514: fputc ('L', e);
515: fputc ('"', e);
516: unicode_print (e, u, len);
517: fputc ('"', e);
518: }
519:
520: int
521: unicode_is_valid_codepage (rc_uint_type cp)
522: {
523: if ((cp & 0xffff) != cp)
524: return 0;
525: if (cp == CP_UTF16 || cp == CP_ACP)
526: return 1;
527:
528: #if !defined (_WIN32) && !defined (__CYGWIN__)
529: if (! wind_find_codepage_info (cp))
530: return 0;
531: return 1;
532: #else
533: return !! IsValidCodePage ((UINT) cp);
534: #endif
535: }
536:
537: #if defined (_WIN32) || defined (__CYGWIN__)
538:
539: #define max_cp_string_len 6
540:
541: static unsigned int
542: codepage_from_langid (unsigned short langid)
543: {
544: char cp_string [max_cp_string_len];
545: int c;
546:
547: memset (cp_string, 0, max_cp_string_len);
548: /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
549: but is unavailable on Win95. */
550: c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
551: LOCALE_IDEFAULTANSICODEPAGE,
552: cp_string, max_cp_string_len);
553: /* If codepage data for an LCID is not installed on users's system,
554: GetLocaleInfo returns an empty string. Fall back to system ANSI
555: default. */
556: if (c == 0)
557: return CP_ACP;
558: return strtoul (cp_string, 0, 10);
559: }
560:
561: static unsigned int
562: wincodepage_from_langid (unsigned short langid)
563: {
564: char cp_string [max_cp_string_len];
565: int c;
566:
567: memset (cp_string, 0, max_cp_string_len);
568: /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
569: but is unavailable on Win95. */
570: c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
571: LOCALE_IDEFAULTCODEPAGE,
572: cp_string, max_cp_string_len);
573: /* If codepage data for an LCID is not installed on users's system,
574: GetLocaleInfo returns an empty string. Fall back to system ANSI
575: default. */
576: if (c == 0)
577: return CP_OEM;
578: return strtoul (cp_string, 0, 10);
579: }
580:
581: static char *
582: lang_from_langid (unsigned short langid)
583: {
584: char cp_string[261];
585: int c;
586:
587: memset (cp_string, 0, 261);
588: c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
589: LOCALE_SENGLANGUAGE,
590: cp_string, 260);
591: /* If codepage data for an LCID is not installed on users's system,
592: GetLocaleInfo returns an empty string. Fall back to system ANSI
593: default. */
594: if (c == 0)
595: strcpy (cp_string, "Neutral");
596: return xstrdup (cp_string);
597: }
598:
599: static char *
600: country_from_langid (unsigned short langid)
601: {
602: char cp_string[261];
603: int c;
604:
605: memset (cp_string, 0, 261);
606: c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
607: LOCALE_SENGCOUNTRY,
608: cp_string, 260);
609: /* If codepage data for an LCID is not installed on users's system,
610: GetLocaleInfo returns an empty string. Fall back to system ANSI
611: default. */
612: if (c == 0)
613: strcpy (cp_string, "Neutral");
614: return xstrdup (cp_string);
615: }
616:
617: #endif
618:
619: const wind_language_t *
620: wind_find_language_by_id (unsigned id)
621: {
622: #if !defined (_WIN32) && !defined (__CYGWIN__)
623: int i;
624:
625: if (! id)
626: return NULL;
627: for (i = 0; languages[i].id != (unsigned) -1 && languages[i].id != id; i++)
628: ;
629: if (languages[i].id == id)
630: return &languages[i];
631: return NULL;
632: #else
633: static wind_language_t wl;
634:
635: wl.id = id;
636: wl.doscp = codepage_from_langid ((unsigned short) id);
637: wl.wincp = wincodepage_from_langid ((unsigned short) id);
638: wl.name = lang_from_langid ((unsigned short) id);
639: wl.country = country_from_langid ((unsigned short) id);
640:
641: return & wl;
642: #endif
643: }
644:
645: const local_iconv_map *
646: wind_find_codepage_info (unsigned cp)
647: {
648: #if !defined (_WIN32) && !defined (__CYGWIN__)
649: int i;
650:
651: for (i = 0; codepages[i].codepage != (rc_uint_type) -1 && codepages[i].codepage != cp; i++)
652: ;
653: if (codepages[i].codepage == (rc_uint_type) -1)
654: return NULL;
655: return &codepages[i];
656: #else
657: static local_iconv_map lim;
658: if (!unicode_is_valid_codepage (cp))
659: return NULL;
660: lim.codepage = cp;
661: lim.iconv_name = "";
662: return & lim;
663: #endif
664: }
665:
666: /* Convert an Codepage string to a unicode string. */
667:
668: void
669: unicode_from_codepage (rc_uint_type *length, unichar **u, const char *src, rc_uint_type cp)
670: {
671: rc_uint_type len;
672:
673: len = wind_MultiByteToWideChar (cp, src, NULL, 0);
674: if (len)
675: {
676: *u = ((unichar *) res_alloc (len));
677: wind_MultiByteToWideChar (cp, src, *u, len);
678: }
679: /* Discount the trailing '/0'. If MultiByteToWideChar failed,
680: this will set *length to -1. */
681: len -= sizeof (unichar);
682:
683: if (length != NULL)
684: *length = len / sizeof (unichar);
685: }
686:
687: /* Convert an unicode string to an codepage string. */
688:
689: void
690: codepage_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii, rc_uint_type cp)
691: {
692: rc_uint_type len;
693:
694: len = wind_WideCharToMultiByte (cp, unicode, NULL, 0);
695: if (len)
696: {
697: *ascii = (char *) res_alloc (len * sizeof (char));
698: wind_WideCharToMultiByte (cp, unicode, *ascii, len);
699: }
700: /* Discount the trailing '/0'. If MultiByteToWideChar failed,
701: this will set *length to -1. */
702: len--;
703:
704: if (length != NULL)
705: *length = len;
706: }
707:
708: #if defined (HAVE_ICONV) && !defined (_WIN32) && !defined (__CYGWIN__)
709: static int
710: iconv_onechar (iconv_t cd, ICONV_CONST char *s, char *d, int d_len, const char **n_s, char **n_d)
711: {
712: int i;
713:
714: for (i = 1; i <= 32; i++)
715: {
716: char *tmp_d = d;
717: ICONV_CONST char *tmp_s = s;
718: size_t ret;
719: size_t s_left = (size_t) i;
720: size_t d_left = (size_t) d_len;
721:
722: ret = iconv (cd, & tmp_s, & s_left, & tmp_d, & d_left);
723:
724: if (ret != (size_t) -1)
725: {
726: *n_s = tmp_s;
727: *n_d = tmp_d;
728: return 0;
729: }
730: }
731:
732: return 1;
733: }
734:
735: static const char *
736: wind_iconv_cp (rc_uint_type cp)
737: {
738: const local_iconv_map *lim = wind_find_codepage_info (cp);
739:
740: if (!lim)
741: return NULL;
742: return lim->iconv_name;
743: }
744: #endif /* HAVE_ICONV */
745:
746: static rc_uint_type
747: wind_MultiByteToWideChar (rc_uint_type cp, const char *mb,
748: unichar *u, rc_uint_type u_len)
749: {
750: rc_uint_type ret = 0;
751:
752: #if defined (_WIN32) || defined (__CYGWIN__)
753: rc_uint_type conv_flags = MB_PRECOMPOSED;
754:
1.1.1.2 christos 755: /* MB_PRECOMPOSED is not allowed for UTF-7 or UTF-8.
1.1 christos 756: MultiByteToWideChar will set the last error to
757: ERROR_INVALID_FLAGS if we do. */
758: if (cp == CP_UTF8 || cp == CP_UTF7)
759: conv_flags = 0;
760:
761: ret = (rc_uint_type) MultiByteToWideChar (cp, conv_flags,
762: mb, -1, u, u_len);
763: /* Convert to bytes. */
764: ret *= sizeof (unichar);
765:
766: #elif defined (HAVE_ICONV)
767: int first = 1;
768: char tmp[32];
769: char *p_tmp;
770: const char *iconv_name = wind_iconv_cp (cp);
771:
772: if (!mb || !iconv_name)
773: return 0;
1.1.1.2 christos 774: iconv_t cd = iconv_open ("UTF-16LE", iconv_name);
1.1 christos 775:
776: while (1)
777: {
778: int iret;
779: const char *n_mb = "";
780: char *n_tmp = "";
781:
782: p_tmp = tmp;
783: iret = iconv_onechar (cd, (ICONV_CONST char *) mb, p_tmp, 32, & n_mb, & n_tmp);
784: if (first)
785: {
786: first = 0;
787: continue;
788: }
789: if (!iret)
790: {
791: size_t l_tmp = (size_t) (n_tmp - p_tmp);
792:
793: if (u)
794: {
795: if ((size_t) u_len < l_tmp)
796: break;
797: memcpy (u, tmp, l_tmp);
798: u += l_tmp/2;
799: u_len -= l_tmp;
800: }
801: ret += l_tmp;
802: }
803: else
804: break;
805: if (tmp[0] == 0 && tmp[1] == 0)
806: break;
807: mb = n_mb;
808: }
809: iconv_close (cd);
810: #else
811: if (cp)
812: ret = 0;
813: ret = strlen (mb) + 1;
814: ret *= sizeof (unichar);
815: if (u != NULL && u_len != 0)
816: {
817: do
818: {
819: *u++ = ((unichar) *mb) & 0xff;
820: --u_len; mb++;
821: }
822: while (u_len != 0 && mb[-1] != 0);
823: }
824: if (u != NULL && u_len != 0)
825: *u = 0;
826: #endif
827: return ret;
828: }
829:
830: static rc_uint_type
831: wind_WideCharToMultiByte (rc_uint_type cp, const unichar *u, char *mb, rc_uint_type mb_len)
832: {
833: rc_uint_type ret = 0;
834: #if defined (_WIN32) || defined (__CYGWIN__)
835: WINBOOL used_def = FALSE;
836:
837: ret = (rc_uint_type) WideCharToMultiByte (cp, 0, u, -1, mb, mb_len,
838: NULL, & used_def);
839: #elif defined (HAVE_ICONV)
840: int first = 1;
841: char tmp[32];
842: char *p_tmp;
843: const char *iconv_name = wind_iconv_cp (cp);
844:
845: if (!u || !iconv_name)
846: return 0;
1.1.1.2 christos 847: iconv_t cd = iconv_open (iconv_name, "UTF-16LE");
1.1 christos 848:
849: while (1)
850: {
851: int iret;
852: const char *n_u = "";
853: char *n_tmp = "";
854:
855: p_tmp = tmp;
856: iret = iconv_onechar (cd, (ICONV_CONST char *) u, p_tmp, 32, &n_u, & n_tmp);
857: if (first)
858: {
859: first = 0;
860: continue;
861: }
862: if (!iret)
863: {
864: size_t l_tmp = (size_t) (n_tmp - p_tmp);
865:
866: if (mb)
867: {
868: if ((size_t) mb_len < l_tmp)
869: break;
870: memcpy (mb, tmp, l_tmp);
871: mb += l_tmp;
872: mb_len -= l_tmp;
873: }
874: ret += l_tmp;
875: }
876: else
877: break;
878: if (u[0] == 0)
879: break;
880: u = (const unichar *) n_u;
881: }
882: iconv_close (cd);
883: #else
884: if (cp)
885: ret = 0;
886:
887: while (u[ret] != 0)
888: ++ret;
889:
890: ++ret;
891:
892: if (mb)
893: {
894: while (*u != 0 && mb_len != 0)
895: {
896: if (u[0] == (u[0] & 0x7f))
897: *mb++ = (char) u[0];
898: else
899: *mb++ = '_';
900: ++u; --mb_len;
901: }
902: if (mb_len != 0)
903: *mb = 0;
904: }
905: #endif
906: return ret;
907: }
CVSweb <webmaster@jp.NetBSD.org>