Annotation of src/lib/libc/citrus/modules/citrus_ues.c, Revision 1.5
1.5 ! rillig 1: /* $NetBSD: citrus_ues.c,v 1.4 2013/05/28 16:57:56 joerg Exp $ */
1.1 tnozaki 2:
3: /*-
4: * Copyright (c)2006 Citrus Project,
5: * All rights reserved.
6: *
7: * Redistribution and use in source and binary forms, with or without
8: * modification, are permitted provided that the following conditions
9: * are met:
10: * 1. Redistributions of source code must retain the above copyright
11: * notice, this list of conditions and the following disclaimer.
12: * 2. Redistributions in binary form must reproduce the above copyright
13: * notice, this list of conditions and the following disclaimer in the
14: * documentation and/or other materials provided with the distribution.
15: *
16: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26: * SUCH DAMAGE.
27: */
28:
29: #include <sys/cdefs.h>
30: #if defined(LIBC_SCCS) && !defined(lint)
1.5 ! rillig 31: __RCSID("$NetBSD: citrus_ues.c,v 1.4 2013/05/28 16:57:56 joerg Exp $");
1.1 tnozaki 32: #endif /* LIBC_SCCS and not lint */
33:
34: #include <assert.h>
35: #include <errno.h>
36: #include <string.h>
37: #include <stdio.h>
38: #include <stdint.h>
39: #include <stdlib.h>
40: #include <limits.h>
41: #include <wchar.h>
42:
43: #include "citrus_namespace.h"
44: #include "citrus_types.h"
45: #include "citrus_bcs.h"
46: #include "citrus_module.h"
47: #include "citrus_ctype.h"
48: #include "citrus_stdenc.h"
49: #include "citrus_ues.h"
50:
/* Flag bit for _UESEncodingInfo.mode: the "C99" variant was requested. */
#define MODE_C99	1

/*
 * Encoding parameters of one UES instance.  Both members are filled in by
 * _citrus_UES_encoding_module_init().
 */
typedef struct {
	int	mode;		/* bitwise OR of MODE_* flags */
	size_t	mb_cur_max;	/* longest byte sequence for one character */
} _UESEncodingInfo;
56:
/*
 * Conversion state: the bytes of a not yet completed character.
 * Twelve bytes hold the longest form handled in this file, a pair of
 * six-byte "\uXXXX" escapes.
 */
typedef struct {
	int	chlen;		/* number of valid bytes in ch[] */
	char	ch[12];		/* pending bytes, oldest first */
} _UESState;
61:
62: typedef struct {
63: _UESEncodingInfo ei;
64: struct {
65: /* for future multi-locale facility */
66: _UESState s_mblen;
67: _UESState s_mbrlen;
68: _UESState s_mbrtowc;
69: _UESState s_mbtowc;
70: _UESState s_mbsrtowcs;
1.4 joerg 71: _UESState s_mbsnrtowcs;
1.1 tnozaki 72: _UESState s_wcrtomb;
73: _UESState s_wcsrtombs;
1.4 joerg 74: _UESState s_wcsnrtombs;
1.1 tnozaki 75: _UESState s_wctomb;
76: } states;
77: } _UESCTypeInfo;
78:
/* Accessors into a _UESCTypeInfo: its encoding info and a named state slot. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/*
 * Names and properties of this encoding.
 * NOTE(review): presumably consumed by the citrus_*_template.h headers
 * included at the end of this file -- confirm against those headers.
 */
#define _FUNCNAME(m)			_citrus_UES_##m
#define _ENCODING_INFO			_UESEncodingInfo
#define _CTYPE_INFO			_UESCTypeInfo
#define _ENCODING_STATE			_UESState
#define _ENCODING_MB_CUR_MAX(_ei_)	(_ei_)->mb_cur_max
#define _ENCODING_IS_STATE_DEPENDENT	0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
89:
90: static __inline void
91: /*ARGSUSED*/
92: _citrus_UES_init_state(_UESEncodingInfo * __restrict ei,
93: _UESState * __restrict psenc)
94: {
95: psenc->chlen = 0;
96: }
97:
98: static __inline void
99: /*ARGSUSED*/
100: _citrus_UES_pack_state(_UESEncodingInfo * __restrict ei,
101: void *__restrict pspriv, const _UESState * __restrict psenc)
102: {
103: /* ei seem to be unused */
104: _DIAGASSERT(pspriv != NULL);
105: _DIAGASSERT(psenc != NULL);
106:
107: memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
108: }
109:
110: static __inline void
111: /*ARGSUSED*/
112: _citrus_UES_unpack_state(_UESEncodingInfo * __restrict ei,
113: _UESState * __restrict psenc, const void * __restrict pspriv)
114: {
115: /* ei seem to be unused */
116: _DIAGASSERT(psenc != NULL);
117: _DIAGASSERT(pspriv != NULL);
118:
119: memcpy((void *)psenc, pspriv, sizeof(*psenc));
120: }
121:
/*
 * Value of the hexadecimal digit ch (either letter case accepted),
 * or -1 when ch is not a hexadecimal digit.
 */
static __inline int
to_int(int ch)
{
	int value = -1;

	if ('0' <= ch && ch <= '9')
		value = ch - '0';
	else if ('A' <= ch && ch <= 'F')
		value = 10 + (ch - 'A');
	else if ('a' <= ch && ch <= 'f')
		value = 10 + (ch - 'a');

	return value;
}
133:
/* Lead-in byte of an escape and the letter selecting its width. */
#define ESCAPE		'\\'
#define UCS2_ESC	'u'	/* 16-bit form, four hex digits follow */
#define UCS4_ESC	'U'	/* 32-bit form, eight hex digits follow */

/* Number of value bits carried by each escape form. */
#define UCS2_BIT	16
#define UCS4_BIT	32

/* Upper bounds used when choosing the output form for a wide character. */
#define BMP_MAX		UINT32_C(0xFFFF)	/* single "\u" escape */
#define UCS2_MAX	UINT32_C(0x10FFFF)	/* pair of "\u" escapes */
#define UCS4_MAX	UINT32_C(0x7FFFFFFF)	/* single "\U" escape */

/* Lower-case hexadecimal digits, indexed by nibble value. */
static const char *xdig = "0123456789abcdef";
145:
146: static __inline int
147: to_str(char *s, wchar_t wc, int bit)
148: {
149: char *p;
150:
151: p = s;
152: *p++ = ESCAPE;
153: switch (bit) {
154: case UCS2_BIT:
155: *p++ = UCS2_ESC;
156: break;
157: case UCS4_BIT:
158: *p++ = UCS4_ESC;
159: break;
160: default:
161: abort();
162: }
163: do {
164: *p++ = xdig[(wc >> (bit -= 4)) & 0xF];
165: } while (bit > 0);
166: return p - s;
167: }
168:
/* Nonzero iff wc lies in the high-surrogate range 0xD800..0xDBFF. */
static __inline int
is_hi_surrogate(wchar_t wc)
{
	/* The range is exactly the values whose bits above the low 10 are 0xD800. */
	return (wc & ~0x3FF) == 0xD800;
}
174:
/* Nonzero iff wc lies in the low-surrogate range 0xDC00..0xDFFF. */
static __inline int
is_lo_surrogate(wchar_t wc)
{
	/* The range is exactly the values whose bits above the low 10 are 0xDC00. */
	return (wc & ~0x3FF) == 0xDC00;
}
180:
/*
 * Combine a high and a low surrogate into the code point they encode:
 * ten payload bits from each half, offset by 0x10000.
 * Both arguments must already be in their surrogate ranges.
 */
static __inline wchar_t
surrogate_to_ucs(wchar_t hi, wchar_t lo)
{
	wchar_t upper, lower;

	_DIAGASSERT(is_hi_surrogate(hi));
	_DIAGASSERT(is_lo_surrogate(lo));

	upper = hi - 0xD800;
	lower = lo - 0xDC00;

	return ((upper << 10) | lower) + 0x10000;
}
191:
/*
 * Split a code point wc >= 0x10000 into a surrogate pair: *hi receives
 * 0xD800 plus the bits above the low ten of (wc - 0x10000), *lo receives
 * 0xDC00 plus the low ten bits.
 */
static __inline void
ucs_to_surrogate(wchar_t wc, wchar_t * __restrict hi, wchar_t * __restrict lo)
{
	wchar_t offset;

	_DIAGASSERT(hi != NULL);
	_DIAGASSERT(lo != NULL);
	_DIAGASSERT(wc >= 0x10000);

	offset = wc - 0x10000;
	*hi = 0xD800 + (offset >> 10);
	*lo = 0xDC00 + (offset & 0x3FF);
}
203:
/*
 * Nonzero iff wc is at most 0x9F and is none of 0x24 ('$'), 0x40 ('@')
 * or 0x60 ('`').  In C99 mode such characters are written as one raw
 * byte, and an escape that decodes to one of them is rejected.
 */
static __inline int
is_basic(wchar_t wc)
{
	if ((uint32_t)wc > 0x9F)
		return 0;

	switch (wc) {
	case 0x24:
	case 0x40:
	case 0x60:
		return 0;
	default:
		return 1;
	}
}
210:
/*
 * Decode one wide character from the bytes at *s (at most n of them),
 * using psenc->ch[] as a buffer for bytes that have been consumed but not
 * yet turned into a character.
 *
 * Recognised escapes are ESCAPE 'u' + 4 hex digits and, in MODE_C99 only,
 * ESCAPE 'U' + 8 hex digits.  Outside MODE_C99 a "\u" high surrogate that
 * is followed by a "\u" low surrogate is combined into one character.
 * Whenever the buffered bytes do not form an accepted escape, the first
 * buffered byte is returned as a character of its own and the remaining
 * bytes stay buffered for the following calls.
 *
 * Results:
 *   return 0, *nresult == (size_t)-2  input ran out; *s is advanced past
 *                                     the bytes taken into psenc
 *   return 0, otherwise               *pwc (if non-NULL) holds the
 *                                     character, *s is advanced, *nresult
 *                                     is 0 for a zero character, else the
 *                                     number of bytes taken from *s by
 *                                     this call
 *   return EILSEQ, *nresult == (size_t)-1
 *                                     MODE_C99 escape with a value that
 *                                     is_basic(), a surrogate, or above
 *                                     UCS4_MAX; *s and psenc are left as
 *                                     they were at that point
 * A NULL *s resets psenc and yields *nresult == 0.
 */
211: static int
212: _citrus_UES_mbrtowc_priv(_UESEncodingInfo * __restrict ei,
213: wchar_t * __restrict pwc, const char ** __restrict s, size_t n,
214: _UESState * __restrict psenc, size_t * __restrict nresult)
215: {
216: const char *s0;
1.3 wiz 217: int ch, head, tail, num;
1.1 tnozaki 218: wchar_t hi, wc;
219:
220: _DIAGASSERT(ei != NULL);
221: /* pwc may be null */
222: _DIAGASSERT(s != NULL);
223: _DIAGASSERT(psenc != NULL);
224: _DIAGASSERT(nresult != NULL);
225:
226: if (*s == NULL) {
227: _citrus_UES_init_state(ei, psenc);
228: *nresult = 0;
229: return 0;
230: }
231: s0 = *s;
232:
/*
 * hi remembers a high surrogate between the two passes; tail is the
 * buffer index one past the escape being parsed (0 = "not an escape").
 */
233: hi = (wchar_t)0;
234: tail = 0;
235:
/*
 * Entered once for an ordinary character and a second time (with tail == 6)
 * to parse the escape that follows a high surrogate.  head is the index of
 * the next buffered byte to look at; a byte is pulled from the input only
 * when the buffer does not already contain it, so bytes kept from earlier
 * calls are re-scanned first.
 */
236: surrogate:
237: wc = (wchar_t)0;
238: head = tail;
239: if (psenc->chlen == head) {
240: if (n-- < 1)
241: goto restart;
242: psenc->ch[psenc->chlen++] = *s0++;
243: }
244: ch = (unsigned char)psenc->ch[head++];
245: if (ch == ESCAPE) {
246: if (psenc->chlen == head) {
247: if (n-- < 1)
248: goto restart;
249: psenc->ch[psenc->chlen++] = *s0++;
250: }
/*
 * "\u" extends the expected length by 6 (giving 6, or 12 on the second
 * pass); "\U" is honoured in MODE_C99 only and sets it to 10; anything
 * else means this is not an escape.
 */
251: switch (psenc->ch[head]) {
252: case UCS2_ESC:
253: tail += 6;
254: break;
255: case UCS4_ESC:
256: if (ei->mode & MODE_C99) {
257: tail = 10;
258: break;
259: }
260: /*FALLTHROUGH*/
261: default:
262: tail = 0;
263: }
264: ++head;
265: }
/* Accumulate the hex digits of the escape, four bits at a time. */
266: for (; head < tail; ++head) {
267: if (psenc->chlen == head) {
268: if (n-- < 1) {
/*
 * Input exhausted (also the target of the gotos above): report the bytes
 * as consumed and keep them in psenc for the next call.
 */
269: restart:
270: *s = s0;
271: *nresult = (size_t)-2;
272: return 0;
273: }
274: psenc->ch[psenc->chlen++] = *s0++;
275: }
276: num = to_int((int)(unsigned char)psenc->ch[head]);
277: if (num < 0) {
/* Not a hex digit: give up on the escape and use the single-byte fallback. */
278: tail = 0;
279: break;
280: }
281: wc = (wc << 4) | num;
282: }
/*
 * head now becomes the number of bytes that remain buffered on success (0).
 * Every "break" out of the switch below leads to the single-byte fallback;
 * "goto done" accepts wc.
 */
283: head = 0;
284: switch (tail) {
285: case 0:
286: break;
287: case 6:
/* A high surrogate was seen but no second escape followed it. */
288: if (hi != (wchar_t)0)
289: break;
290: if ((ei->mode & MODE_C99) == 0) {
291: if (is_hi_surrogate(wc) != 0) {
292: hi = wc;
293: goto surrogate;
294: }
/* Outside MODE_C99, values up to 0x7F and lone low surrogates are not taken as escapes. */
295: if ((uint32_t)wc <= 0x7F /* XXX */ ||
296: is_lo_surrogate(wc) != 0)
297: break;
298: goto done;
299: }
300: /*FALLTHROUGH*/
301: case 10:
/* MODE_C99 validation, shared by the "\u" and "\U" forms. */
302: if (is_basic(wc) == 0 && (uint32_t)wc <= UCS4_MAX &&
303: is_hi_surrogate(wc) == 0 && is_lo_surrogate(wc) == 0)
304: goto done;
305: *nresult = (size_t)-1;
306: return EILSEQ;
307: case 12:
/* Second half of a pair: it must be a low surrogate to be combined. */
308: if (is_lo_surrogate(wc) == 0)
309: break;
310: wc = surrogate_to_ucs(hi, wc);
311: goto done;
312: }
/*
 * Fallback: return the first buffered byte as the character and shift the
 * rest of the buffer down by one; head becomes the count still pending.
 */
313: ch = (unsigned char)psenc->ch[0];
314: head = psenc->chlen;
315: if (--head > 0)
316: memmove(&psenc->ch[0], &psenc->ch[1], head);
317: wc = (wchar_t)ch;
318: done:
319: psenc->chlen = head;
320: if (pwc != NULL)
321: *pwc = wc;
/* s0 - *s counts only bytes taken from the input during this call. */
322: *nresult = (size_t)((wc == 0) ? 0 : (s0 - *s));
323: *s = s0;
324:
325: return 0;
326: }
327:
328: static int
329: _citrus_UES_wcrtomb_priv(_UESEncodingInfo * __restrict ei,
330: char * __restrict s, size_t n, wchar_t wc,
331: _UESState * __restrict psenc, size_t * __restrict nresult)
332: {
333: wchar_t hi, lo;
334:
335: if (psenc->chlen != 0)
336: return EINVAL;
337:
338: if ((ei->mode & MODE_C99) ? is_basic(wc) : (uint32_t)wc <= 0x7F) {
339: if (n-- < 1)
340: goto e2big;
341: psenc->ch[psenc->chlen++] = (char)wc;
342: } else if ((uint32_t)wc <= BMP_MAX) {
343: if (n < 6)
344: goto e2big;
345: psenc->chlen = to_str(&psenc->ch[0], wc, UCS2_BIT);
346: } else if ((ei->mode & MODE_C99) == 0 && (uint32_t)wc <= UCS2_MAX) {
347: if (n < 12)
348: goto e2big;
349: ucs_to_surrogate(wc, &hi, &lo);
350: psenc->chlen += to_str(&psenc->ch[0], hi, UCS2_BIT);
351: psenc->chlen += to_str(&psenc->ch[6], lo, UCS2_BIT);
352: } else if ((ei->mode & MODE_C99) && (uint32_t)wc <= UCS4_MAX) {
353: if (n < 10)
354: goto e2big;
355: psenc->chlen = to_str(&psenc->ch[0], wc, UCS4_BIT);
356: } else {
357: *nresult = (size_t)-1;
358: return EILSEQ;
359: }
360: memcpy(s, psenc->ch, psenc->chlen);
361: *nresult = psenc->chlen;
362: psenc->chlen = 0;
363:
364: return 0;
365:
366: e2big:
367: *nresult = (size_t)-1;
368: return E2BIG;
369: }
370:
371: /*ARGSUSED*/
1.2 joerg 372: static int
1.1 tnozaki 373: _citrus_UES_stdenc_wctocs(_UESEncodingInfo * __restrict ei,
374: _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
375: {
376: /* ei seem to be unused */
377: _DIAGASSERT(csid != NULL);
378: _DIAGASSERT(idx != NULL);
379:
380: *csid = 0;
381: *idx = (_index_t)wc;
382:
383: return 0;
384: }
385:
386: static __inline int
387: /*ARGSUSED*/
388: _citrus_UES_stdenc_cstowc(_UESEncodingInfo * __restrict ei,
389: wchar_t * __restrict wc, _csid_t csid, _index_t idx)
390: {
391: /* ei seem to be unused */
392: _DIAGASSERT(wc != NULL);
393:
394: if (csid != 0)
395: return EILSEQ;
396: *wc = (wchar_t)idx;
397:
398: return 0;
399: }
400:
401: static __inline int
402: /*ARGSUSED*/
403: _citrus_UES_stdenc_get_state_desc_generic(_UESEncodingInfo * __restrict ei,
404: _UESState * __restrict psenc, int * __restrict rstate)
405: {
406: _DIAGASSERT(psenc != NULL);
407: _DIAGASSERT(rstate != NULL);
408:
409: if (psenc->chlen == 0)
410: *rstate = _STDENC_SDGEN_INITIAL;
411: else
412: *rstate = _STDENC_SDGEN_INCOMPLETE_CHAR; /* XXX */
413:
414: return 0;
415: }
416:
417: static void
418: /*ARGSUSED*/
419: _citrus_UES_encoding_module_uninit(_UESEncodingInfo *ei)
420: {
421: /* ei seems to be unused */
422: }
423:
424: static int
425: /*ARGSUSED*/
426: _citrus_UES_encoding_module_init(_UESEncodingInfo * __restrict ei,
427: const void * __restrict var, size_t lenvar)
428: {
429: const char *p;
430:
431: _DIAGASSERT(ei != NULL);
432:
433: p = var;
434: #define MATCH(x, act) \
435: do { \
436: if (lenvar >= (sizeof(#x)-1) && \
437: _bcs_strncasecmp(p, #x, sizeof(#x)-1) == 0) { \
438: act; \
439: lenvar -= sizeof(#x)-1; \
440: p += sizeof(#x)-1; \
441: } \
1.5 ! rillig 442: } while (0)
1.1 tnozaki 443: memset((void *)ei, 0, sizeof(*ei));
444: while (lenvar > 0) {
445: switch (_bcs_toupper(*p)) {
446: case 'C':
447: MATCH(C99, ei->mode |= MODE_C99);
448: break;
449: }
450: ++p;
451: --lenvar;
452: }
453: ei->mb_cur_max = (ei->mode & MODE_C99) ? 10 : 12;
454:
455: return 0;
456: }
457:
458: /* ----------------------------------------------------------------------
459: * public interface for ctype
460: */
/*
 * NOTE(review): the two macros below presumably declare the ctype entry
 * points for "UES" and define their ops table, and the template header
 * presumably implements them from the _FUNCNAME()/_ENCODING_* macros and
 * the *_priv functions above -- confirm against citrus_ctype.h and the
 * template.  The template is included after the definitions it refers to.
 */
461:
462: _CITRUS_CTYPE_DECLS(UES);
463: _CITRUS_CTYPE_DEF_OPS(UES);
464:
465: #include "citrus_ctype_template.h"
466:
467: /* ----------------------------------------------------------------------
468: * public interface for stdenc
469: */
/*
 * NOTE(review): same arrangement for the stdenc interface; presumably the
 * template wires up the stdenc_* and encoding_module_* functions above --
 * confirm against citrus_stdenc.h and the template.
 */
470:
471: _CITRUS_STDENC_DECLS(UES);
472: _CITRUS_STDENC_DEF_OPS(UES);
473:
474: #include "citrus_stdenc_template.h"
CVSweb <webmaster@jp.NetBSD.org>