[BACK]Return to unicode.h CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / fs

Annotation of src/sys/fs/unicode.h, Revision 1.7

1.7     ! jakllsch    1: /* $NetBSD: unicode.h,v 1.6 2008/04/28 20:24:02 martin Exp $ */
1.1       jdolecek    2:
                      3: /*-
                      4:  * Copyright (c) 2001, 2004 The NetBSD Foundation, Inc.
                      5:  * All rights reserved.
                      6:  *
                      7:  * Redistribution and use in source and binary forms, with or without
                      8:  * modification, are permitted provided that the following conditions
                      9:  * are met:
                     10:  * 1. Redistributions of source code must retain the above copyright
                     11:  *    notice, this list of conditions and the following disclaimer.
                     12:  * 2. Redistributions in binary form must reproduce the above copyright
                     13:  *    notice, this list of conditions and the following disclaimer in the
                     14:  *    documentation and/or other materials provided with the distribution.
                     15:  *
                     16:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     17:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     18:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     19:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     20:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     21:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     22:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     23:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     24:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     25:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     26:  * POSSIBILITY OF SUCH DAMAGE.
                     27:  */
                     28:
1.4       yamt       29: /*-
                     30:  * Copyright (c) 1993
                     31:  *     The Regents of the University of California.  All rights reserved.
                     32:  *
                     33:  * This code is derived from software contributed to Berkeley by
                     34:  * Paul Borman at Krystal Technologies.
                     35:  *
                     36:  * Redistribution and use in source and binary forms, with or without
                     37:  * modification, are permitted provided that the following conditions
                     38:  * are met:
                     39:  * 1. Redistributions of source code must retain the above copyright
                     40:  *    notice, this list of conditions and the following disclaimer.
                     41:  * 2. Redistributions in binary form must reproduce the above copyright
                     42:  *    notice, this list of conditions and the following disclaimer in the
                     43:  *    documentation and/or other materials provided with the distribution.
                     44:  * 3. All advertising materials mentioning features or use of this software
                     45:  *    must display the following acknowledgement:
                     46:  *     This product includes software developed by the University of
                     47:  *     California, Berkeley and its contributors.
                     48:  * 4. Neither the name of the University nor the names of its contributors
                     49:  *    may be used to endorse or promote products derived from this software
                     50:  *    without specific prior written permission.
                     51:  *
                     52:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     53:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     54:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     55:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     56:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     57:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     58:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     59:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     60:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     61:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     62:  * SUCH DAMAGE.
                     63:  */
                     64:
1.1       jdolecek   65: /*
                     66:  * Routines for handling Unicode encoded in UTF-8 form, code derived from
                     67:  * src/lib/libc/locale/utf2.c.
                          *
                          * Characters are exchanged as u_int16_t values, so only code points
                          * that fit in 16 bits (UTF-8 sequences of one to three bytes) are
                          * handled by these routines.
                          *
                          * Both helpers are static and are forward-declared with __unused;
                          * presumably this is so that a file which includes this header but
                          * calls only one of them does not get an unused-function warning
                          * (NOTE(review): confirm against the includers).
                     68:  */
1.5       drochner   69: static u_int16_t wget_utf8(const char **, size_t *) __unused;
                     70: static int wput_utf8(char *, size_t, u_int16_t) __unused;
1.1       jdolecek   71:
                     /*
                      * Read one UTF-8 encoded character off the front of the string, advance
                      * the string pointer past it and return the character.
                      *
                      *   str  in/out: cursor into the input; moved forward by the number of
                      *        bytes consumed.
                      *   sz   in/out: number of bytes available at *str; must be at least 1
                      *        on entry and is reduced by the number of bytes consumed.
                      *
                      * Returns the decoded value as a 16-bit quantity.  Only one-, two- and
                      * three-byte sequences are decoded, as longer ones cannot be represented
                      * in 16 bits.
                      *
                      * Malformed input is never reported as an error.  If the first byte
                      * does not start a well-formed sequence -- it is a stray continuation
                      * byte, it announces a four-byte sequence, the sequence is cut short by
                      * *sz, a continuation byte is missing, or the sequence is an overlong
                      * (non-shortest) form -- then only that first byte is consumed and it is
                      * returned as if it were a correctly encoded ISO-8859-1 character.
                      *
                      * Overlong forms are refused because accepting them would let e.g.
                      * 0xC0 0xAF decode to '/' and 0xC0 0x80 decode to NUL, characters the
                      * raw byte string did not contain.  Three-byte encodings of surrogate
                      * values (0xD800-0xDFFF) are deliberately still accepted, since
                      * wput_utf8() produces exactly that encoding for such values.
                      */
                     static u_int16_t
                     wget_utf8(const char **str, size_t *sz)
                     {
                             /* Sequence length, indexed by the high nibble of the lead byte. */
                             static const unsigned char seqlen[16] = {
                                     1, 1, 1, 1, 1, 1, 1, 1,  /* 0xxxxxxx: single byte */
                                     0, 0, 0, 0,              /* 10xxxxxx: continuation byte */
                                     2, 2,                    /* 110xxxxx: two bytes */
                                     3,                       /* 1110xxxx: three bytes */
                                     0,                       /* 1111xxxx: more than 16 bits */
                             };
                             /* Work on unsigned bytes so values >= 0x80 never sign-extend. */
                             const unsigned char *s = (const unsigned char *)*str;
                             unsigned int wc = 0;
                             size_t len;

                             /* must be called with at least one byte remaining */
                             KASSERT(*sz > 0);

                             len = seqlen[s[0] >> 4];
                             if (len > *sz) {
                                     /* sequence runs past the end of the input */
                                     len = 0;
                             }

                             /*
                              * Decode and validate; len is reset to 0 whenever the sequence
                              * turns out to be malformed.  s[1] and s[2] are only read when
                              * len <= *sz, so they are within the caller's buffer.
                              */
                             switch (len) {
                             case 1:
                                     wc = s[0];
                                     break;
                             case 2:
                                     wc = ((s[0] & 0x1Fu) << 6) | (s[1] & 0x3Fu);
                                     if ((s[1] & 0xC0) != 0x80 || wc < 0x80)
                                             len = 0;
                                     break;
                             case 3:
                                     wc = ((s[0] & 0x0Fu) << 12) | ((s[1] & 0x3Fu) << 6)
                                         | (s[2] & 0x3Fu);
                                     if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80 ||
                                         wc < 0x800)
                                             len = 0;
                                     break;
                             default:
                                     len = 0;
                                     break;
                             }

                             if (len == 0) {
                                     /*
                                      * The first byte does not start a valid UTF-8 sequence.
                                      * There is not much we can do with this, so return that
                                      * byte as if it were a correctly encoded ISO-8859-1
                                      * character and consume just that one byte.
                                      */
                                     wc = s[0];
                                     len = 1;
                             }

                             *str += len;
                             *sz -= len;
                             return (u_int16_t)wc;
                     }
                    124:
                    /*
                     * Encode the 16-bit character 'wc' as UTF-8 into 's'.
                     *
                     *   s   destination buffer
                     *   n   number of bytes still free in 's'
                     *   wc  character to encode
                     *
                     * Returns the number of bytes stored (1, 2 or 3), or 0 without touching
                     * 's' when the encoding does not fit into the remaining 'n' bytes.
                     */
                    static int
                    wput_utf8(char *s, size_t n, u_int16_t wc)
                    {
                            int need;

                            /* Work out how many bytes the encoding takes. */
                            if ((wc & 0xf800) != 0)
                                    need = 3;       /* a bit above bit 10 is set */
                            else if ((wc & 0x0780) != 0)
                                    need = 2;       /* a bit in the range 7..10 is set */
                            else
                                    need = 1;       /* plain 7-bit value */

                            /* bound check failure */
                            if (n < (size_t)need)
                                    return 0;

                            switch (need) {
                            case 3:
                                    s[0] = 0xE0 | (wc >> 12);
                                    s[1] = 0x80 | ((wc >> 6) & 0x3F);
                                    s[2] = 0x80 | (wc & 0x3F);
                                    break;
                            case 2:
                                    s[0] = 0xC0 | (wc >> 6);
                                    s[1] = 0x80 | (wc & 0x3F);
                                    break;
                            default:
                                    s[0] = wc;
                                    break;
                            }

                            return need;
                    }

CVSweb <webmaster@jp.NetBSD.org>