[BACK]Return to chartype.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / lib / libedit

Annotation of src/lib/libedit/chartype.c, Revision 1.13

1.13    ! christos    1: /*     $NetBSD: chartype.c,v 1.12 2015/02/22 02:16:19 christos Exp $   */
1.1       christos    2:
                      3: /*-
                      4:  * Copyright (c) 2009 The NetBSD Foundation, Inc.
                      5:  * All rights reserved.
                      6:  *
                      7:  * Redistribution and use in source and binary forms, with or without
                      8:  * modification, are permitted provided that the following conditions
                      9:  * are met:
                     10:  * 1. Redistributions of source code must retain the above copyright
                     11:  *    notice, this list of conditions and the following disclaimer.
                     12:  * 2. Redistributions in binary form must reproduce the above copyright
                     13:  *    notice, this list of conditions and the following disclaimer in the
                     14:  *    documentation and/or other materials provided with the distribution.
                     15:  * 3. All advertising materials mentioning features or use of this software
                     16:  *    must display the following acknowledgement:
                     17:  *        This product includes software developed by the NetBSD
                     18:  *        Foundation, Inc. and its contributors.
                     19:  * 4. Neither the name of The NetBSD Foundation nor the names of its
                     20:  *    contributors may be used to endorse or promote products derived
                     21:  *    from this software without specific prior written permission.
                     22:  *
                     23:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     24:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     25:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     26:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     27:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     28:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     29:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     30:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     31:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     32:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     33:  * POSSIBILITY OF SUCH DAMAGE.
                     34:  */
                     35:
                     36: /*
                     37:  * chartype.c: character classification and meta information
                     38:  */
                     39: #include "config.h"
                     40: #if !defined(lint) && !defined(SCCSID)
1.13    ! christos   41: __RCSID("$NetBSD: chartype.c,v 1.12 2015/02/22 02:16:19 christos Exp $");
1.1       christos   42: #endif /* not lint && not SCCSID */
                     43: #include "el.h"
                     44: #include <stdlib.h>
                     45:
1.9       christos   46: #define CT_BUFSIZ ((size_t)1024)
1.1       christos   47:
                     48: #ifdef WIDECHAR
1.11      christos   49: protected int
1.12      christos   50: ct_conv_cbuff_resize(ct_buffer_t *conv, size_t csize)
1.1       christos   51: {
                     52:        void *p;
1.12      christos   53:
                     54:        if (csize <= conv->csize)
                     55:                return 0;
                     56:
                     57:        conv->csize = csize;
                     58:
                     59:        p = el_realloc(conv->cbuff, conv->csize * sizeof(*conv->cbuff));
                     60:        if (p == NULL) {
                     61:                conv->csize = 0;
                     62:                el_free(conv->cbuff);
                     63:                conv->cbuff = NULL;
                     64:                return -1;
1.1       christos   65:        }
1.12      christos   66:        conv->cbuff = p;
                     67:        return 0;
                     68: }
                     69:
                     70: protected int
                     71: ct_conv_wbuff_resize(ct_buffer_t *conv, size_t wsize)
                     72: {
                     73:        void *p;
                     74:
                     75:        if (wsize <= conv->wsize)
                     76:                return 0;
                     77:
                     78:        conv->wsize = wsize;
1.1       christos   79:
1.12      christos   80:        p = el_realloc(conv->wbuff, conv->wsize * sizeof(*conv->wbuff));
                     81:        if (p == NULL) {
                     82:                conv->wsize = 0;
                     83:                el_free(conv->wbuff);
                     84:                conv->wbuff = NULL;
                     85:                return -1;
1.1       christos   86:        }
1.12      christos   87:        conv->wbuff = p;
1.11      christos   88:        return 0;
1.1       christos   89: }
                     90:
                     91:
1.2       christos   92: public char *
1.1       christos   93: ct_encode_string(const Char *s, ct_buffer_t *conv)
                     94: {
                     95:        char *dst;
1.11      christos   96:        ssize_t used;
1.1       christos   97:
                     98:        if (!s)
                     99:                return NULL;
                    100:
                    101:        dst = conv->cbuff;
1.11      christos  102:        for (;;) {
                    103:                used = (ssize_t)(dst - conv->cbuff);
                    104:                if ((conv->csize - (size_t)used) < 5) {
1.12      christos  105:                        if (ct_conv_cbuff_resize(conv,
                    106:                            conv->csize + CT_BUFSIZ) == -1)
1.1       christos  107:                                return NULL;
                    108:                        dst = conv->cbuff + used;
                    109:                }
1.11      christos  110:                if (!*s)
                    111:                        break;
1.9       christos  112:                used = ct_encode_char(dst, (size_t)5, *s);
1.5       christos  113:                if (used == -1) /* failed to encode, need more buffer space */
                    114:                        abort();
                    115:                ++s;
1.1       christos  116:                dst += used;
                    117:        }
                    118:        *dst = '\0';
                    119:        return conv->cbuff;
                    120: }
                    121:
1.2       christos  122: public Char *
1.1       christos  123: ct_decode_string(const char *s, ct_buffer_t *conv)
                    124: {
1.11      christos  125:        size_t len;
1.1       christos  126:
                    127:        if (!s)
                    128:                return NULL;
                    129:
1.9       christos  130:        len = ct_mbstowcs(NULL, s, (size_t)0);
1.6       christos  131:        if (len == (size_t)-1)
                    132:                return NULL;
1.11      christos  133:
1.12      christos  134:        if (conv->wsize < ++len)
                    135:                if (ct_conv_wbuff_resize(conv, len + CT_BUFSIZ) == -1)
1.11      christos  136:                        return NULL;
                    137:
1.1       christos  138:        ct_mbstowcs(conv->wbuff, s, conv->wsize);
                    139:        return conv->wbuff;
                    140: }
                    141:
                    142:
                    143: protected Char **
                    144: ct_decode_argv(int argc, const char *argv[], ct_buffer_t *conv)
                    145: {
                    146:        size_t bufspace;
                    147:        int i;
                    148:        Char *p;
                    149:        Char **wargv;
                    150:        ssize_t bytes;
                    151:
                    152:        /* Make sure we have enough space in the conversion buffer to store all
                    153:         * the argv strings. */
                    154:        for (i = 0, bufspace = 0; i < argc; ++i)
                    155:                bufspace += argv[i] ? strlen(argv[i]) + 1 : 0;
1.12      christos  156:        if (conv->wsize < ++bufspace)
                    157:                if (ct_conv_wbuff_resize(conv, bufspace + CT_BUFSIZ) == -1)
1.11      christos  158:                        return NULL;
1.1       christos  159:
1.10      christos  160:        wargv = el_malloc((size_t)argc * sizeof(*wargv));
1.1       christos  161:
                    162:        for (i = 0, p = conv->wbuff; i < argc; ++i) {
                    163:                if (!argv[i]) {   /* don't pass null pointers to mbstowcs */
                    164:                        wargv[i] = NULL;
1.3       christos  165:                        continue;
1.1       christos  166:                } else {
                    167:                        wargv[i] = p;
1.10      christos  168:                        bytes = (ssize_t)mbstowcs(p, argv[i], bufspace);
1.1       christos  169:                }
                    170:                if (bytes == -1) {
                    171:                        el_free(wargv);
                    172:                        return NULL;
                    173:                } else
                    174:                        bytes++;  /* include '\0' in the count */
1.10      christos  175:                bufspace -= (size_t)bytes;
1.1       christos  176:                p += bytes;
                    177:        }
                    178:
                    179:        return wargv;
                    180: }
                    181:
                    182:
1.4       christos  183: protected size_t
                    184: ct_enc_width(Char c)
1.1       christos  185: {
                    186:        /* UTF-8 encoding specific values */
                    187:        if (c < 0x80)
                    188:                return 1;
                    189:        else if (c < 0x0800)
                    190:                return 2;
                    191:        else if (c < 0x10000)
                    192:                return 3;
                    193:        else if (c < 0x110000)
                    194:                return 4;
                    195:        else
                    196:                return 0; /* not a valid codepoint */
                    197: }
                    198:
                    199: protected ssize_t
                    200: ct_encode_char(char *dst, size_t len, Char c)
                    201: {
                    202:        ssize_t l = 0;
1.4       christos  203:        if (len < ct_enc_width(c))
1.1       christos  204:                return -1;
                    205:        l = ct_wctomb(dst, c);
                    206:
                    207:        if (l < 0) {
                    208:                ct_wctomb_reset;
                    209:                l = 0;
                    210:        }
                    211:        return l;
                    212: }
                    213: #endif
                    214:
                    215: protected const Char *
                    216: ct_visual_string(const Char *s)
                    217: {
                    218:        static Char *buff = NULL;
                    219:        static size_t buffsize = 0;
                    220:        void *p;
                    221:        Char *dst;
                    222:        ssize_t used = 0;
                    223:
                    224:        if (!s)
                    225:                return NULL;
                    226:        if (!buff) {
                    227:            buffsize = CT_BUFSIZ;
                    228:            buff = el_malloc(buffsize * sizeof(*buff));
                    229:        }
                    230:        dst = buff;
                    231:        while (*s) {
1.10      christos  232:                used = ct_visual_char(dst, buffsize - (size_t)(dst - buff), *s);
1.1       christos  233:                if (used == -1) { /* failed to encode, need more buffer space */
                    234:                        used = dst - buff;
                    235:                        buffsize += CT_BUFSIZ;
                    236:                        p = el_realloc(buff, buffsize * sizeof(*buff));
                    237:                        if (p == NULL)
                    238:                                goto out;
                    239:                        buff = p;
                    240:                        dst = buff + used;
                    241:                        /* don't increment s here - we want to retry it! */
                    242:                }
                    243:                else
                    244:                    ++s;
                    245:                dst += used;
                    246:        }
                    247:        if (dst >= (buff + buffsize)) { /* sigh */
                    248:                buffsize += 1;
                    249:                p = el_realloc(buff, buffsize * sizeof(*buff));
                    250:                if (p == NULL)
                    251:                        goto out;
                    252:                buff = p;
                    253:                dst = buff + buffsize - 1;
                    254:        }
                    255:        *dst = 0;
                    256:        return buff;
                    257: out:
                    258:        el_free(buff);
                    259:        buffsize = 0;
                    260:        return NULL;
                    261: }
                    262:
                    263:
                    264:
                    265: protected int
                    266: ct_visual_width(Char c)
                    267: {
                    268:        int t = ct_chr_class(c);
                    269:        switch (t) {
                    270:        case CHTYPE_ASCIICTL:
                    271:                return 2; /* ^@ ^? etc. */
                    272:        case CHTYPE_TAB:
                    273:                return 1; /* Hmm, this really need to be handled outside! */
                    274:        case CHTYPE_NL:
                    275:                return 0; /* Should this be 1 instead? */
                    276: #ifdef WIDECHAR
                    277:        case CHTYPE_PRINT:
                    278:                return wcwidth(c);
                    279:        case CHTYPE_NONPRINT:
                    280:                if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */
                    281:                        return 8; /* \U+12345 */
                    282:                else
                    283:                        return 7; /* \U+1234 */
                    284: #else
                    285:        case CHTYPE_PRINT:
                    286:                return 1;
                    287:        case CHTYPE_NONPRINT:
                    288:                return 4; /* \123 */
                    289: #endif
                    290:        default:
                    291:                return 0; /* should not happen */
                    292:        }
                    293: }
                    294:
                    295:
                    296: protected ssize_t
                    297: ct_visual_char(Char *dst, size_t len, Char c)
                    298: {
                    299:        int t = ct_chr_class(c);
                    300:        switch (t) {
1.3       christos  301:        case CHTYPE_TAB:
                    302:        case CHTYPE_NL:
1.1       christos  303:        case CHTYPE_ASCIICTL:
                    304:                if (len < 2)
                    305:                        return -1;   /* insufficient space */
                    306:                *dst++ = '^';
                    307:                if (c == '\177')
                    308:                        *dst = '?'; /* DEL -> ^? */
                    309:                else
                    310:                        *dst = c | 0100;    /* uncontrolify it */
                    311:                return 2;
                    312:        case CHTYPE_PRINT:
                    313:                if (len < 1)
                    314:                        return -1;  /* insufficient space */
                    315:                *dst = c;
                    316:                return 1;
                    317:        case CHTYPE_NONPRINT:
                    318:                /* we only use single-width glyphs for display,
                    319:                 * so this is right */
                    320:                if ((ssize_t)len < ct_visual_width(c))
                    321:                        return -1;   /* insufficient space */
                    322: #ifdef WIDECHAR
                    323:                *dst++ = '\\';
                    324:                *dst++ = 'U';
                    325:                *dst++ = '+';
                    326: #define tohexdigit(v) "0123456789ABCDEF"[v]
                    327:                if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */
                    328:                        *dst++ = tohexdigit(((unsigned int) c >> 16) & 0xf);
                    329:                *dst++ = tohexdigit(((unsigned int) c >> 12) & 0xf);
                    330:                *dst++ = tohexdigit(((unsigned int) c >>  8) & 0xf);
                    331:                *dst++ = tohexdigit(((unsigned int) c >>  4) & 0xf);
                    332:                *dst   = tohexdigit(((unsigned int) c      ) & 0xf);
1.8       christos  333:                return c > 0xffff ? 8 : 7;
1.1       christos  334: #else
                    335:                *dst++ = '\\';
1.13    ! christos  336: #define tooctaldigit(v) (Char)((v) + '0')
1.1       christos  337:                *dst++ = tooctaldigit(((unsigned int) c >> 6) & 0x7);
                    338:                *dst++ = tooctaldigit(((unsigned int) c >> 3) & 0x7);
                    339:                *dst++ = tooctaldigit(((unsigned int) c     ) & 0x7);
                    340: #endif
                    341:                /*FALLTHROUGH*/
                    342:        /* these two should be handled outside this function */
                    343:        default:            /* we should never hit the default */
                    344:                return 0;
                    345:        }
                    346: }
                    347:
                    348:
                    349:
                    350:
                    351: protected int
                    352: ct_chr_class(Char c)
                    353: {
                    354:        if (c == '\t')
                    355:                return CHTYPE_TAB;
                    356:        else if (c == '\n')
                    357:                return CHTYPE_NL;
                    358:        else if (IsASCII(c) && Iscntrl(c))
                    359:                return CHTYPE_ASCIICTL;
                    360:        else if (Isprint(c))
                    361:                return CHTYPE_PRINT;
                    362:        else
                    363:                return CHTYPE_NONPRINT;
                    364: }

CVSweb <webmaster@jp.NetBSD.org>