[BACK]Return to tokenizer.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / lib / libedit

Annotation of src/lib/libedit/tokenizer.c, Revision 1.5

1.5     ! simonb      1: /*     $NetBSD: tokenizer.c,v 1.4 1998/12/12 20:08:23 christos Exp $   */
1.2       lukem       2:
1.1       cgd         3: /*-
                      4:  * Copyright (c) 1992, 1993
                      5:  *     The Regents of the University of California.  All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to Berkeley by
                      8:  * Christos Zoulas of Cornell University.
                      9:  *
                     10:  * Redistribution and use in source and binary forms, with or without
                     11:  * modification, are permitted provided that the following conditions
                     12:  * are met:
                     13:  * 1. Redistributions of source code must retain the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer.
                     15:  * 2. Redistributions in binary form must reproduce the above copyright
                     16:  *    notice, this list of conditions and the following disclaimer in the
                     17:  *    documentation and/or other materials provided with the distribution.
                     18:  * 3. All advertising materials mentioning features or use of this software
                     19:  *    must display the following acknowledgement:
                     20:  *     This product includes software developed by the University of
                     21:  *     California, Berkeley and its contributors.
                     22:  * 4. Neither the name of the University nor the names of its contributors
                     23:  *    may be used to endorse or promote products derived from this software
                     24:  *    without specific prior written permission.
                     25:  *
                     26:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     27:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     28:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     29:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     30:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     31:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     32:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     33:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     34:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     35:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     36:  * SUCH DAMAGE.
                     37:  */
                     38:
1.3       christos   39: #include <sys/cdefs.h>
1.1       cgd        40: #if !defined(lint) && !defined(SCCSID)
1.2       lukem      41: #if 0
1.1       cgd        42: static char sccsid[] = "@(#)tokenizer.c        8.1 (Berkeley) 6/4/93";
1.2       lukem      43: #else
1.5     ! simonb     44: __RCSID("$NetBSD: tokenizer.c,v 1.4 1998/12/12 20:08:23 christos Exp $");
1.2       lukem      45: #endif
1.1       cgd        46: #endif /* not lint && not SCCSID */
                     47:
                     48: /*
                     49:  * tokenizer.c: Bourne shell like tokenizer
                     50:  */
                     51: #include "sys.h"
                     52: #include <string.h>
                     53: #include <stdlib.h>
                     54: #include "tokenizer.h"
                     55:
                     56: typedef enum { Q_none, Q_single, Q_double, Q_one, Q_doubleone } quote_t;
                     57:
                     58: #define IFS "\t \n"
                     59:
                     60: #define TOK_KEEP       1
                     61: #define TOK_EAT                2
                     62:
                     63: #define WINCR 20
                     64: #define AINCR 10
                     65:
                     66: #define tok_malloc(a)          malloc(a)
                     67: #define tok_free(a)            free(a)
                     68: #define tok_realloc(a, b)      realloc(a, b)
                     69:
                     70:
                     71: struct tokenizer {
                     72:     char   *ifs;               /* In field separator                   */
                     73:     int     argc, amax;                /* Current and maximum number of args   */
                     74:     char  **argv;              /* Argument list                        */
                     75:     char   *wptr, *wmax;       /* Space and limit on the word buffer   */
                     76:     char   *wstart;            /* Beginning of next word               */
                     77:     char   *wspace;            /* Space of word buffer                 */
                     78:     quote_t quote;             /* Quoting state                        */
                     79:     int            flags;              /* flags;                               */
                     80: };
                     81:
                     82:
                     83: private void tok_finish        __P((Tokenizer *));
                     84:
                     85:
                     86: /* tok_finish():
                     87:  *     Finish a word in the tokenizer.
                     88:  */
                     89: private void
                     90: tok_finish(tok)
                     91:     Tokenizer *tok;
                     92: {
                     93:     *tok->wptr = '\0';
                     94:     if ((tok->flags & TOK_KEEP) || tok->wptr != tok->wstart) {
                     95:        tok->argv[tok->argc++] = tok->wstart;
                     96:        tok->argv[tok->argc] = NULL;
                     97:        tok->wstart = ++tok->wptr;
                     98:     }
                     99:     tok->flags &= ~TOK_KEEP;
                    100: }
                    101:
                    102:
                    103: /* tok_init():
                    104:  *     Initialize the tokenizer
                    105:  */
                    106: public Tokenizer *
                    107: tok_init(ifs)
                    108:     const char *ifs;
                    109: {
                    110:     Tokenizer* tok = (Tokenizer*) tok_malloc(sizeof(Tokenizer));
                    111:
                    112:     tok->ifs     = strdup(ifs ? ifs : IFS);
                    113:     tok->argc    = 0;
                    114:     tok->amax    = AINCR;
                    115:     tok->argv    = (char **) tok_malloc(sizeof(char *) * tok->amax);
                    116:     tok->argv[0] = NULL;
                    117:     tok->wspace  = (char *) tok_malloc(WINCR);
                    118:     tok->wmax    = tok->wspace + WINCR;
                    119:     tok->wstart  = tok->wspace;
                    120:     tok->wptr    = tok->wspace;
                    121:     tok->flags   = 0;
                    122:     tok->quote   = Q_none;
                    123:
                    124:     return tok;
                    125: }
                    126:
                    127:
                    128: /* tok_reset():
                    129:  *     Reset the tokenizer
                    130:  */
                    131: public void
                    132: tok_reset(tok)
                    133:     Tokenizer *tok;
                    134: {
                    135:     tok->argc  = 0;
                    136:     tok->wstart = tok->wspace;
                    137:     tok->wptr = tok->wspace;
                    138:     tok->flags = 0;
                    139:     tok->quote = Q_none;
                    140: }
                    141:
                    142:
                    143: /* tok_end():
                    144:  *     Clean up
                    145:  */
                    146: public void
                    147: tok_end(tok)
                    148:     Tokenizer *tok;
                    149: {
                    150:     tok_free((ptr_t) tok->ifs);
                    151:     tok_free((ptr_t) tok->wspace);
                    152:     tok_free((ptr_t) tok->argv);
                    153:     tok_free((ptr_t) tok);
                    154: }
                    155:
                    156:
                    157:
                    158: /* tok_line():
                    159:  *     Bourne shell like tokenizing
                    160:  *     Return:
                    161:  *             -1: Internal error
                    162:  *              3: Quoted return
                    163:  *              2: Unmatched double quote
                    164:  *              1: Unmatched single quote
1.5     ! simonb    165:  *              0: Ok
1.1       cgd       166:  */
                    167: public int
                    168: tok_line(tok, line, argc, argv)
                    169:     Tokenizer *tok;
                    170:     const char* line;
                    171:     int *argc;
                    172:     char ***argv;
                    173: {
                    174:     const char *ptr;
                    175:
1.4       christos  176:     for (;;) {
1.1       cgd       177:        switch (*(ptr = line++)) {
                    178:        case '\'':
                    179:            tok->flags |= TOK_KEEP;
                    180:            tok->flags &= ~TOK_EAT;
                    181:            switch (tok->quote) {
                    182:            case Q_none:
                    183:                tok->quote = Q_single;  /* Enter single quote mode */
                    184:                break;
                    185:
                    186:            case Q_single:              /* Exit single quote mode */
                    187:                tok->quote = Q_none;
                    188:                break;
                    189:
                    190:            case Q_one:                 /* Quote this ' */
                    191:                tok->quote = Q_none;
                    192:                *tok->wptr++ = *ptr;
                    193:                break;
                    194:
                    195:            case Q_double:              /* Stay in double quote mode */
                    196:                *tok->wptr++ = *ptr;
                    197:                break;
                    198:
                    199:            case Q_doubleone:           /* Quote this ' */
                    200:                tok->quote = Q_double;
                    201:                *tok->wptr++ = *ptr;
                    202:                break;
                    203:
                    204:            default:
                    205:                return(-1);
                    206:            }
                    207:            break;
                    208:
                    209:        case '"':
                    210:            tok->flags &= ~TOK_EAT;
                    211:            tok->flags |= TOK_KEEP;
                    212:            switch (tok->quote) {
                    213:            case Q_none:                /* Enter double quote mode */
                    214:                tok->quote = Q_double;
                    215:                break;
                    216:
                    217:            case Q_double:
                    218:                tok->quote = Q_none;    /* Exit double quote mode */
                    219:                break;
                    220:
                    221:            case Q_one:                 /* Quote this " */
                    222:                tok->quote = Q_none;
                    223:                *tok->wptr++ = *ptr;
                    224:                break;
                    225:
                    226:            case Q_single:              /* Stay in single quote mode */
                    227:                *tok->wptr++ = *ptr;
                    228:                break;
                    229:
                    230:            case Q_doubleone:           /* Quote this " */
                    231:                tok->quote = Q_double;
                    232:                *tok->wptr++ = *ptr;
                    233:                break;
                    234:
1.5     ! simonb    235:            default:
1.1       cgd       236:                return(-1);
                    237:            }
                    238:            break;
                    239:
                    240:        case '\\':
                    241:            tok->flags |= TOK_KEEP;
                    242:            tok->flags &= ~TOK_EAT;
                    243:            switch (tok->quote) {
                    244:            case Q_none:                /* Quote next character */
                    245:                tok->quote = Q_one;
                    246:                break;
                    247:
                    248:            case Q_double:
                    249:                tok->quote = Q_doubleone;/* Quote next character */
                    250:                break;
                    251:
1.5     ! simonb    252:            case Q_one:
1.1       cgd       253:                *tok->wptr++ = *ptr;
                    254:                tok->quote = Q_none;    /* Quote this, restore state */
                    255:                break;
                    256:
                    257:            case Q_single:              /* Stay in single quote mode */
                    258:                *tok->wptr++ = *ptr;
                    259:                break;
                    260:
                    261:            case Q_doubleone:           /* Quote this \ */
                    262:                tok->quote = Q_double;
                    263:                *tok->wptr++ = *ptr;
                    264:                break;
                    265:
                    266:            default:
                    267:                return(-1);
                    268:            }
                    269:            break;
                    270:
                    271:        case '\n':
                    272:            tok->flags &= ~TOK_EAT;
                    273:            switch (tok->quote) {
                    274:            case Q_none:
                    275:                tok_finish(tok);
                    276:                *argv = tok->argv;
                    277:                *argc = tok->argc;
                    278:                return(0);
                    279:
                    280:            case Q_single:
                    281:            case Q_double:
                    282:                *tok->wptr++ = *ptr;    /* Add the return               */
                    283:                break;
1.5     ! simonb    284:
1.1       cgd       285:            case Q_doubleone:
                    286:                tok->flags |= TOK_EAT;
                    287:                tok->quote = Q_double;  /* Back to double, eat the '\n' */
                    288:                break;
                    289:
                    290:            case Q_one:
                    291:                tok->flags |= TOK_EAT;
                    292:                tok->quote = Q_none;    /* No quote, more eat the '\n' */
                    293:                break;
                    294:
                    295:            default:
                    296:                return(0);
                    297:            }
                    298:            break;
                    299:
                    300:        case '\0':
                    301:            switch (tok->quote) {
                    302:            case Q_none:
                    303:                /* Finish word and return */
                    304:                if (tok->flags & TOK_EAT) {
                    305:                    tok->flags &= ~TOK_EAT;
                    306:                    return 3;
                    307:                }
                    308:                tok_finish(tok);
                    309:                *argv = tok->argv;
                    310:                *argc = tok->argc;
                    311:                return(0);
                    312:
                    313:            case Q_single:
                    314:                return(1);
                    315:
                    316:            case Q_double:
                    317:                return(2);
                    318:
                    319:            case Q_doubleone:
                    320:                tok->quote = Q_double;
                    321:                *tok->wptr++ = *ptr;
                    322:                break;
                    323:
                    324:            case Q_one:
                    325:                tok->quote = Q_none;
                    326:                *tok->wptr++ = *ptr;
                    327:                break;
                    328:
                    329:            default:
                    330:                return(-1);
                    331:            }
                    332:            break;
                    333:
                    334:        default:
                    335:            tok->flags &= ~TOK_EAT;
                    336:            switch (tok->quote) {
                    337:            case Q_none:
                    338:                if (strchr(tok->ifs, *ptr) != NULL)
                    339:                    tok_finish(tok);
                    340:                else
                    341:                    *tok->wptr++ = *ptr;
                    342:                break;
                    343:
                    344:            case Q_single:
                    345:            case Q_double:
                    346:                *tok->wptr++ = *ptr;
                    347:                break;
                    348:
                    349:
                    350:            case Q_doubleone:
                    351:                *tok->wptr++ = '\\';
                    352:                tok->quote = Q_double;
                    353:                *tok->wptr++ = *ptr;
                    354:                break;
                    355:
                    356:            case Q_one:
                    357:                tok->quote = Q_none;
                    358:                *tok->wptr++ = *ptr;
                    359:                break;
                    360:
                    361:            default:
                    362:                return(-1);
                    363:
                    364:            }
                    365:            break;
                    366:        }
                    367:
                    368:        if (tok->wptr >= tok->wmax - 4) {
                    369:            size_t size = tok->wmax - tok->wspace + WINCR;
                    370:            char *s = (char *) tok_realloc(tok->wspace, size);
                    371:            /*SUPPRESS 22*/
                    372:            int offs = s - tok->wspace;
                    373:
                    374:            if (offs != 0) {
                    375:                int i;
                    376:                for (i = 0; i < tok->argc; i++)
                    377:                    tok->argv[i] = tok->argv[i] + offs;
                    378:                tok->wptr   = tok->wptr + offs;
                    379:                tok->wstart = tok->wstart + offs;
                    380:                tok->wmax   = s + size;
                    381:                tok->wspace = s;
                    382:            }
                    383:        }
                    384:
                    385:        if (tok->argc >= tok->amax - 4) {
                    386:            tok->amax += AINCR;
1.5     ! simonb    387:            tok->argv = (char **) tok_realloc(tok->argv,
1.1       cgd       388:                                              tok->amax * sizeof(char*));
                    389:        }
                    390:
                    391:     }
                    392: }

CVSweb <webmaster@jp.NetBSD.org>