[BACK]Return to lexi.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / usr.bin / indent

Annotation of src/usr.bin/indent/lexi.c, Revision 1.91

1.91    ! rillig      1: /*     $NetBSD: lexi.c,v 1.90 2021/10/11 20:13:46 rillig Exp $ */
1.3       tls         2:
1.16      kamil       3: /*-
                      4:  * SPDX-License-Identifier: BSD-4-Clause
                      5:  *
                      6:  * Copyright (c) 1985 Sun Microsystems, Inc.
1.5       mrg         7:  * Copyright (c) 1980, 1993
                      8:  *     The Regents of the University of California.  All rights reserved.
1.1       cgd         9:  * All rights reserved.
                     10:  *
                     11:  * Redistribution and use in source and binary forms, with or without
                     12:  * modification, are permitted provided that the following conditions
                     13:  * are met:
                     14:  * 1. Redistributions of source code must retain the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer.
                     16:  * 2. Redistributions in binary form must reproduce the above copyright
                     17:  *    notice, this list of conditions and the following disclaimer in the
                     18:  *    documentation and/or other materials provided with the distribution.
                     19:  * 3. All advertising materials mentioning features or use of this software
                     20:  *    must display the following acknowledgement:
                     21:  *     This product includes software developed by the University of
                     22:  *     California, Berkeley and its contributors.
                     23:  * 4. Neither the name of the University nor the names of its contributors
                     24:  *    may be used to endorse or promote products derived from this software
                     25:  *    without specific prior written permission.
                     26:  *
                     27:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     28:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     29:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     30:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     31:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     32:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     33:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     34:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     35:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     36:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     37:  * SUCH DAMAGE.
                     38:  */
                     39:
1.16      kamil      40: #if 0
                     41: static char sccsid[] = "@(#)lexi.c     8.1 (Berkeley) 6/6/93";
                     42: #endif
                     43:
1.6       lukem      44: #include <sys/cdefs.h>
1.16      kamil      45: #if defined(__NetBSD__)
1.91    ! rillig     46: __RCSID("$NetBSD: lexi.c,v 1.90 2021/10/11 20:13:46 rillig Exp $");
1.16      kamil      47: #elif defined(__FreeBSD__)
                     48: __FBSDID("$FreeBSD: head/usr.bin/indent/lexi.c 337862 2018-08-15 18:19:45Z pstef $");
                     49: #endif
1.1       cgd        50:
1.20      rillig     51: #include <assert.h>
1.1       cgd        52: #include <stdio.h>
                     53: #include <ctype.h>
                     54: #include <stdlib.h>
                     55: #include <string.h>
1.16      kamil      56: #include <sys/param.h>
                     57:
                     58: #include "indent.h"
1.1       cgd        59:
1.60      rillig     60: /* must be sorted alphabetically, is used in binary search */
1.62      rillig     61: static const struct keyword {
                     62:     const char *name;
                     63:     enum keyword_kind kind;
                     64: } keywords[] = {
                     65:     {"_Bool", kw_type},
                     66:     {"_Complex", kw_type},
                     67:     {"_Imaginary", kw_type},
                     68:     {"auto", kw_storage_class},
                     69:     {"bool", kw_type},
                     70:     {"break", kw_jump},
                     71:     {"case", kw_case_or_default},
                     72:     {"char", kw_type},
                     73:     {"complex", kw_type},
                     74:     {"const", kw_type},
                     75:     {"continue", kw_jump},
                     76:     {"default", kw_case_or_default},
                     77:     {"do", kw_do_or_else},
                     78:     {"double", kw_type},
                     79:     {"else", kw_do_or_else},
                     80:     {"enum", kw_struct_or_union_or_enum},
                     81:     {"extern", kw_storage_class},
                     82:     {"float", kw_type},
                     83:     {"for", kw_for_or_if_or_while},
                     84:     {"goto", kw_jump},
                     85:     {"if", kw_for_or_if_or_while},
                     86:     {"imaginary", kw_type},
                     87:     {"inline", kw_inline_or_restrict},
                     88:     {"int", kw_type},
                     89:     {"long", kw_type},
                     90:     {"offsetof", kw_offsetof},
                     91:     {"register", kw_storage_class},
                     92:     {"restrict", kw_inline_or_restrict},
                     93:     {"return", kw_jump},
                     94:     {"short", kw_type},
                     95:     {"signed", kw_type},
                     96:     {"sizeof", kw_sizeof},
                     97:     {"static", kw_storage_class},
                     98:     {"struct", kw_struct_or_union_or_enum},
                     99:     {"switch", kw_switch},
                    100:     {"typedef", kw_typedef},
                    101:     {"union", kw_struct_or_union_or_enum},
                    102:     {"unsigned", kw_type},
                    103:     {"void", kw_type},
                    104:     {"volatile", kw_type},
                    105:     {"while", kw_for_or_if_or_while}
1.1       cgd       106: };
                    107:
/*
 * List of additional type names; bsearch_typenames() searches items[0..len)
 * by binary search, so the items are expected to be sorted by strcmp().
 */
static struct {
    const char **items;		/* the names themselves */
    unsigned int len;		/* number of items in use */
    unsigned int cap;		/* presumably the allocated capacity of items */
} typenames;
1.16      kamil     113:
                    114: /*
                    115:  * The transition table below was rewritten by hand from lx's output, given
                    116:  * the following definitions. lx is Katherine Flavel's lexer generator.
                    117:  *
                    118:  * O  = /[0-7]/;        D  = /[0-9]/;          NZ = /[1-9]/;
                    119:  * H  = /[a-f0-9]/i;    B  = /[0-1]/;          HP = /0x/i;
                    120:  * BP = /0b/i;          E  = /e[+\-]?/i D+;    P  = /p[+\-]?/i D+;
                    121:  * FS = /[fl]/i;        IS = /u/i /(l|L|ll|LL)/? | /(l|L|ll|LL)/ /u/i?;
                    122:  *
                    123:  * D+           E  FS? -> $float;
                    124:  * D*    "." D+ E? FS? -> $float;
                    125:  * D+    "."    E? FS? -> $float;    HP H+           IS? -> $int;
                    126:  * HP H+        P  FS? -> $float;    NZ D*           IS? -> $int;
                    127:  * HP H* "." H+ P  FS? -> $float;    "0" O*          IS? -> $int;
                    128:  * HP H+ "."    P  FS  -> $float;    BP B+           IS? -> $int;
                    129:  */
1.71      rillig    130: /* INDENT OFF */
/*
 * State transition table used by lex_number().
 *
 * Each row belongs to one class of input characters (the row index comes
 * from lex_number_row), each column to one of the states 'A' to 'Z'.  An
 * entry is the next state, or ' ' if there is no transition, in which case
 * lex_number() stops.  Row 0 does not describe transitions, it classifies
 * each state instead: f = floating, i = integer, u = unknown.
 *
 * The examples in the comment inside the table are read from top to bottom,
 * one column per state.
 */
static const unsigned char lex_number_state[][26] = {
    /*                examples:
                                     00
             s                      0xx
             t                    00xaa
             a     11       101100xxa..
             r   11ee0001101lbuuxx.a.pp
             t.01.e+008bLuxll0Ll.aa.p+0
    states:  ABCDEFGHIJKLMNOPQRSTUVWXYZ */
    /* (other) */
    [0] =   "uuiifuufiuuiiuiiiiiuiuuuuu",
    /* 0 */
    [1] =   "CEIDEHHHIJQ  U  Q  VUVVZZZ",
    /* 1 */
    [2] =   "DEIDEHHHIJQ  U  Q  VUVVZZZ",
    /* 2 3 4 5 6 7 */
    [3] =   "DEIDEHHHIJ   U     VUVVZZZ",
    /* 8 9 */
    [4] =   "DEJDEHHHJJ   U     VUVVZZZ",
    /* A a C c D d */
    [5] =   "             U     VUVV   ",
    /* B b */
    [6] =   "  K          U     VUVV   ",
    /* E e */
    [7] =   "  FFF   FF   U     VUVV   ",
    /* F f */
    [8] =   "    f  f     U     VUVV  f",
    /* L */
    [9] =   "  LLf  fL  PR   Li  L    f",
    /* l */
    [10] =  "  OOf  fO   S P O i O    f",
    /* P p */
    [11] =  "                    FFX   ",
    /* U u */
    [12] =  "  MM    M  i  iiM   M     ",
    /* X x */
    [13] =  "  N                       ",
    /* + - */
    [14] =  "     G                 Y  ",
    /* . */
    [15] =  "B EE    EE   T      W     ",
    /*       ABCDEFGHIJKLMNOPQRSTUVWXYZ */
};
1.71      rillig    158: /* INDENT ON */
1.1       cgd       159:
/*
 * Maps an input character to its row in lex_number_state.  Characters that
 * are not listed map to 0, which lex_number() takes as "cannot be part of a
 * number".
 */
static const uint8_t lex_number_row[] = {
    /* decimal digits */
    ['0'] = 1,
    ['1'] = 2,
    ['2'] = 3,
    ['3'] = 3,
    ['4'] = 3,
    ['5'] = 3,
    ['6'] = 3,
    ['7'] = 3,
    ['8'] = 4,
    ['9'] = 4,

    /* letters, upper case */
    ['A'] = 5,
    ['B'] = 6,
    ['C'] = 5,
    ['D'] = 5,
    ['E'] = 7,
    ['F'] = 8,
    ['L'] = 9,
    ['P'] = 11,
    ['U'] = 12,
    ['X'] = 13,

    /* letters, lower case; only 'l' differs from its upper case variant */
    ['a'] = 5,
    ['b'] = 6,
    ['c'] = 5,
    ['d'] = 5,
    ['e'] = 7,
    ['f'] = 8,
    ['l'] = 10,
    ['p'] = 11,
    ['u'] = 12,
    ['x'] = 13,

    /* punctuation */
    ['+'] = 14,
    ['-'] = 14,
    ['.'] = 15,
};
1.36      rillig    177:
1.32      rillig    178: static char
                    179: inbuf_peek(void)
                    180: {
1.78      rillig    181:     return *inp.s;
1.32      rillig    182: }
                    183:
1.66      rillig    184: void
1.32      rillig    185: inbuf_skip(void)
                    186: {
1.78      rillig    187:     inp.s++;
                    188:     if (inp.s >= inp.e)
1.81      rillig    189:        inbuf_read_line();
1.32      rillig    190: }
                    191:
1.66      rillig    192: char
1.32      rillig    193: inbuf_next(void)
                    194: {
                    195:     char ch = inbuf_peek();
                    196:     inbuf_skip();
                    197:     return ch;
                    198: }
                    199:
1.25      rillig    200: static void
                    201: check_size_token(size_t desired_size)
                    202: {
1.58      rillig    203:     if (token.e + desired_size >= token.l)
                    204:        buf_expand(&token, desired_size);
1.25      rillig    205: }
                    206:
1.87      rillig    207: static void
                    208: token_add_char(char ch)
                    209: {
                    210:     check_size_token(1);
                    211:     *token.e++ = ch;
                    212: }
                    213:
1.16      kamil     214: static int
1.62      rillig    215: cmp_keyword_by_name(const void *key, const void *elem)
1.16      kamil     216: {
1.62      rillig    217:     return strcmp(key, ((const struct keyword *)elem)->name);
1.27      rillig    218: }
                    219:
1.20      rillig    220: #ifdef debug
                    221: const char *
1.47      rillig    222: token_type_name(token_type ttype)
1.20      rillig    223: {
                    224:     static const char *const name[] = {
1.79      rillig    225:        "end_of_file", "newline", "lparen_or_lbracket", "rparen_or_rbracket",
                    226:        "unary_op", "binary_op", "postfix_op", "question",
                    227:        "case_label", "colon",
1.20      rillig    228:        "semicolon", "lbrace", "rbrace", "ident", "comma",
1.31      rillig    229:        "comment", "switch_expr", "preprocessing", "form_feed", "decl",
1.29      rillig    230:        "keyword_for_if_while", "keyword_do_else",
1.30      rillig    231:        "if_expr", "while_expr", "for_exprs",
                    232:        "stmt", "stmt_list", "keyword_else", "keyword_do", "do_stmt",
1.31      rillig    233:        "if_expr_stmt", "if_expr_stmt_else", "period", "string_prefix",
                    234:        "storage_class", "funcname", "type_def", "keyword_struct_union_enum"
1.20      rillig    235:     };
                    236:
1.62      rillig    237:     assert(0 <= ttype && ttype < nitems(name));
1.20      rillig    238:
1.47      rillig    239:     return name[ttype];
1.20      rillig    240: }
                    241:
                    242: static void
1.72      rillig    243: debug_print_buf(const char *name, const struct buffer *buf)
1.20      rillig    244: {
1.72      rillig    245:     if (buf->s < buf->e) {
1.39      rillig    246:        debug_printf(" %s ", name);
1.72      rillig    247:        debug_vis_range("\"", buf->s, buf->e, "\"");
1.20      rillig    248:     }
                    249: }
                    250:
                    251: static token_type
1.47      rillig    252: lexi_end(token_type ttype)
1.20      rillig    253: {
1.39      rillig    254:     debug_printf("in line %d, lexi returns '%s'",
1.47      rillig    255:        line_no, token_type_name(ttype));
1.72      rillig    256:     debug_print_buf("token", &token);
                    257:     debug_print_buf("label", &lab);
                    258:     debug_print_buf("code", &code);
                    259:     debug_print_buf("comment", &com);
1.39      rillig    260:     debug_printf("\n");
1.20      rillig    261:
1.47      rillig    262:     return ttype;
1.20      rillig    263: }
                    264: #else
/* Without debugging, lexi_end just passes the token type through. */
#define lexi_end(ttype) (ttype)
1.20      rillig    266: #endif
                    267:
1.43      rillig    268: static void
                    269: lex_number(void)
                    270: {
1.71      rillig    271:     for (uint8_t s = 'A'; s != 'f' && s != 'i' && s != 'u';) {
1.78      rillig    272:        uint8_t ch = (uint8_t)*inp.s;
1.82      rillig    273:        if (ch >= nitems(lex_number_row) || lex_number_row[ch] == 0)
1.56      rillig    274:            break;
1.75      rillig    275:
1.82      rillig    276:        uint8_t row = lex_number_row[ch];
                    277:        if (lex_number_state[row][s - 'A'] == ' ') {
1.71      rillig    278:            /*-
1.82      rillig    279:             * lex_number_state[0][s - 'A'] now indicates the type:
1.74      rillig    280:             * f = floating, i = integer, u = unknown
1.56      rillig    281:             */
1.43      rillig    282:            break;
                    283:        }
1.75      rillig    284:
1.82      rillig    285:        s = lex_number_state[row][s - 'A'];
1.87      rillig    286:        token_add_char(inbuf_next());
1.43      rillig    287:     }
                    288: }
                    289:
                    290: static void
                    291: lex_word(void)
                    292: {
1.78      rillig    293:     while (isalnum((unsigned char)*inp.s) ||
                    294:           *inp.s == '\\' ||
                    295:           *inp.s == '_' || *inp.s == '$') {
1.75      rillig    296:
1.78      rillig    297:        if (*inp.s == '\\') {
                    298:            if (inp.s[1] == '\n') {
                    299:                inp.s += 2;
                    300:                if (inp.s >= inp.e)
1.81      rillig    301:                    inbuf_read_line();
1.43      rillig    302:            } else
                    303:                break;
                    304:        }
1.75      rillig    305:
1.87      rillig    306:        token_add_char(inbuf_next());
1.43      rillig    307:     }
                    308: }
                    309:
                    310: static void
                    311: lex_char_or_string(void)
                    312: {
1.52      rillig    313:     for (char delim = *token.s;;) {
1.78      rillig    314:        if (*inp.s == '\n') {
1.52      rillig    315:            diag(1, "Unterminated literal");
                    316:            return;
                    317:        }
1.75      rillig    318:
1.87      rillig    319:        token_add_char(inbuf_next());
1.52      rillig    320:        if (token.e[-1] == delim)
                    321:            return;
1.75      rillig    322:
1.52      rillig    323:        if (token.e[-1] == '\\') {
1.78      rillig    324:            if (*inp.s == '\n')
1.52      rillig    325:                ++line_no;
1.87      rillig    326:            token_add_char(inbuf_next());
1.52      rillig    327:        }
                    328:     }
1.43      rillig    329: }
                    330:
1.84      rillig    331: /* Guess whether the current token is a declared type. */
1.57      rillig    332: static bool
1.84      rillig    333: probably_typename(const struct parser_state *state)
1.57      rillig    334: {
1.70      rillig    335:     if (state->p_l_follow != 0)
                    336:        return false;
                    337:     if (state->block_init || state->in_stmt)
                    338:        return false;
1.78      rillig    339:     if (inp.s[0] == '*' && inp.s[1] != '=')
1.70      rillig    340:        goto maybe;
1.78      rillig    341:     if (isalpha((unsigned char)*inp.s))
1.70      rillig    342:        goto maybe;
                    343:     return false;
                    344: maybe:
                    345:     return state->last_token == semicolon ||
1.71      rillig    346:        state->last_token == lbrace ||
                    347:        state->last_token == rbrace;
1.57      rillig    348: }
                    349:
1.84      rillig    350: static int
                    351: bsearch_typenames(const char *key)
                    352: {
                    353:     const char **arr = typenames.items;
                    354:     int lo = 0;
                    355:     int hi = (int)typenames.len - 1;
                    356:
                    357:     while (lo <= hi) {
                    358:        int mid = (int)((unsigned)(lo + hi) >> 1);
                    359:        int cmp = strcmp(arr[mid], key);
                    360:        if (cmp < 0)
                    361:            lo = mid + 1;
                    362:        else if (cmp > 0)
                    363:            hi = mid - 1;
                    364:        else
                    365:            return mid;
                    366:     }
                    367:     return -(lo + 1);
                    368: }
                    369:
1.63      rillig    370: static bool
                    371: is_typename(void)
                    372: {
1.84      rillig    373:     if (opt.auto_typedefs &&
                    374:        token.e - token.s >= 2 && memcmp(token.e - 2, "_t", 2) == 0)
                    375:        return true;
1.63      rillig    376:
1.84      rillig    377:     return bsearch_typenames(token.s) >= 0;
1.63      rillig    378: }
                    379:
1.90      rillig    380: /* Read an alphanumeric token into 'token', or return end_of_file. */
1.89      rillig    381: static token_type
                    382: lexi_alnum(struct parser_state *state)
1.1       cgd       383: {
1.89      rillig    384:     if (!(isalnum((unsigned char)*inp.s) ||
                    385:        *inp.s == '_' || *inp.s == '$' ||
                    386:        (inp.s[0] == '.' && isdigit((unsigned char)inp.s[1]))))
1.90      rillig    387:        return end_of_file;     /* just as a placeholder */
1.89      rillig    388:
                    389:     if (isdigit((unsigned char)*inp.s) ||
                    390:        (inp.s[0] == '.' && isdigit((unsigned char)inp.s[1]))) {
                    391:        lex_number();
                    392:     } else {
                    393:        lex_word();
                    394:     }
                    395:     *token.e = '\0';
1.16      kamil     396:
1.89      rillig    397:     if (token.s[0] == 'L' && token.s[1] == '\0' &&
                    398:        (*inp.s == '"' || *inp.s == '\''))
                    399:        return string_prefix;
1.16      kamil     400:
1.89      rillig    401:     while (is_hspace(inbuf_peek()))
1.32      rillig    402:        inbuf_skip();
1.89      rillig    403:     state->keyword = kw_0;
                    404:
                    405:     if (state->last_token == keyword_struct_union_enum &&
                    406:            state->p_l_follow == 0) {
                    407:        state->last_u_d = true;
                    408:        return decl;
1.16      kamil     409:     }
1.6       lukem     410:
1.89      rillig    411:     /* Operator after identifier is binary unless last token was 'struct'. */
                    412:     state->last_u_d = (state->last_token == keyword_struct_union_enum);
1.16      kamil     413:
1.89      rillig    414:     const struct keyword *kw = bsearch(token.s, keywords,
                    415:        nitems(keywords), sizeof(keywords[0]), cmp_keyword_by_name);
                    416:     if (kw == NULL) {
                    417:        if (is_typename()) {
                    418:            state->keyword = kw_type;
1.16      kamil     419:            state->last_u_d = true;
1.89      rillig    420:            goto found_typename;
1.16      kamil     421:        }
1.89      rillig    422:
                    423:     } else {                   /* we have a keyword */
                    424:        state->keyword = kw->kind;
                    425:        state->last_u_d = true;
                    426:
                    427:        switch (kw->kind) {
                    428:        case kw_switch:
                    429:            return switch_expr;
                    430:
                    431:        case kw_case_or_default:
                    432:            return case_label;
                    433:
                    434:        case kw_struct_or_union_or_enum:
                    435:        case kw_type:
                    436:     found_typename:
                    437:            if (state->p_l_follow != 0) {
                    438:                /* inside parens: cast, param list, offsetof or sizeof */
                    439:                state->cast_mask |= (1 << state->p_l_follow) & ~state->not_cast_mask;
                    440:            }
                    441:            if (state->last_token == period || state->last_token == unary_op) {
                    442:                state->keyword = kw_0;
                    443:                break;
1.16      kamil     444:            }
1.89      rillig    445:            if (kw != NULL && kw->kind == kw_struct_or_union_or_enum)
                    446:                return keyword_struct_union_enum;
                    447:            if (state->p_l_follow != 0)
                    448:                break;
                    449:            return decl;
1.75      rillig    450:
1.89      rillig    451:        case kw_for_or_if_or_while:
                    452:            return keyword_for_if_while;
1.75      rillig    453:
1.89      rillig    454:        case kw_do_or_else:
                    455:            return keyword_do_else;
1.16      kamil     456:
1.89      rillig    457:        case kw_storage_class:
                    458:            return storage_class;
1.16      kamil     459:
1.89      rillig    460:        case kw_typedef:
                    461:            return type_def;
1.16      kamil     462:
1.89      rillig    463:        default:                /* all others are treated like any other
1.16      kamil     464:                                 * identifier */
1.89      rillig    465:            return ident;
1.90      rillig    466:        }
                    467:     }
1.89      rillig    468:
                    469:     if (*inp.s == '(' && state->tos <= 1 && state->ind_level == 0 &&
                    470:        !state->in_parameter_declaration && !state->block_init) {
                    471:
                    472:        for (const char *p = inp.s; p < inp.e;)
                    473:            if (*p++ == ')' && (*p == ';' || *p == ','))
                    474:                goto not_proc;
                    475:
                    476:        strncpy(state->procname, token.s, sizeof state->procname - 1);
                    477:        if (state->in_decl)
                    478:            state->in_parameter_declaration = true;
                    479:        return funcname;
                    480: not_proc:;
                    481:
                    482:     } else if (probably_typename(state)) {
                    483:        state->keyword = kw_type;
                    484:        state->last_u_d = true;
                    485:        return decl;
                    486:     }
                    487:
                    488:     if (state->last_token == decl)     /* if this is a declared variable,
                    489:                                         * then following sign is unary */
                    490:        state->last_u_d = true; /* will make "int a -1" work */
                    491:
                    492:     return ident;              /* the ident is not in the list */
                    493: }
1.75      rillig    494:
1.89      rillig    495: /* Reads the next token, placing it in the global variable "token". */
                    496: token_type
                    497: lexi(struct parser_state *state)
                    498: {
1.90      rillig    499:     token.e = token.s;
                    500:     state->col_1 = state->last_nl;
1.89      rillig    501:     state->last_nl = false;
1.75      rillig    502:
1.89      rillig    503:     while (is_hspace(*inp.s)) {
                    504:        state->col_1 = false;
                    505:        inbuf_skip();
                    506:     }
1.75      rillig    507:
1.91    ! rillig    508:     token_type alnum_ttype = lexi_alnum(state);
        !           509:     if (alnum_ttype != end_of_file)
        !           510:        return lexi_end(alnum_ttype);
1.16      kamil     511:
                    512:     /* Scan a non-alphanumeric token */
                    513:
1.90      rillig    514:     check_size_token(3);       /* for things like "<<=" */
                    515:     *token.e++ = inbuf_next();
1.50      rillig    516:     *token.e = '\0';
1.16      kamil     517:
1.91    ! rillig    518:     token_type ttype;
1.89      rillig    519:     bool unary_delim = false;  /* whether the current token forces a
                    520:                                 * following operator to be unary */
                    521:
1.50      rillig    522:     switch (*token.s) {
1.16      kamil     523:     case '\n':
                    524:        unary_delim = state->last_u_d;
                    525:        state->last_nl = true;  /* remember that we just had a newline */
1.47      rillig    526:        /* if data has been exhausted, the newline is a dummy. */
                    527:        ttype = had_eof ? end_of_file : newline;
1.16      kamil     528:        break;
                    529:
1.43      rillig    530:     case '\'':
                    531:     case '"':
1.44      rillig    532:        lex_char_or_string();
1.47      rillig    533:        ttype = ident;
1.16      kamil     534:        break;
1.6       lukem     535:
1.40      rillig    536:     case '(':
                    537:     case '[':
1.16      kamil     538:        unary_delim = true;
1.79      rillig    539:        ttype = lparen_or_lbracket;
1.16      kamil     540:        break;
                    541:
1.40      rillig    542:     case ')':
                    543:     case ']':
1.79      rillig    544:        ttype = rparen_or_rbracket;
1.16      kamil     545:        break;
                    546:
                    547:     case '#':
                    548:        unary_delim = state->last_u_d;
1.47      rillig    549:        ttype = preprocessing;
1.16      kamil     550:        break;
                    551:
                    552:     case '?':
                    553:        unary_delim = true;
1.47      rillig    554:        ttype = question;
1.16      kamil     555:        break;
                    556:
1.40      rillig    557:     case ':':
1.47      rillig    558:        ttype = colon;
1.16      kamil     559:        unary_delim = true;
                    560:        break;
                    561:
1.40      rillig    562:     case ';':
1.16      kamil     563:        unary_delim = true;
1.47      rillig    564:        ttype = semicolon;
1.16      kamil     565:        break;
                    566:
1.40      rillig    567:     case '{':
1.16      kamil     568:        unary_delim = true;
1.47      rillig    569:        ttype = lbrace;
1.16      kamil     570:        break;
                    571:
1.40      rillig    572:     case '}':
1.16      kamil     573:        unary_delim = true;
1.47      rillig    574:        ttype = rbrace;
1.16      kamil     575:        break;
                    576:
1.69      rillig    577:     case '\f':
1.16      kamil     578:        unary_delim = state->last_u_d;
1.74      rillig    579:        state->last_nl = true;  /* remember this, so we can set 'state->col_1'
1.16      kamil     580:                                 * right */
1.47      rillig    581:        ttype = form_feed;
1.16      kamil     582:        break;
                    583:
1.40      rillig    584:     case ',':
1.16      kamil     585:        unary_delim = true;
1.47      rillig    586:        ttype = comma;
1.16      kamil     587:        break;
                    588:
                    589:     case '.':
                    590:        unary_delim = false;
1.47      rillig    591:        ttype = period;
1.16      kamil     592:        break;
1.1       cgd       593:
1.16      kamil     594:     case '-':
1.90      rillig    595:     case '+':
1.47      rillig    596:        ttype = state->last_u_d ? unary_op : binary_op;
1.16      kamil     597:        unary_delim = true;
                    598:
1.90      rillig    599:        if (*inp.s == token.s[0]) {     /* ++, -- */
1.78      rillig    600:            *token.e++ = *inp.s++;
1.79      rillig    601:            if (state->last_token == ident ||
                    602:                    state->last_token == rparen_or_rbracket) {
1.47      rillig    603:                ttype = state->last_u_d ? unary_op : postfix_op;
1.1       cgd       604:                unary_delim = false;
1.16      kamil     605:            }
1.75      rillig    606:
1.90      rillig    607:        } else if (*inp.s == '=') {     /* += */
1.78      rillig    608:            *token.e++ = *inp.s++;
1.75      rillig    609:
1.90      rillig    610:        } else if (*inp.s == '>') {     /* -> */
1.78      rillig    611:            *token.e++ = *inp.s++;
1.16      kamil     612:            unary_delim = false;
1.47      rillig    613:            ttype = unary_op;
1.16      kamil     614:            state->want_blank = false;
                    615:        }
1.90      rillig    616:        break;
1.16      kamil     617:
                    618:     case '=':
1.86      rillig    619:        if (state->init_or_struct)
1.54      rillig    620:            state->block_init = true;
1.78      rillig    621:        if (*inp.s == '=') {    /* == */
                    622:            *token.e++ = *inp.s++;
1.67      rillig    623:            *token.e = '\0';
1.16      kamil     624:        }
1.47      rillig    625:        ttype = binary_op;
1.16      kamil     626:        unary_delim = true;
                    627:        break;
                    628:
                    629:     case '>':
                    630:     case '<':
                    631:     case '!':                  /* ops like <, <<, <=, !=, etc */
1.78      rillig    632:        if (*inp.s == '>' || *inp.s == '<' || *inp.s == '=')
1.50      rillig    633:            *token.e++ = inbuf_next();
1.78      rillig    634:        if (*inp.s == '=')
                    635:            *token.e++ = *inp.s++;
1.47      rillig    636:        ttype = state->last_u_d ? unary_op : binary_op;
1.16      kamil     637:        unary_delim = true;
                    638:        break;
                    639:
                    640:     case '*':
                    641:        unary_delim = true;
                    642:        if (!state->last_u_d) {
1.78      rillig    643:            if (*inp.s == '=')
                    644:                *token.e++ = *inp.s++;
1.47      rillig    645:            ttype = binary_op;
1.16      kamil     646:            break;
                    647:        }
1.75      rillig    648:
1.78      rillig    649:        while (*inp.s == '*' || isspace((unsigned char)*inp.s)) {
1.87      rillig    650:            if (*inp.s == '*')
                    651:                token_add_char('*');
1.32      rillig    652:            inbuf_skip();
1.16      kamil     653:        }
1.75      rillig    654:
1.16      kamil     655:        if (ps.in_decl) {
1.78      rillig    656:            char *tp = inp.s;
1.6       lukem     657:
1.16      kamil     658:            while (isalpha((unsigned char)*tp) ||
                    659:                   isspace((unsigned char)*tp)) {
1.78      rillig    660:                if (++tp >= inp.e)
1.81      rillig    661:                    inbuf_read_line();
1.16      kamil     662:            }
                    663:            if (*tp == '(')
                    664:                ps.procname[0] = ' ';
                    665:        }
1.75      rillig    666:
1.47      rillig    667:        ttype = unary_op;
1.16      kamil     668:        break;
1.1       cgd       669:
1.16      kamil     670:     default:
1.78      rillig    671:        if (token.s[0] == '/' && (*inp.s == '*' || *inp.s == '/')) {
1.16      kamil     672:            /* it is start of comment */
1.50      rillig    673:            *token.e++ = inbuf_next();
1.1       cgd       674:
1.47      rillig    675:            ttype = comment;
1.16      kamil     676:            unary_delim = state->last_u_d;
                    677:            break;
1.1       cgd       678:        }
1.75      rillig    679:
1.78      rillig    680:        while (token.e[-1] == *inp.s || *inp.s == '=') {
1.87      rillig    681:            /* handle '||', '&&', etc., and also things as in 'int *****i' */
                    682:            token_add_char(inbuf_next());
1.16      kamil     683:        }
1.75      rillig    684:
1.47      rillig    685:        ttype = state->last_u_d ? unary_op : binary_op;
1.16      kamil     686:        unary_delim = true;
1.47      rillig    687:     }
1.16      kamil     688:
1.78      rillig    689:     if (inp.s >= inp.e)        /* check for input buffer empty */
1.81      rillig    690:        inbuf_read_line();
1.75      rillig    691:
1.16      kamil     692:     state->last_u_d = unary_delim;
1.75      rillig    693:
1.25      rillig    694:     check_size_token(1);
1.50      rillig    695:     *token.e = '\0';
1.75      rillig    696:
1.47      rillig    697:     return lexi_end(ttype);
1.1       cgd       698: }
1.16      kamil     699:
1.6       lukem     700: void
1.64      rillig    701: add_typename(const char *name)
1.1       cgd       702: {
1.64      rillig    703:     if (typenames.len >= typenames.cap) {
                    704:        typenames.cap = 16 + 2 * typenames.cap;
                    705:        typenames.items = xrealloc(typenames.items,
                    706:            sizeof(typenames.items[0]) * typenames.cap);
                    707:     }
1.16      kamil     708:
1.84      rillig    709:     int pos = bsearch_typenames(name);
1.64      rillig    710:     if (pos >= 0)
                    711:        return;                 /* already in the list */
1.75      rillig    712:
1.64      rillig    713:     pos = -(pos + 1);
                    714:     memmove(typenames.items + pos + 1, typenames.items + pos,
1.73      rillig    715:        sizeof(typenames.items[0]) * (typenames.len++ - (unsigned)pos));
1.64      rillig    716:     typenames.items[pos] = xstrdup(name);
1.1       cgd       717: }

CVSweb <webmaster@jp.NetBSD.org>