src/usr.bin/indent/lexi.c - annotate

Return to lexi.c CVS log
Up to [cvs.NetBSD.org] / src / usr.bin / indent
Annotation of src/usr.bin/indent/lexi.c, Revision 1.114

1.114   ! rillig      1: /*     $NetBSD: lexi.c,v 1.113 2021/10/29 21:31:29 rillig Exp $        */
1.3       tls         2:
1.16      kamil       3: /*-
                      4:  * SPDX-License-Identifier: BSD-4-Clause
                      5:  *
                      6:  * Copyright (c) 1985 Sun Microsystems, Inc.
1.5       mrg         7:  * Copyright (c) 1980, 1993
                      8:  *     The Regents of the University of California.  All rights reserved.
1.1       cgd         9:  * All rights reserved.
                     10:  *
                     11:  * Redistribution and use in source and binary forms, with or without
                     12:  * modification, are permitted provided that the following conditions
                     13:  * are met:
                     14:  * 1. Redistributions of source code must retain the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer.
                     16:  * 2. Redistributions in binary form must reproduce the above copyright
                     17:  *    notice, this list of conditions and the following disclaimer in the
                     18:  *    documentation and/or other materials provided with the distribution.
                     19:  * 3. All advertising materials mentioning features or use of this software
                     20:  *    must display the following acknowledgement:
                     21:  *     This product includes software developed by the University of
                     22:  *     California, Berkeley and its contributors.
                     23:  * 4. Neither the name of the University nor the names of its contributors
                     24:  *    may be used to endorse or promote products derived from this software
                     25:  *    without specific prior written permission.
                     26:  *
                     27:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     28:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     29:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     30:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     31:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     32:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     33:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     34:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     35:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     36:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     37:  * SUCH DAMAGE.
                     38:  */
                     39:
1.16      kamil      40: #if 0
                     41: static char sccsid[] = "@(#)lexi.c     8.1 (Berkeley) 6/6/93";
                     42: #endif
                     43:
1.6       lukem      44: #include <sys/cdefs.h>
1.16      kamil      45: #if defined(__NetBSD__)
1.114   ! rillig     46: __RCSID("$NetBSD: lexi.c,v 1.113 2021/10/29 21:31:29 rillig Exp $");
1.16      kamil      47: #elif defined(__FreeBSD__)
                     48: __FBSDID("$FreeBSD: head/usr.bin/indent/lexi.c 337862 2018-08-15 18:19:45Z pstef $");
                     49: #endif
1.1       cgd        50:
1.93      rillig     51: #include <sys/param.h>
1.20      rillig     52: #include <assert.h>
1.1       cgd        53: #include <stdio.h>
                     54: #include <ctype.h>
                     55: #include <stdlib.h>
                     56: #include <string.h>
1.16      kamil      57:
                     58: #include "indent.h"
1.1       cgd        59:
1.60      rillig     60: /* must be sorted alphabetically, is used in binary search */
1.62      rillig     61: static const struct keyword {
                     62:     const char *name;
                     63:     enum keyword_kind kind;
                     64: } keywords[] = {
                     65:     {"_Bool", kw_type},
                     66:     {"_Complex", kw_type},
                     67:     {"_Imaginary", kw_type},
                     68:     {"auto", kw_storage_class},
                     69:     {"bool", kw_type},
                     70:     {"break", kw_jump},
                     71:     {"case", kw_case_or_default},
                     72:     {"char", kw_type},
                     73:     {"complex", kw_type},
                     74:     {"const", kw_type},
                     75:     {"continue", kw_jump},
                     76:     {"default", kw_case_or_default},
1.97      rillig     77:     {"do", kw_do},
1.62      rillig     78:     {"double", kw_type},
1.97      rillig     79:     {"else", kw_else},
1.62      rillig     80:     {"enum", kw_struct_or_union_or_enum},
                     81:     {"extern", kw_storage_class},
                     82:     {"float", kw_type},
1.98      rillig     83:     {"for", kw_for},
1.62      rillig     84:     {"goto", kw_jump},
1.98      rillig     85:     {"if", kw_if},
1.62      rillig     86:     {"imaginary", kw_type},
                     87:     {"inline", kw_inline_or_restrict},
                     88:     {"int", kw_type},
                     89:     {"long", kw_type},
                     90:     {"offsetof", kw_offsetof},
                     91:     {"register", kw_storage_class},
                     92:     {"restrict", kw_inline_or_restrict},
                     93:     {"return", kw_jump},
                     94:     {"short", kw_type},
                     95:     {"signed", kw_type},
                     96:     {"sizeof", kw_sizeof},
                     97:     {"static", kw_storage_class},
                     98:     {"struct", kw_struct_or_union_or_enum},
                     99:     {"switch", kw_switch},
                    100:     {"typedef", kw_typedef},
                    101:     {"union", kw_struct_or_union_or_enum},
                    102:     {"unsigned", kw_type},
                    103:     {"void", kw_type},
                    104:     {"volatile", kw_type},
1.98      rillig    105:     {"while", kw_while}
1.1       cgd       106: };
                    107:
/*
 * The set of identifiers that are to be treated as type names.
 *
 * 'items' is searched with a binary search that compares via strcmp(3), so
 * its first 'len' entries have to be kept in strcmp order.  'cap' is
 * presumably the number of allocated slots -- TODO confirm against the code
 * that inserts into this list.
 */
static struct {
    const char **items;
    unsigned len;
    unsigned cap;
} typenames;
1.16      kamil     113:
                    114: /*
                    115:  * The transition table below was rewritten by hand from lx's output, given
                    116:  * the following definitions. lx is Katherine Flavel's lexer generator.
                    117:  *
                    118:  * O  = /[0-7]/;        D  = /[0-9]/;          NZ = /[1-9]/;
                    119:  * H  = /[a-f0-9]/i;    B  = /[0-1]/;          HP = /0x/i;
                    120:  * BP = /0b/i;          E  = /e[+\-]?/i D+;    P  = /p[+\-]?/i D+;
                    121:  * FS = /[fl]/i;        IS = /u/i /(l|L|ll|LL)/? | /(l|L|ll|LL)/ /u/i?;
                    122:  *
                    123:  * D+           E  FS? -> $float;
                    124:  * D*    "." D+ E? FS? -> $float;
                    125:  * D+    "."    E? FS? -> $float;    HP H+           IS? -> $int;
                    126:  * HP H+        P  FS? -> $float;    NZ D*           IS? -> $int;
                    127:  * HP H* "." H+ P  FS? -> $float;    "0" O*          IS? -> $int;
                    128:  * HP H+ "."    P  FS  -> $float;    BP B+           IS? -> $int;
                    129:  */
1.71      rillig    130: /* INDENT OFF */
/*
 * Transition table of the number scanner.
 *
 * Each row belongs to one class of input character (see lex_number_row),
 * each of the 26 columns to one of the states 'A' to 'Z'; the scanner starts
 * in state 'A'.  An entry is the state that is entered after consuming the
 * character, a space means that the character cannot continue the number.
 * The lowercase entries 'f', 'i' and 'u' end the scan.  Row 0 is never used
 * for a transition; it tells for each state how a number ending there is
 * classified: f = floating, i = integer, u = unknown.
 *
 * Every row is exactly 26 characters long, the strings are not
 * NUL-terminated.
 */
static const unsigned char lex_number_state[][26] = {
    /*                 ABCDEFGHIJKLMNOPQRSTUVWXYZ */
    /*  0 (other)  */ "uuiifuufiuuiiuiiiiiuiuuuuu",
    /*  1 '0'      */ "CEIDEHHHIJQ  U  Q  VUVVZZZ",
    /*  2 '1'      */ "DEIDEHHHIJQ  U  Q  VUVVZZZ",
    /*  3 '2'..'7' */ "DEIDEHHHIJ   U     VUVVZZZ",
    /*  4 '8' '9'  */ "DEJDEHHHJJ   U     VUVVZZZ",
    /*  5 AaCcDd   */ "             U     VUVV   ",
    /*  6 Bb       */ "  K          U     VUVV   ",
    /*  7 Ee       */ "  FFF   FF   U     VUVV   ",
    /*  8 Ff       */ "    f  f     U     VUVV  f",
    /*  9 L        */ "  LLf  fL  PR   Li  L    f",
    /* 10 l        */ "  OOf  fO   S P O i O    f",
    /* 11 Pp       */ "                    FFX   ",
    /* 12 Uu       */ "  MM    M  i  iiM   M     ",
    /* 13 Xx       */ "  N                       ",
    /* 14 '+' '-'  */ "     G                 Y  ",
    /* 15 '.'      */ "B EE    EE   T      W     ",
    /*                 ABCDEFGHIJKLMNOPQRSTUVWXYZ */
};
1.71      rillig    158: /* INDENT ON */
1.1       cgd       159:
/*
 * Maps an input character to its row in lex_number_state.  Characters that
 * are not listed map to 0, which the scanner takes as "cannot be part of a
 * number".  The array ends at the largest listed character, so callers must
 * range-check the index first.
 */
static const uint8_t lex_number_row[] = {
    /* punctuation */
    ['+'] = 14, ['-'] = 14, ['.'] = 15,

    /* digits */
    ['0'] = 1, ['1'] = 2,
    ['2'] = 3, ['3'] = 3, ['4'] = 3, ['5'] = 3, ['6'] = 3, ['7'] = 3,
    ['8'] = 4, ['9'] = 4,

    /* uppercase letters */
    ['A'] = 5, ['B'] = 6, ['C'] = 5, ['D'] = 5, ['E'] = 7, ['F'] = 8,
    ['L'] = 9, ['P'] = 11, ['U'] = 12, ['X'] = 13,

    /* lowercase letters; only 'l' differs from its uppercase variant */
    ['a'] = 5, ['b'] = 6, ['c'] = 5, ['d'] = 5, ['e'] = 7, ['f'] = 8,
    ['l'] = 10, ['p'] = 11, ['u'] = 12, ['x'] = 13,
};
1.36      rillig    177:
1.32      rillig    178: static char
                    179: inbuf_peek(void)
                    180: {
1.78      rillig    181:     return *inp.s;
1.32      rillig    182: }
                    183:
1.66      rillig    184: void
1.32      rillig    185: inbuf_skip(void)
                    186: {
1.78      rillig    187:     inp.s++;
                    188:     if (inp.s >= inp.e)
1.81      rillig    189:        inbuf_read_line();
1.32      rillig    190: }
                    191:
1.66      rillig    192: char
1.32      rillig    193: inbuf_next(void)
                    194: {
                    195:     char ch = inbuf_peek();
                    196:     inbuf_skip();
                    197:     return ch;
                    198: }
                    199:
1.25      rillig    200: static void
                    201: check_size_token(size_t desired_size)
                    202: {
1.58      rillig    203:     if (token.e + desired_size >= token.l)
                    204:        buf_expand(&token, desired_size);
1.25      rillig    205: }
                    206:
1.87      rillig    207: static void
                    208: token_add_char(char ch)
                    209: {
                    210:     check_size_token(1);
                    211:     *token.e++ = ch;
                    212: }
                    213:
1.16      kamil     214: static int
1.62      rillig    215: cmp_keyword_by_name(const void *key, const void *elem)
1.16      kamil     216: {
1.62      rillig    217:     return strcmp(key, ((const struct keyword *)elem)->name);
1.27      rillig    218: }
                    219:
1.20      rillig    220: #ifdef debug
1.100     rillig    221: static const char *
                    222: lsym_name(lexer_symbol sym)
1.20      rillig    223: {
                    224:     static const char *const name[] = {
1.100     rillig    225:        "eof",
                    226:        "preprocessing",
                    227:        "newline",
                    228:        "form_feed",
                    229:        "comment",
                    230:        "lparen_or_lbracket",
                    231:        "rparen_or_rbracket",
                    232:        "lbrace",
                    233:        "rbrace",
                    234:        "period",
                    235:        "unary_op",
                    236:        "binary_op",
                    237:        "postfix_op",
                    238:        "question",
                    239:        "colon",
                    240:        "comma",
                    241:        "semicolon",
                    242:        "typedef",
                    243:        "storage_class",
                    244:        "type",
                    245:        "tag",
                    246:        "case_label",
                    247:        "string_prefix",
                    248:        "ident",
                    249:        "funcname",
                    250:        "do",
                    251:        "else",
                    252:        "for",
                    253:        "if",
                    254:        "switch",
                    255:        "while",
1.20      rillig    256:     };
                    257:
1.100     rillig    258:     assert(array_length(name) == (int)lsym_while + 1);
1.20      rillig    259:
1.100     rillig    260:     return name[sym];
1.20      rillig    261: }
                    262:
1.101     rillig    263: static const char *
1.103     rillig    264: kw_name(enum keyword_kind kw)
                    265: {
1.101     rillig    266:     static const char *name[] = {
                    267:        "0",
                    268:        "offsetof",
                    269:        "sizeof",
                    270:        "struct_or_union_or_enum",
                    271:        "type",
                    272:        "for",
                    273:        "if",
                    274:        "while",
                    275:        "do",
                    276:        "else",
                    277:        "switch",
                    278:        "case_or_default",
                    279:        "jump",
                    280:        "storage_class",
                    281:        "typedef",
                    282:        "inline_or_restrict",
                    283:     };
                    284:
                    285:     return name[kw];
                    286: }
                    287:
1.20      rillig    288: static void
1.72      rillig    289: debug_print_buf(const char *name, const struct buffer *buf)
1.20      rillig    290: {
1.72      rillig    291:     if (buf->s < buf->e) {
1.101     rillig    292:        debug_printf("%s ", name);
                    293:        debug_vis_range("\"", buf->s, buf->e, "\"\n");
1.20      rillig    294:     }
                    295: }
                    296:
/*
 * Helpers for debug_lexi, each printing one member of the parser state 'ps'.
 * They expect a variable 'prev_ps' to be in scope at the place of use.
 *
 * debug_ps_bool and debug_ps_int print the member only if it differs from
 * the same member of prev_ps; debug_ps_keyword prints the member whenever
 * it is not kw_0.
 *
 * Each macro is wrapped in do { } while (0) so that it expands to exactly
 * one statement and cannot capture an 'else' that follows its use.
 */
#define debug_ps_bool(name) \
        do { \
            if (ps.name != prev_ps.name) \
                debug_println("[%c] ps." #name, ps.name ? 'x' : ' '); \
        } while (0)
#define debug_ps_int(name) \
        do { \
            if (ps.name != prev_ps.name) \
                debug_println("%3d ps." #name, ps.name); \
        } while (0)
#define debug_ps_keyword(name) \
        do { \
            if (ps.name != kw_0) \
                debug_println("    ps." #name " = %s", kw_name(ps.name)); \
        } while (0)
1.112     rillig    306:
/*
 * Write a trace of the token that the lexer is about to return.
 *
 * Prints the current line number, the label, code and comment buffers (each
 * only if non-empty), the name of the lexer symbol and the token text.
 * After that come the members of the parser state: the bool and int members
 * only if they changed since the previous call, the keyword members whenever
 * they are not kw_0.  Finally the parser state is remembered for the
 * comparison in the next call.
 */
1.101     rillig    307: static void
1.107     rillig    308: debug_lexi(lexer_symbol lsym)
1.20      rillig    309: {
1.113     rillig    310:     /*
                    311:      * Watch out for 'rolled back parser state' in the debug output; the
                    312:      * differences around these are unreliable.
                    313:      */
                    314:     static struct parser_state prev_ps;
                    315:
1.104     rillig    316:     debug_println("");
1.101     rillig    317:     debug_printf("line %d\n", line_no);
1.72      rillig    318:     debug_print_buf("label", &lab);
                    319:     debug_print_buf("code", &code);
                    320:     debug_print_buf("comment", &com);
1.114   ! rillig    321:     debug_printf("lexi: %s", lsym_name(lsym));
1.112     rillig    322:     debug_vis_range(" \"", token.s, token.e, "\"\n");
                    323:
                             /*
                              * NOTE(review): the '//' lines below apparently name members of
                              * the parser state that are deliberately not printed, keeping
                              * this list in step with the struct declaration -- confirm
                              * against indent.h.
                              */
                    324:     // prev_token
                    325:     debug_ps_bool(prev_newline);
                    326:     debug_ps_bool(prev_col_1);
                    327:     debug_ps_keyword(prev_keyword);
                    328:     debug_ps_keyword(curr_keyword);
                    329:     debug_ps_bool(next_unary);
                    330:     // procname
                    331:     debug_ps_bool(want_blank);
                    332:     debug_ps_int(paren_level);
                    333:     debug_ps_int(p_l_follow);
                    334:     // paren_indents
                    335:     debug_ps_int(cast_mask);
                    336:     debug_ps_int(not_cast_mask);
                    337:
                    338:     debug_ps_int(comment_delta);
                    339:     debug_ps_int(n_comment_delta);
                    340:     debug_ps_int(com_ind);
                    341:
                    342:     debug_ps_bool(block_init);
                    343:     debug_ps_int(block_init_level);
                    344:     debug_ps_bool(init_or_struct);
                    345:
                    346:     debug_ps_int(ind_level);
                    347:     debug_ps_int(ind_level_follow);
                    348:
                    349:     debug_ps_int(decl_nest);
                    350:     debug_ps_bool(decl_on_line);
                    351:     debug_ps_bool(in_decl);
                    352:     debug_ps_int(just_saw_decl);
                    353:     debug_ps_bool(in_parameter_declaration);
                    354:     debug_ps_bool(decl_indent_done);
                    355:
                    356:     debug_ps_bool(in_stmt);
                    357:     debug_ps_bool(ind_stmt);
                    358:     debug_ps_bool(is_case_label);
                    359:
                    360:     debug_ps_bool(search_stmt);
1.113     rillig    361:
                             /* the baseline for the differences of the next call */
                    362:     prev_ps = ps;
1.101     rillig    363: }
1.96      rillig    364: #endif
1.20      rillig    365:
1.104     rillig    366: /* ARGSUSED */
1.101     rillig    367: static lexer_symbol
1.107     rillig    368: lexi_end(lexer_symbol lsym)
1.101     rillig    369: {
                    370: #ifdef debug
1.107     rillig    371:     debug_lexi(lsym);
1.101     rillig    372: #endif
1.100     rillig    373:     return lsym;
1.20      rillig    374: }
                    375:
1.43      rillig    376: static void
                    377: lex_number(void)
                    378: {
1.71      rillig    379:     for (uint8_t s = 'A'; s != 'f' && s != 'i' && s != 'u';) {
1.78      rillig    380:        uint8_t ch = (uint8_t)*inp.s;
1.94      rillig    381:        if (ch >= array_length(lex_number_row) || lex_number_row[ch] == 0)
1.56      rillig    382:            break;
1.75      rillig    383:
1.82      rillig    384:        uint8_t row = lex_number_row[ch];
                    385:        if (lex_number_state[row][s - 'A'] == ' ') {
1.71      rillig    386:            /*-
1.82      rillig    387:             * lex_number_state[0][s - 'A'] now indicates the type:
1.74      rillig    388:             * f = floating, i = integer, u = unknown
1.56      rillig    389:             */
1.43      rillig    390:            break;
                    391:        }
1.75      rillig    392:
1.82      rillig    393:        s = lex_number_state[row][s - 'A'];
1.87      rillig    394:        token_add_char(inbuf_next());
1.43      rillig    395:     }
                    396: }
                    397:
                    398: static void
                    399: lex_word(void)
                    400: {
1.78      rillig    401:     while (isalnum((unsigned char)*inp.s) ||
1.95      rillig    402:            *inp.s == '\\' ||
                    403:            *inp.s == '_' || *inp.s == '$') {
1.75      rillig    404:
1.78      rillig    405:        if (*inp.s == '\\') {
                    406:            if (inp.s[1] == '\n') {
                    407:                inp.s += 2;
                    408:                if (inp.s >= inp.e)
1.81      rillig    409:                    inbuf_read_line();
1.43      rillig    410:            } else
                    411:                break;
                    412:        }
1.75      rillig    413:
1.87      rillig    414:        token_add_char(inbuf_next());
1.43      rillig    415:     }
                    416: }
                    417:
                    418: static void
                    419: lex_char_or_string(void)
                    420: {
1.52      rillig    421:     for (char delim = *token.s;;) {
1.78      rillig    422:        if (*inp.s == '\n') {
1.52      rillig    423:            diag(1, "Unterminated literal");
                    424:            return;
                    425:        }
1.75      rillig    426:
1.87      rillig    427:        token_add_char(inbuf_next());
1.52      rillig    428:        if (token.e[-1] == delim)
                    429:            return;
1.75      rillig    430:
1.52      rillig    431:        if (token.e[-1] == '\\') {
1.78      rillig    432:            if (*inp.s == '\n')
1.52      rillig    433:                ++line_no;
1.87      rillig    434:            token_add_char(inbuf_next());
1.52      rillig    435:        }
                    436:     }
1.43      rillig    437: }
                    438:
1.84      rillig    439: /* Guess whether the current token is a declared type. */
1.57      rillig    440: static bool
1.107     rillig    441: probably_typename(void)
1.57      rillig    442: {
1.109     rillig    443:     if (ps.p_l_follow > 0)
1.70      rillig    444:        return false;
1.107     rillig    445:     if (ps.block_init || ps.in_stmt)
1.70      rillig    446:        return false;
1.78      rillig    447:     if (inp.s[0] == '*' && inp.s[1] != '=')
1.70      rillig    448:        goto maybe;
1.78      rillig    449:     if (isalpha((unsigned char)*inp.s))
1.70      rillig    450:        goto maybe;
                    451:     return false;
                    452: maybe:
1.110     rillig    453:     return ps.prev_token == lsym_semicolon ||
                    454:        ps.prev_token == lsym_lbrace ||
                    455:        ps.prev_token == lsym_rbrace;
1.57      rillig    456: }
                    457:
1.84      rillig    458: static int
                    459: bsearch_typenames(const char *key)
                    460: {
                    461:     const char **arr = typenames.items;
                    462:     int lo = 0;
                    463:     int hi = (int)typenames.len - 1;
                    464:
                    465:     while (lo <= hi) {
                    466:        int mid = (int)((unsigned)(lo + hi) >> 1);
                    467:        int cmp = strcmp(arr[mid], key);
                    468:        if (cmp < 0)
                    469:            lo = mid + 1;
                    470:        else if (cmp > 0)
                    471:            hi = mid - 1;
                    472:        else
                    473:            return mid;
                    474:     }
                    475:     return -(lo + 1);
                    476: }
                    477:
1.63      rillig    478: static bool
                    479: is_typename(void)
                    480: {
1.84      rillig    481:     if (opt.auto_typedefs &&
                    482:        token.e - token.s >= 2 && memcmp(token.e - 2, "_t", 2) == 0)
                    483:        return true;
1.63      rillig    484:
1.84      rillig    485:     return bsearch_typenames(token.s) >= 0;
1.63      rillig    486: }
                    487:
1.90      rillig    488: /*
                          * Read a number or a word (identifier or keyword) into 'token' and
                          * classify it.
                          *
                          * Returns lsym_eof, merely as a placeholder, if the input starts with
                          * neither of them; in that case nothing has been consumed.  Otherwise
                          * returns the symbol for the token, after skipping the blanks that follow
                          * it.  Along the way ps.next_unary and ps.curr_keyword are updated, as well
                          * as ps.cast_mask for type names inside parentheses, and ps.procname and
                          * ps.in_parameter_declaration for function names.
                          */
1.100     rillig    489: static lexer_symbol
1.107     rillig    490: lexi_alnum(void)
1.1       cgd       491: {
                             /* a number starts with a digit, or with a '.' followed by a digit */
1.89      rillig    492:     if (isdigit((unsigned char)*inp.s) ||
                    493:        (inp.s[0] == '.' && isdigit((unsigned char)inp.s[1]))) {
                    494:        lex_number();
1.103     rillig    495:     } else if (isalnum((unsigned char)*inp.s) ||
                    496:            *inp.s == '_' || *inp.s == '$') {
1.89      rillig    497:        lex_word();
1.102     rillig    498:     } else
                    499:        return lsym_eof;        /* just as a placeholder */
                    500:
                             /* terminate the token, it is used as a C string below */
1.89      rillig    501:     *token.e = '\0';
1.16      kamil     502:
                             /* a lone 'L' directly before a quote is the prefix of a literal */
1.89      rillig    503:     if (token.s[0] == 'L' && token.s[1] == '\0' &&
                    504:        (*inp.s == '"' || *inp.s == '\''))
1.100     rillig    505:        return lsym_string_prefix;
1.16      kamil     506:
1.111     rillig    507:     while (ch_isblank(inbuf_peek()))
1.32      rillig    508:        inbuf_skip();
1.89      rillig    509:
                             /* outside of parentheses, the word after a tag is always a type */
1.110     rillig    510:     if (ps.prev_token == lsym_tag && ps.p_l_follow == 0) {
1.107     rillig    511:        ps.next_unary = true;
1.100     rillig    512:        return lsym_type;
1.16      kamil     513:     }
1.6       lukem     514:
1.89      rillig    515:     /* Operator after identifier is binary unless last token was 'struct'. */
1.110     rillig    516:     ps.next_unary = ps.prev_token == lsym_tag;
1.16      kamil     517:
1.89      rillig    518:     const struct keyword *kw = bsearch(token.s, keywords,
1.94      rillig    519:        array_length(keywords), sizeof(keywords[0]), cmp_keyword_by_name);
1.89      rillig    520:     if (kw == NULL) {
                                 /*
                                  * Not a keyword.  A type name is handled like the keyword kind
                                  * kw_type, by jumping into the switch below; 'kw' stays NULL.
                                  */
                    521:        if (is_typename()) {
1.107     rillig    522:            ps.curr_keyword = kw_type;
                    523:            ps.next_unary = true;
1.89      rillig    524:            goto found_typename;
1.16      kamil     525:        }
1.89      rillig    526:
                    527:     } else {                   /* we have a keyword */
1.107     rillig    528:        ps.curr_keyword = kw->kind;
                    529:        ps.next_unary = true;
1.89      rillig    530:
                    531:        switch (kw->kind) {
                    532:        case kw_switch:
1.100     rillig    533:            return lsym_switch;
1.89      rillig    534:
                    535:        case kw_case_or_default:
1.100     rillig    536:            return lsym_case_label;
1.89      rillig    537:
                    538:        case kw_struct_or_union_or_enum:
                    539:        case kw_type:
                    540:     found_typename:
1.109     rillig    541:            if (ps.p_l_follow > 0) {
1.108     rillig    542:                /* inside parentheses: cast, param list, offsetof or sizeof */
1.107     rillig    543:                ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.not_cast_mask;
1.89      rillig    544:            }
                                     /*
                                      * The 'break's in this case leave the switch; the token is
                                      * then classified by the code after the if statement.
                                      */
1.110     rillig    545:            if (ps.prev_token == lsym_period ||
                    546:                    ps.prev_token == lsym_unary_op)
1.89      rillig    547:                break;
                    548:            if (kw != NULL && kw->kind == kw_struct_or_union_or_enum)
1.100     rillig    549:                return lsym_tag;
1.109     rillig    550:            if (ps.p_l_follow > 0)
1.89      rillig    551:                break;
1.100     rillig    552:            return lsym_type;
1.75      rillig    553:
1.98      rillig    554:        case kw_for:
1.100     rillig    555:            return lsym_for;
1.98      rillig    556:
                    557:        case kw_if:
1.100     rillig    558:            return lsym_if;
1.98      rillig    559:
                    560:        case kw_while:
1.100     rillig    561:            return lsym_while;
1.75      rillig    562:
1.97      rillig    563:        case kw_do:
1.100     rillig    564:            return lsym_do;
1.97      rillig    565:
                    566:        case kw_else:
1.100     rillig    567:            return lsym_else;
1.16      kamil     568:
1.89      rillig    569:        case kw_storage_class:
1.100     rillig    570:            return lsym_storage_class;
1.16      kamil     571:
1.89      rillig    572:        case kw_typedef:
1.100     rillig    573:            return lsym_typedef;
1.16      kamil     574:
1.89      rillig    575:        default:                /* all others are treated like any other
1.16      kamil     576:                                 * identifier */
1.100     rillig    577:            return lsym_ident;
1.90      rillig    578:        }
                    579:     }
1.89      rillig    580:
                             /*
                              * A word directly followed by '(' at the outermost level may be
                              * the name of a function that is being defined.
                              */
1.107     rillig    581:     if (*inp.s == '(' && ps.tos <= 1 && ps.ind_level == 0 &&
                    582:        !ps.in_parameter_declaration && !ps.block_init) {
1.89      rillig    583:
                                 /*
                                  * If the rest of the input buffer contains a ')' that is directly
                                  * followed by ';' or ',', take this as a declaration or a call
                                  * instead.  After the increment, 'p' may be equal to inp.e when
                                  * it is dereferenced -- presumably the buffer has a readable
                                  * character there; TODO confirm.
                                  */
                    584:        for (const char *p = inp.s; p < inp.e;)
                    585:            if (*p++ == ')' && (*p == ';' || *p == ','))
                    586:                goto not_proc;
                    587:
                                 /*
                                  * NOTE(review): strncpy does not terminate the string if the
                                  * token fills all of the sizeof - 1 bytes; this relies on the
                                  * last byte of ps.procname already being '\0' -- confirm.
                                  */
1.107     rillig    588:        strncpy(ps.procname, token.s, sizeof ps.procname - 1);
                    589:        if (ps.in_decl)
                    590:            ps.in_parameter_declaration = true;
1.100     rillig    591:        return lsym_funcname;
1.89      rillig    592: not_proc:;
                    593:
1.107     rillig    594:     } else if (probably_typename()) {
                    595:        ps.curr_keyword = kw_type;
                    596:        ps.next_unary = true;
1.100     rillig    597:        return lsym_type;
1.89      rillig    598:     }
                    599:
1.110     rillig    600:     if (ps.prev_token == lsym_type)    /* if this is a declared variable,
1.89      rillig    601:                                         * then following sign is unary */
1.107     rillig    602:        ps.next_unary = true;   /* will make "int a -1" work */
1.89      rillig    603:
1.100     rillig    604:     return lsym_ident;         /* the ident is not in the list */
1.89      rillig    605: }
1.75      rillig    606:
1.89      rillig    607: /* Reads the next token, placing it in the global variable "token". */
1.100     rillig    608: lexer_symbol
1.106     rillig    609: lexi(void)
1.89      rillig    610: {
1.90      rillig    611:     token.e = token.s;
1.110     rillig    612:     ps.prev_col_1 = ps.prev_newline;
                    613:     ps.prev_newline = false;
1.107     rillig    614:     ps.prev_keyword = ps.curr_keyword;
                    615:     ps.curr_keyword = kw_0;
1.75      rillig    616:
1.111     rillig    617:     while (ch_isblank(*inp.s)) {
1.110     rillig    618:        ps.prev_col_1 = false;
1.89      rillig    619:        inbuf_skip();
                    620:     }
1.75      rillig    621:
1.107     rillig    622:     lexer_symbol alnum_lsym = lexi_alnum();
1.100     rillig    623:     if (alnum_lsym != lsym_eof)
1.107     rillig    624:        return lexi_end(alnum_lsym);
1.16      kamil     625:
                    626:     /* Scan a non-alphanumeric token */
                    627:
1.90      rillig    628:     check_size_token(3);       /* for things like "<<=" */
                    629:     *token.e++ = inbuf_next();
1.50      rillig    630:     *token.e = '\0';
1.16      kamil     631:
1.100     rillig    632:     lexer_symbol lsym;
1.89      rillig    633:     bool unary_delim = false;  /* whether the current token forces a
                    634:                                 * following operator to be unary */
                    635:
1.50      rillig    636:     switch (*token.s) {
1.16      kamil     637:     case '\n':
1.107     rillig    638:        unary_delim = ps.next_unary;
1.110     rillig    639:        ps.prev_newline = true;
1.47      rillig    640:        /* if data has been exhausted, the newline is a dummy. */
1.100     rillig    641:        lsym = had_eof ? lsym_eof : lsym_newline;
1.16      kamil     642:        break;
                    643:
1.43      rillig    644:     case '\'':
                    645:     case '"':
1.44      rillig    646:        lex_char_or_string();
1.100     rillig    647:        lsym = lsym_ident;
1.16      kamil     648:        break;
1.6       lukem     649:
1.40      rillig    650:     case '(':
                    651:     case '[':
1.16      kamil     652:        unary_delim = true;
1.100     rillig    653:        lsym = lsym_lparen_or_lbracket;
1.16      kamil     654:        break;
                    655:
1.40      rillig    656:     case ')':
                    657:     case ']':
1.100     rillig    658:        lsym = lsym_rparen_or_rbracket;
1.16      kamil     659:        break;
                    660:
                    661:     case '#':
1.107     rillig    662:        unary_delim = ps.next_unary;
1.100     rillig    663:        lsym = lsym_preprocessing;
1.16      kamil     664:        break;
                    665:
                    666:     case '?':
                    667:        unary_delim = true;
1.100     rillig    668:        lsym = lsym_question;
1.16      kamil     669:        break;
                    670:
1.40      rillig    671:     case ':':
1.100     rillig    672:        lsym = lsym_colon;
1.16      kamil     673:        unary_delim = true;
                    674:        break;
                    675:
1.40      rillig    676:     case ';':
1.16      kamil     677:        unary_delim = true;
1.100     rillig    678:        lsym = lsym_semicolon;
1.16      kamil     679:        break;
                    680:
1.40      rillig    681:     case '{':
1.16      kamil     682:        unary_delim = true;
1.100     rillig    683:        lsym = lsym_lbrace;
1.16      kamil     684:        break;
                    685:
1.40      rillig    686:     case '}':
1.16      kamil     687:        unary_delim = true;
1.100     rillig    688:        lsym = lsym_rbrace;
1.16      kamil     689:        break;
                    690:
1.69      rillig    691:     case '\f':
1.107     rillig    692:        unary_delim = ps.next_unary;
1.110     rillig    693:        ps.prev_newline = true;
1.100     rillig    694:        lsym = lsym_form_feed;
1.16      kamil     695:        break;
                    696:
1.40      rillig    697:     case ',':
1.16      kamil     698:        unary_delim = true;
1.100     rillig    699:        lsym = lsym_comma;
1.16      kamil     700:        break;
                    701:
                    702:     case '.':
                    703:        unary_delim = false;
1.100     rillig    704:        lsym = lsym_period;
1.16      kamil     705:        break;
1.1       cgd       706:
1.16      kamil     707:     case '-':
1.90      rillig    708:     case '+':
1.107     rillig    709:        lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op;
1.16      kamil     710:        unary_delim = true;
                    711:
1.90      rillig    712:        if (*inp.s == token.s[0]) {     /* ++, -- */
1.78      rillig    713:            *token.e++ = *inp.s++;
1.110     rillig    714:            if (ps.prev_token == lsym_ident ||
                    715:                    ps.prev_token == lsym_rparen_or_rbracket) {
1.107     rillig    716:                lsym = ps.next_unary ? lsym_unary_op : lsym_postfix_op;
1.1       cgd       717:                unary_delim = false;
1.16      kamil     718:            }
1.75      rillig    719:
1.90      rillig    720:        } else if (*inp.s == '=') {     /* += */
1.78      rillig    721:            *token.e++ = *inp.s++;
1.75      rillig    722:
1.90      rillig    723:        } else if (*inp.s == '>') {     /* -> */
1.78      rillig    724:            *token.e++ = *inp.s++;
1.16      kamil     725:            unary_delim = false;
1.100     rillig    726:            lsym = lsym_unary_op;
1.107     rillig    727:            ps.want_blank = false;
1.16      kamil     728:        }
1.90      rillig    729:        break;
1.16      kamil     730:
                    731:     case '=':
1.107     rillig    732:        if (ps.init_or_struct)
                    733:            ps.block_init = true;
1.78      rillig    734:        if (*inp.s == '=') {    /* == */
                    735:            *token.e++ = *inp.s++;
1.67      rillig    736:            *token.e = '\0';
1.16      kamil     737:        }
1.100     rillig    738:        lsym = lsym_binary_op;
1.16      kamil     739:        unary_delim = true;
                    740:        break;
                    741:
                    742:     case '>':
                    743:     case '<':
                    744:     case '!':                  /* ops like <, <<, <=, !=, etc */
1.78      rillig    745:        if (*inp.s == '>' || *inp.s == '<' || *inp.s == '=')
1.50      rillig    746:            *token.e++ = inbuf_next();
1.78      rillig    747:        if (*inp.s == '=')
                    748:            *token.e++ = *inp.s++;
1.107     rillig    749:        lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op;
1.16      kamil     750:        unary_delim = true;
                    751:        break;
                    752:
                    753:     case '*':
                    754:        unary_delim = true;
1.107     rillig    755:        if (!ps.next_unary) {
1.78      rillig    756:            if (*inp.s == '=')
                    757:                *token.e++ = *inp.s++;
1.100     rillig    758:            lsym = lsym_binary_op;
1.16      kamil     759:            break;
                    760:        }
1.75      rillig    761:
1.78      rillig    762:        while (*inp.s == '*' || isspace((unsigned char)*inp.s)) {
1.87      rillig    763:            if (*inp.s == '*')
                    764:                token_add_char('*');
1.32      rillig    765:            inbuf_skip();
1.16      kamil     766:        }
1.75      rillig    767:
1.16      kamil     768:        if (ps.in_decl) {
1.78      rillig    769:            char *tp = inp.s;
1.6       lukem     770:
1.16      kamil     771:            while (isalpha((unsigned char)*tp) ||
1.103     rillig    772:                    isspace((unsigned char)*tp)) {
1.78      rillig    773:                if (++tp >= inp.e)
1.81      rillig    774:                    inbuf_read_line();
1.16      kamil     775:            }
                    776:            if (*tp == '(')
                    777:                ps.procname[0] = ' ';
                    778:        }
1.75      rillig    779:
1.100     rillig    780:        lsym = lsym_unary_op;
1.16      kamil     781:        break;
1.1       cgd       782:
1.16      kamil     783:     default:
1.78      rillig    784:        if (token.s[0] == '/' && (*inp.s == '*' || *inp.s == '/')) {
1.16      kamil     785:            /* it is start of comment */
1.50      rillig    786:            *token.e++ = inbuf_next();
1.1       cgd       787:
1.100     rillig    788:            lsym = lsym_comment;
1.107     rillig    789:            unary_delim = ps.next_unary;
1.16      kamil     790:            break;
1.1       cgd       791:        }
1.75      rillig    792:
1.78      rillig    793:        while (token.e[-1] == *inp.s || *inp.s == '=') {
1.87      rillig    794:            /* handle '||', '&&', etc., and also things as in 'int *****i' */
                    795:            token_add_char(inbuf_next());
1.16      kamil     796:        }
1.75      rillig    797:
1.107     rillig    798:        lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op;
1.16      kamil     799:        unary_delim = true;
1.47      rillig    800:     }
1.16      kamil     801:
1.95      rillig    802:     if (inp.s >= inp.e)                /* check for input buffer empty */
1.81      rillig    803:        inbuf_read_line();
1.75      rillig    804:
1.107     rillig    805:     ps.next_unary = unary_delim;
1.75      rillig    806:
1.25      rillig    807:     check_size_token(1);
1.50      rillig    808:     *token.e = '\0';
1.75      rillig    809:
1.107     rillig    810:     return lexi_end(lsym);
1.1       cgd       811: }
1.16      kamil     812:
1.6       lukem     813: void
1.64      rillig    814: add_typename(const char *name)
1.1       cgd       815: {
1.64      rillig    816:     if (typenames.len >= typenames.cap) {
                    817:        typenames.cap = 16 + 2 * typenames.cap;
                    818:        typenames.items = xrealloc(typenames.items,
                    819:            sizeof(typenames.items[0]) * typenames.cap);
                    820:     }
1.16      kamil     821:
1.84      rillig    822:     int pos = bsearch_typenames(name);
1.64      rillig    823:     if (pos >= 0)
                    824:        return;                 /* already in the list */
1.75      rillig    825:
1.64      rillig    826:     pos = -(pos + 1);
                    827:     memmove(typenames.items + pos + 1, typenames.items + pos,
1.73      rillig    828:        sizeof(typenames.items[0]) * (typenames.len++ - (unsigned)pos));
1.64      rillig    829:     typenames.items[pos] = xstrdup(name);
1.1       cgd       830: }
CVSweb <webmaster@jp.NetBSD.org>