Annotation of src/usr.bin/indent/lexi.c, Revision 1.91
1.91 ! rillig 1: /* $NetBSD: lexi.c,v 1.90 2021/10/11 20:13:46 rillig Exp $ */
1.3 tls 2:
1.16 kamil 3: /*-
4: * SPDX-License-Identifier: BSD-4-Clause
5: *
6: * Copyright (c) 1985 Sun Microsystems, Inc.
1.5 mrg 7: * Copyright (c) 1980, 1993
8: * The Regents of the University of California. All rights reserved.
1.1 cgd 9: * All rights reserved.
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
19: * 3. All advertising materials mentioning features or use of this software
20: * must display the following acknowledgement:
21: * This product includes software developed by the University of
22: * California, Berkeley and its contributors.
23: * 4. Neither the name of the University nor the names of its contributors
24: * may be used to endorse or promote products derived from this software
25: * without specific prior written permission.
26: *
27: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37: * SUCH DAMAGE.
38: */
39:
1.16 kamil 40: #if 0
41: static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93";
42: #endif
43:
1.6 lukem 44: #include <sys/cdefs.h>
1.16 kamil 45: #if defined(__NetBSD__)
1.91 ! rillig 46: __RCSID("$NetBSD: lexi.c,v 1.90 2021/10/11 20:13:46 rillig Exp $");
1.16 kamil 47: #elif defined(__FreeBSD__)
48: __FBSDID("$FreeBSD: head/usr.bin/indent/lexi.c 337862 2018-08-15 18:19:45Z pstef $");
49: #endif
1.1 cgd 50:
1.20 rillig 51: #include <assert.h>
1.1 cgd 52: #include <stdio.h>
53: #include <ctype.h>
54: #include <stdlib.h>
55: #include <string.h>
1.16 kamil 56: #include <sys/param.h>
57:
58: #include "indent.h"
1.1 cgd 59:
1.60 rillig 60: /* must be sorted alphabetically, is used in binary search */
1.62 rillig 61: static const struct keyword {
62: const char *name;
63: enum keyword_kind kind;
64: } keywords[] = {
65: {"_Bool", kw_type},
66: {"_Complex", kw_type},
67: {"_Imaginary", kw_type},
68: {"auto", kw_storage_class},
69: {"bool", kw_type},
70: {"break", kw_jump},
71: {"case", kw_case_or_default},
72: {"char", kw_type},
73: {"complex", kw_type},
74: {"const", kw_type},
75: {"continue", kw_jump},
76: {"default", kw_case_or_default},
77: {"do", kw_do_or_else},
78: {"double", kw_type},
79: {"else", kw_do_or_else},
80: {"enum", kw_struct_or_union_or_enum},
81: {"extern", kw_storage_class},
82: {"float", kw_type},
83: {"for", kw_for_or_if_or_while},
84: {"goto", kw_jump},
85: {"if", kw_for_or_if_or_while},
86: {"imaginary", kw_type},
87: {"inline", kw_inline_or_restrict},
88: {"int", kw_type},
89: {"long", kw_type},
90: {"offsetof", kw_offsetof},
91: {"register", kw_storage_class},
92: {"restrict", kw_inline_or_restrict},
93: {"return", kw_jump},
94: {"short", kw_type},
95: {"signed", kw_type},
96: {"sizeof", kw_sizeof},
97: {"static", kw_storage_class},
98: {"struct", kw_struct_or_union_or_enum},
99: {"switch", kw_switch},
100: {"typedef", kw_typedef},
101: {"union", kw_struct_or_union_or_enum},
102: {"unsigned", kw_type},
103: {"void", kw_type},
104: {"volatile", kw_type},
105: {"while", kw_for_or_if_or_while}
1.1 cgd 106: };
107:
/*
 * The type names registered through add_typename, kept in strcmp order so
 * that bsearch_typenames can look them up.
 */
static struct {
    const char **items;		/* the names, sorted */
    unsigned len;		/* number of names in use */
    unsigned cap;		/* number of names that fit into 'items' */
} typenames;
1.16 kamil 113:
114: /*
115: * The transition table below was rewritten by hand from lx's output, given
116: * the following definitions. lx is Katherine Flavel's lexer generator.
117: *
118: * O = /[0-7]/; D = /[0-9]/; NZ = /[1-9]/;
119: * H = /[a-f0-9]/i; B = /[0-1]/; HP = /0x/i;
120: * BP = /0b/i; E = /e[+\-]?/i D+; P = /p[+\-]?/i D+;
121: * FS = /[fl]/i; IS = /u/i /(l|L|ll|LL)/? | /(l|L|ll|LL)/ /u/i?;
122: *
123: * D+ E FS? -> $float;
124: * D* "." D+ E? FS? -> $float;
125: * D+ "." E? FS? -> $float; HP H+ IS? -> $int;
126: * HP H+ P FS? -> $float; NZ D* IS? -> $int;
127: * HP H* "." H+ P FS? -> $float; "0" O* IS? -> $int;
128: * HP H+ "." P FS -> $float; BP B+ IS? -> $int;
129: */
1.71 rillig 130: /* INDENT OFF */
/*
 * State transition table for lex_number.  The first index is the character
 * class from lex_number_row, the second index is the current state minus
 * 'A'.  An entry is either the next state ('A'..'Z'), one of the final
 * markers 'f', 'i', 'u', or ' ' if there is no transition.
 *
 * NOTE(review): every row is declared with 26 columns, but the string
 * literals in this listing are shorter than that.  The runs of spaces look
 * like they were collapsed when the listing was extracted -- verify each
 * row against the upstream file before relying on this table.
 */
static const unsigned char lex_number_state[][26] = {
    /* examples:
    00
    s 0xx
    t 00xaa
    a 11 101100xxa..
    r 11ee0001101lbuuxx.a.pp
    t.01.e+008bLuxll0Ll.aa.p+0
    states: ABCDEFGHIJKLMNOPQRSTUVWXYZ */
    [0] = "uuiifuufiuuiiuiiiiiuiuuuuu", /* (other) */
    [1] = "CEIDEHHHIJQ U Q VUVVZZZ", /* 0 */
    [2] = "DEIDEHHHIJQ U Q VUVVZZZ", /* 1 */
    [3] = "DEIDEHHHIJ U VUVVZZZ", /* 2 3 4 5 6 7 */
    [4] = "DEJDEHHHJJ U VUVVZZZ", /* 8 9 */
    [5] = " U VUVV ", /* A a C c D d */
    [6] = " K U VUVV ", /* B b */
    [7] = " FFF FF U VUVV ", /* E e */
    [8] = " f f U VUVV f", /* F f */
    [9] = " LLf fL PR Li L f", /* L */
    [10] = " OOf fO S P O i O f", /* l */
    [11] = " FFX ", /* P p */
    [12] = " MM M i iiM M ", /* U u */
    [13] = " N ", /* X x */
    [14] = " G Y ", /* + - */
    [15] = "B EE EE T W ", /* . */
    /* ABCDEFGHIJKLMNOPQRSTUVWXYZ */
};
1.71 rillig 158: /* INDENT ON */
1.1 cgd 159:
/*
 * Maps an input character to its row in lex_number_state.  Characters that
 * are not listed map to 0; lex_number stops scanning at such a character.
 */
static const uint8_t lex_number_row[] = {
    /* digits */
    ['0'] = 1,
    ['1'] = 2,
    ['2'] = 3,
    ['3'] = 3,
    ['4'] = 3,
    ['5'] = 3,
    ['6'] = 3,
    ['7'] = 3,
    ['8'] = 4,
    ['9'] = 4,

    /* letters that only occur as hexadecimal digits */
    ['A'] = 5,
    ['a'] = 5,
    ['C'] = 5,
    ['c'] = 5,
    ['D'] = 5,
    ['d'] = 5,

    /* letters that also act as prefix, exponent or suffix */
    ['B'] = 6,
    ['b'] = 6,
    ['E'] = 7,
    ['e'] = 7,
    ['F'] = 8,
    ['f'] = 8,
    ['L'] = 9,
    ['l'] = 10,
    ['P'] = 11,
    ['p'] = 11,
    ['U'] = 12,
    ['u'] = 12,
    ['X'] = 13,
    ['x'] = 13,

    /* punctuation */
    ['+'] = 14,
    ['-'] = 14,
    ['.'] = 15,
};
1.36 rillig 177:
1.32 rillig 178: static char
179: inbuf_peek(void)
180: {
1.78 rillig 181: return *inp.s;
1.32 rillig 182: }
183:
1.66 rillig 184: void
1.32 rillig 185: inbuf_skip(void)
186: {
1.78 rillig 187: inp.s++;
188: if (inp.s >= inp.e)
1.81 rillig 189: inbuf_read_line();
1.32 rillig 190: }
191:
1.66 rillig 192: char
1.32 rillig 193: inbuf_next(void)
194: {
195: char ch = inbuf_peek();
196: inbuf_skip();
197: return ch;
198: }
199:
1.25 rillig 200: static void
201: check_size_token(size_t desired_size)
202: {
1.58 rillig 203: if (token.e + desired_size >= token.l)
204: buf_expand(&token, desired_size);
1.25 rillig 205: }
206:
1.87 rillig 207: static void
208: token_add_char(char ch)
209: {
210: check_size_token(1);
211: *token.e++ = ch;
212: }
213:
1.16 kamil 214: static int
1.62 rillig 215: cmp_keyword_by_name(const void *key, const void *elem)
1.16 kamil 216: {
1.62 rillig 217: return strcmp(key, ((const struct keyword *)elem)->name);
1.27 rillig 218: }
219:
1.20 rillig 220: #ifdef debug
221: const char *
1.47 rillig 222: token_type_name(token_type ttype)
1.20 rillig 223: {
224: static const char *const name[] = {
1.79 rillig 225: "end_of_file", "newline", "lparen_or_lbracket", "rparen_or_rbracket",
226: "unary_op", "binary_op", "postfix_op", "question",
227: "case_label", "colon",
1.20 rillig 228: "semicolon", "lbrace", "rbrace", "ident", "comma",
1.31 rillig 229: "comment", "switch_expr", "preprocessing", "form_feed", "decl",
1.29 rillig 230: "keyword_for_if_while", "keyword_do_else",
1.30 rillig 231: "if_expr", "while_expr", "for_exprs",
232: "stmt", "stmt_list", "keyword_else", "keyword_do", "do_stmt",
1.31 rillig 233: "if_expr_stmt", "if_expr_stmt_else", "period", "string_prefix",
234: "storage_class", "funcname", "type_def", "keyword_struct_union_enum"
1.20 rillig 235: };
236:
1.62 rillig 237: assert(0 <= ttype && ttype < nitems(name));
1.20 rillig 238:
1.47 rillig 239: return name[ttype];
1.20 rillig 240: }
241:
242: static void
1.72 rillig 243: debug_print_buf(const char *name, const struct buffer *buf)
1.20 rillig 244: {
1.72 rillig 245: if (buf->s < buf->e) {
1.39 rillig 246: debug_printf(" %s ", name);
1.72 rillig 247: debug_vis_range("\"", buf->s, buf->e, "\"");
1.20 rillig 248: }
249: }
250:
251: static token_type
1.47 rillig 252: lexi_end(token_type ttype)
1.20 rillig 253: {
1.39 rillig 254: debug_printf("in line %d, lexi returns '%s'",
1.47 rillig 255: line_no, token_type_name(ttype));
1.72 rillig 256: debug_print_buf("token", &token);
257: debug_print_buf("label", &lab);
258: debug_print_buf("code", &code);
259: debug_print_buf("comment", &com);
1.39 rillig 260: debug_printf("\n");
1.20 rillig 261:
1.47 rillig 262: return ttype;
1.20 rillig 263: }
264: #else
/* Without debug support, lexi_end passes the token type through as-is. */
#define lexi_end(tk) (tk)
1.20 rillig 266: #endif
267:
1.43 rillig 268: static void
269: lex_number(void)
270: {
1.71 rillig 271: for (uint8_t s = 'A'; s != 'f' && s != 'i' && s != 'u';) {
1.78 rillig 272: uint8_t ch = (uint8_t)*inp.s;
1.82 rillig 273: if (ch >= nitems(lex_number_row) || lex_number_row[ch] == 0)
1.56 rillig 274: break;
1.75 rillig 275:
1.82 rillig 276: uint8_t row = lex_number_row[ch];
277: if (lex_number_state[row][s - 'A'] == ' ') {
1.71 rillig 278: /*-
1.82 rillig 279: * lex_number_state[0][s - 'A'] now indicates the type:
1.74 rillig 280: * f = floating, i = integer, u = unknown
1.56 rillig 281: */
1.43 rillig 282: break;
283: }
1.75 rillig 284:
1.82 rillig 285: s = lex_number_state[row][s - 'A'];
1.87 rillig 286: token_add_char(inbuf_next());
1.43 rillig 287: }
288: }
289:
290: static void
291: lex_word(void)
292: {
1.78 rillig 293: while (isalnum((unsigned char)*inp.s) ||
294: *inp.s == '\\' ||
295: *inp.s == '_' || *inp.s == '$') {
1.75 rillig 296:
1.78 rillig 297: if (*inp.s == '\\') {
298: if (inp.s[1] == '\n') {
299: inp.s += 2;
300: if (inp.s >= inp.e)
1.81 rillig 301: inbuf_read_line();
1.43 rillig 302: } else
303: break;
304: }
1.75 rillig 305:
1.87 rillig 306: token_add_char(inbuf_next());
1.43 rillig 307: }
308: }
309:
310: static void
311: lex_char_or_string(void)
312: {
1.52 rillig 313: for (char delim = *token.s;;) {
1.78 rillig 314: if (*inp.s == '\n') {
1.52 rillig 315: diag(1, "Unterminated literal");
316: return;
317: }
1.75 rillig 318:
1.87 rillig 319: token_add_char(inbuf_next());
1.52 rillig 320: if (token.e[-1] == delim)
321: return;
1.75 rillig 322:
1.52 rillig 323: if (token.e[-1] == '\\') {
1.78 rillig 324: if (*inp.s == '\n')
1.52 rillig 325: ++line_no;
1.87 rillig 326: token_add_char(inbuf_next());
1.52 rillig 327: }
328: }
1.43 rillig 329: }
330:
1.84 rillig 331: /* Guess whether the current token is a declared type. */
1.57 rillig 332: static bool
1.84 rillig 333: probably_typename(const struct parser_state *state)
1.57 rillig 334: {
1.70 rillig 335: if (state->p_l_follow != 0)
336: return false;
337: if (state->block_init || state->in_stmt)
338: return false;
1.78 rillig 339: if (inp.s[0] == '*' && inp.s[1] != '=')
1.70 rillig 340: goto maybe;
1.78 rillig 341: if (isalpha((unsigned char)*inp.s))
1.70 rillig 342: goto maybe;
343: return false;
344: maybe:
345: return state->last_token == semicolon ||
1.71 rillig 346: state->last_token == lbrace ||
347: state->last_token == rbrace;
1.57 rillig 348: }
349:
1.84 rillig 350: static int
351: bsearch_typenames(const char *key)
352: {
353: const char **arr = typenames.items;
354: int lo = 0;
355: int hi = (int)typenames.len - 1;
356:
357: while (lo <= hi) {
358: int mid = (int)((unsigned)(lo + hi) >> 1);
359: int cmp = strcmp(arr[mid], key);
360: if (cmp < 0)
361: lo = mid + 1;
362: else if (cmp > 0)
363: hi = mid - 1;
364: else
365: return mid;
366: }
367: return -(lo + 1);
368: }
369:
1.63 rillig 370: static bool
371: is_typename(void)
372: {
1.84 rillig 373: if (opt.auto_typedefs &&
374: token.e - token.s >= 2 && memcmp(token.e - 2, "_t", 2) == 0)
375: return true;
1.63 rillig 376:
1.84 rillig 377: return bsearch_typenames(token.s) >= 0;
1.63 rillig 378: }
379:
1.90 rillig 380: /* Read an alphanumeric token into 'token', or return end_of_file. */
1.89 rillig 381: static token_type
382: lexi_alnum(struct parser_state *state)
1.1 cgd 383: {
1.89 rillig 384: if (!(isalnum((unsigned char)*inp.s) ||
385: *inp.s == '_' || *inp.s == '$' ||
386: (inp.s[0] == '.' && isdigit((unsigned char)inp.s[1]))))
1.90 rillig 387: return end_of_file; /* just as a placeholder */
1.89 rillig 388:
389: if (isdigit((unsigned char)*inp.s) ||
390: (inp.s[0] == '.' && isdigit((unsigned char)inp.s[1]))) {
391: lex_number();
392: } else {
393: lex_word();
394: }
395: *token.e = '\0';
1.16 kamil 396:
1.89 rillig 397: if (token.s[0] == 'L' && token.s[1] == '\0' &&
398: (*inp.s == '"' || *inp.s == '\''))
399: return string_prefix;
1.16 kamil 400:
1.89 rillig 401: while (is_hspace(inbuf_peek()))
1.32 rillig 402: inbuf_skip();
1.89 rillig 403: state->keyword = kw_0;
404:
405: if (state->last_token == keyword_struct_union_enum &&
406: state->p_l_follow == 0) {
407: state->last_u_d = true;
408: return decl;
1.16 kamil 409: }
1.6 lukem 410:
1.89 rillig 411: /* Operator after identifier is binary unless last token was 'struct'. */
412: state->last_u_d = (state->last_token == keyword_struct_union_enum);
1.16 kamil 413:
1.89 rillig 414: const struct keyword *kw = bsearch(token.s, keywords,
415: nitems(keywords), sizeof(keywords[0]), cmp_keyword_by_name);
416: if (kw == NULL) {
417: if (is_typename()) {
418: state->keyword = kw_type;
1.16 kamil 419: state->last_u_d = true;
1.89 rillig 420: goto found_typename;
1.16 kamil 421: }
1.89 rillig 422:
423: } else { /* we have a keyword */
424: state->keyword = kw->kind;
425: state->last_u_d = true;
426:
427: switch (kw->kind) {
428: case kw_switch:
429: return switch_expr;
430:
431: case kw_case_or_default:
432: return case_label;
433:
434: case kw_struct_or_union_or_enum:
435: case kw_type:
436: found_typename:
437: if (state->p_l_follow != 0) {
438: /* inside parens: cast, param list, offsetof or sizeof */
439: state->cast_mask |= (1 << state->p_l_follow) & ~state->not_cast_mask;
440: }
441: if (state->last_token == period || state->last_token == unary_op) {
442: state->keyword = kw_0;
443: break;
1.16 kamil 444: }
1.89 rillig 445: if (kw != NULL && kw->kind == kw_struct_or_union_or_enum)
446: return keyword_struct_union_enum;
447: if (state->p_l_follow != 0)
448: break;
449: return decl;
1.75 rillig 450:
1.89 rillig 451: case kw_for_or_if_or_while:
452: return keyword_for_if_while;
1.75 rillig 453:
1.89 rillig 454: case kw_do_or_else:
455: return keyword_do_else;
1.16 kamil 456:
1.89 rillig 457: case kw_storage_class:
458: return storage_class;
1.16 kamil 459:
1.89 rillig 460: case kw_typedef:
461: return type_def;
1.16 kamil 462:
1.89 rillig 463: default: /* all others are treated like any other
1.16 kamil 464: * identifier */
1.89 rillig 465: return ident;
1.90 rillig 466: }
467: }
1.89 rillig 468:
469: if (*inp.s == '(' && state->tos <= 1 && state->ind_level == 0 &&
470: !state->in_parameter_declaration && !state->block_init) {
471:
472: for (const char *p = inp.s; p < inp.e;)
473: if (*p++ == ')' && (*p == ';' || *p == ','))
474: goto not_proc;
475:
476: strncpy(state->procname, token.s, sizeof state->procname - 1);
477: if (state->in_decl)
478: state->in_parameter_declaration = true;
479: return funcname;
480: not_proc:;
481:
482: } else if (probably_typename(state)) {
483: state->keyword = kw_type;
484: state->last_u_d = true;
485: return decl;
486: }
487:
488: if (state->last_token == decl) /* if this is a declared variable,
489: * then following sign is unary */
490: state->last_u_d = true; /* will make "int a -1" work */
491:
492: return ident; /* the ident is not in the list */
493: }
1.75 rillig 494:
/*
 * Reads the next token, placing it in the global variable "token".
 *
 * Leading horizontal whitespace is skipped first.  Numbers, identifiers and
 * keywords are handled by lexi_alnum; all other tokens (punctuation,
 * operators, string and character literals, comment starters, newline and
 * form feed) are classified in the switch below.
 *
 * Returns the type of the token.  Besides the token buffer, this updates
 * state->col_1, state->last_nl and state->last_u_d; the latter tells
 * whether an operator that follows this token is a unary one.
 */
token_type
lexi(struct parser_state *state)
{
    token.e = token.s;		/* start over with an empty token */
    state->col_1 = state->last_nl;
    state->last_nl = false;

    while (is_hspace(*inp.s)) {
	state->col_1 = false;	/* something precedes the token on its line */
	inbuf_skip();
    }

    /* lexi_alnum returns end_of_file if the token is not alphanumeric */
    token_type alnum_ttype = lexi_alnum(state);
    if (alnum_ttype != end_of_file)
	return lexi_end(alnum_ttype);

    /* Scan a non-alphanumeric token */

    check_size_token(3);	/* for things like "<<=" */
    *token.e++ = inbuf_next();
    *token.e = '\0';

    token_type ttype;
    bool unary_delim = false;	/* whether the current token forces a
				 * following operator to be unary */

    /* dispatch on the first character of the token */
    switch (*token.s) {
    case '\n':
	unary_delim = state->last_u_d;
	state->last_nl = true;	/* remember that we just had a newline */
	/* if data has been exhausted, the newline is a dummy. */
	ttype = had_eof ? end_of_file : newline;
	break;

    case '\'':
    case '"':
	/* character and string literals are reported as identifiers */
	lex_char_or_string();
	ttype = ident;
	break;

    case '(':
    case '[':
	unary_delim = true;
	ttype = lparen_or_lbracket;
	break;

    case ')':
    case ']':
	ttype = rparen_or_rbracket;
	break;

    case '#':
	unary_delim = state->last_u_d;
	ttype = preprocessing;
	break;

    case '?':
	unary_delim = true;
	ttype = question;
	break;

    case ':':
	ttype = colon;
	unary_delim = true;
	break;

    case ';':
	unary_delim = true;
	ttype = semicolon;
	break;

    case '{':
	unary_delim = true;
	ttype = lbrace;
	break;

    case '}':
	unary_delim = true;
	ttype = rbrace;
	break;

    case '\f':
	unary_delim = state->last_u_d;
	state->last_nl = true;	/* remember this, so we can set 'state->col_1'
				 * right */
	ttype = form_feed;
	break;

    case ',':
	unary_delim = true;
	ttype = comma;
	break;

    case '.':
	unary_delim = false;
	ttype = period;
	break;

    case '-':
    case '+':
	ttype = state->last_u_d ? unary_op : binary_op;
	unary_delim = true;

	if (*inp.s == token.s[0]) {	/* ++, -- */
	    *token.e++ = *inp.s++;
	    /* after an operand, '++' and '--' are postfix operators */
	    if (state->last_token == ident ||
		state->last_token == rparen_or_rbracket) {
		ttype = state->last_u_d ? unary_op : postfix_op;
		unary_delim = false;
	    }

	} else if (*inp.s == '=') {	/* += */
	    *token.e++ = *inp.s++;

	} else if (*inp.s == '>') {	/* -> */
	    /*
	     * NOTE(review): this branch is shared by '+' and '-', so the
	     * sequence "+>" is combined into a single token as well.
	     */
	    *token.e++ = *inp.s++;
	    unary_delim = false;
	    ttype = unary_op;
	    state->want_blank = false;
	}
	break;

    case '=':
	if (state->init_or_struct)
	    state->block_init = true;
	if (*inp.s == '=') {	/* == */
	    *token.e++ = *inp.s++;
	    *token.e = '\0';
	}
	ttype = binary_op;
	unary_delim = true;
	break;

    case '>':
    case '<':
    case '!':			/* ops like <, <<, <=, !=, etc */
	if (*inp.s == '>' || *inp.s == '<' || *inp.s == '=')
	    *token.e++ = inbuf_next();
	if (*inp.s == '=')	/* third character, as in "<<=" */
	    *token.e++ = *inp.s++;
	ttype = state->last_u_d ? unary_op : binary_op;
	unary_delim = true;
	break;

    case '*':
	unary_delim = true;
	if (!state->last_u_d) {
	    /* binary '*', possibly the compound assignment '*=' */
	    if (*inp.s == '=')
		*token.e++ = *inp.s++;
	    ttype = binary_op;
	    break;
	}

	/* unary '*': collect all following stars, dropping the whitespace */
	while (*inp.s == '*' || isspace((unsigned char)*inp.s)) {
	    if (*inp.s == '*')
		token_add_char('*');
	    inbuf_skip();
	}

	/*
	 * NOTE(review): this block uses the global 'ps' while the rest of
	 * the function uses 'state' -- confirm that this is intended.
	 */
	if (ps.in_decl) {
	    char *tp = inp.s;

	    /* look ahead past letters and whitespace for a '(' */
	    while (isalpha((unsigned char)*tp) ||
		   isspace((unsigned char)*tp)) {
		/*
		 * NOTE(review): 'tp' is not adjusted after
		 * inbuf_read_line; if that function replaces the content
		 * of the input buffer, 'tp' is stale from here on and the
		 * text scanned so far may be lost -- verify.
		 */
		if (++tp >= inp.e)
		    inbuf_read_line();
	    }
	    if (*tp == '(')
		ps.procname[0] = ' ';
	}

	ttype = unary_op;
	break;

    default:
	if (token.s[0] == '/' && (*inp.s == '*' || *inp.s == '/')) {
	    /* it is start of comment */
	    *token.e++ = inbuf_next();

	    ttype = comment;
	    unary_delim = state->last_u_d;
	    break;
	}

	while (token.e[-1] == *inp.s || *inp.s == '=') {
	    /* handle '||', '&&', etc., and also things as in 'int *****i' */
	    token_add_char(inbuf_next());
	}

	ttype = state->last_u_d ? unary_op : binary_op;
	unary_delim = true;
    }

    if (inp.s >= inp.e)		/* check for input buffer empty */
	inbuf_read_line();

    state->last_u_d = unary_delim;

    /* terminate the token; make sure there is room for the '\0' */
    check_size_token(1);
    *token.e = '\0';

    return lexi_end(ttype);
}
1.16 kamil 699:
1.6 lukem 700: void
1.64 rillig 701: add_typename(const char *name)
1.1 cgd 702: {
1.64 rillig 703: if (typenames.len >= typenames.cap) {
704: typenames.cap = 16 + 2 * typenames.cap;
705: typenames.items = xrealloc(typenames.items,
706: sizeof(typenames.items[0]) * typenames.cap);
707: }
1.16 kamil 708:
1.84 rillig 709: int pos = bsearch_typenames(name);
1.64 rillig 710: if (pos >= 0)
711: return; /* already in the list */
1.75 rillig 712:
1.64 rillig 713: pos = -(pos + 1);
714: memmove(typenames.items + pos + 1, typenames.items + pos,
1.73 rillig 715: sizeof(typenames.items[0]) * (typenames.len++ - (unsigned)pos));
1.64 rillig 716: typenames.items[pos] = xstrdup(name);
1.1 cgd 717: }
CVSweb <webmaster@jp.NetBSD.org>