Annotation of src/usr.bin/xlint/lint1/lex.c, Revision 1.21
1.21 ! rillig 1: /* $NetBSD: lex.c,v 1.20 2021/03/27 12:42:22 rillig Exp $ */
1.1 rillig 2:
3: /*
4: * Copyright (c) 1996 Christopher G. Demetriou. All Rights Reserved.
5: * Copyright (c) 1994, 1995 Jochen Pohl
6: * All Rights Reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: * 3. All advertising materials mentioning features or use of this software
17: * must display the following acknowledgement:
18: * This product includes software developed by Jochen Pohl for
19: * The NetBSD Project.
20: * 4. The name of the author may not be used to endorse or promote products
21: * derived from this software without specific prior written permission.
22: *
23: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26: * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28: * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32: * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33: */
34:
1.4 christos 35: #if HAVE_NBTOOL_CONFIG_H
36: #include "nbtool_config.h"
37: #endif
38:
1.1 rillig 39: #include <sys/cdefs.h>
40: #if defined(__RCSID) && !defined(lint)
1.21 ! rillig 41: __RCSID("$NetBSD: lex.c,v 1.20 2021/03/27 12:42:22 rillig Exp $");
1.1 rillig 42: #endif
43:
44: #include <ctype.h>
45: #include <errno.h>
46: #include <float.h>
47: #include <limits.h>
48: #include <math.h>
49: #include <stdlib.h>
50: #include <string.h>
51:
52: #include "lint1.h"
53: #include "cgram.h"
54:
55: #define CHAR_MASK ((int)(~(~0U << CHAR_SIZE)))
56:
57:
58: /* Current position (it's also updated when an included file is parsed) */
59: pos_t curr_pos = { 1, "", 0 };
60:
61: /*
62: * Current position in C source (not updated when an included file is
63: * parsed).
64: */
65: pos_t csrc_pos = { 1, "", 0 };
66:
67: /* Are we parsing a gcc attribute? */
68: bool attron;
69:
70: bool in_system_header = false;
71:
72: static sbuf_t *allocsb(void);
73: static void freesb(sbuf_t *);
74: static int inpc(void);
75: static int hash(const char *);
76: static sym_t * search(sbuf_t *);
77: static int keyw(sym_t *);
1.6 rillig 78: static int get_escaped_char(int);
1.1 rillig 79:
80: void
1.6 rillig 81: lex_next_line(void)
1.1 rillig 82: {
83: curr_pos.p_line++;
84: curr_pos.p_uniq = 0;
85: #ifdef DEBUG
86: printf("parsing %s:%d\n", curr_pos.p_file, curr_pos.p_line);
87: #endif
88: if (curr_pos.p_file == csrc_pos.p_file) {
89: csrc_pos.p_line++;
90: csrc_pos.p_uniq = 0;
91: }
92: }
93:
/* Report a character that the scanner has no rule for. */
void
lex_unknown_character(int c)
{
	/* unknown character \%o */
	error(250, c);
}
101:
1.13 rillig 102: #define kwdef(name, token, scl, tspec, tqual, c89, c99, gcc, attr, deco) \
103: { \
104: name, token, scl, tspec, tqual, \
105: (c89) > 0, (c99) > 0, (gcc) > 0, (attr) > 0, deco, \
106: }
107: #define kwdef_token(name, token, c89, c99, gcc, attr, deco) \
108: kwdef(name, token, 0, 0, 0, c89, c99, gcc, attr, deco)
109: #define kwdef_sclass(name, sclass, c89, c99, gcc, attr, deco) \
110: kwdef(name, T_SCLASS, sclass, 0, 0, c89, c99, gcc, attr, deco)
111: #define kwdef_type(name, tspec, c89, c99, gcc, attr, deco) \
112: kwdef(name, T_TYPE, 0, tspec, 0, c89, c99, gcc, attr, deco)
113: #define kwdef_tqual(name, tqual, c89, c99, gcc, attr, deco) \
114: kwdef(name, T_QUAL, 0, 0, tqual, c89, c99, gcc, attr, deco)
115:
1.1 rillig 116: /*
117: * Keywords.
1.9 rillig 118: * During initialization they are written to the symbol table.
1.1 rillig 119: */
120: static struct kwtab {
121: const char *kw_name; /* keyword */
122: int kw_token; /* token returned by yylex() */
123: scl_t kw_scl; /* storage class if kw_token T_SCLASS */
124: tspec_t kw_tspec; /* type spec. if kw_token
125: * T_TYPE or T_STRUCT_OR_UNION */
1.13 rillig 126: tqual_t kw_tqual; /* type qual. if kw_token T_QUAL */
1.1 rillig 127: bool kw_c89 : 1; /* C89 keyword */
128: bool kw_c99 : 1; /* C99 keyword */
129: bool kw_gcc : 1; /* GCC keyword */
130: bool kw_attr : 1; /* GCC attribute, keyword */
131: u_int kw_deco : 3; /* 1 = name, 2 = __name, 4 = __name__ */
132: } kwtab[] = {
133: #ifdef INT128_SIZE
1.13 rillig 134: kwdef_type( "__int128_t", INT128, 0,1,0,0,1),
135: kwdef_type( "__uint128_t", UINT128, 0,1,0,0,1),
1.1 rillig 136: #endif
1.13 rillig 137: kwdef_tqual( "__thread", THREAD, 0,0,1,0,1),
1.16 christos 138: kwdef_token( "_Alignas", T_ALIGNAS, 0,0,0,0,1),
1.13 rillig 139: kwdef_token( "_Alignof", T_ALIGNOF, 0,0,0,0,1),
140: kwdef_type( "_Bool", BOOL, 0,1,0,0,1),
141: kwdef_type( "_Complex", COMPLEX, 0,1,0,0,1),
142: kwdef_token( "_Generic", T_GENERIC, 0,1,0,0,1),
143: kwdef_token( "_Noreturn", T_NORETURN, 0,1,0,0,1),
144: kwdef_tqual( "_Thread_local", THREAD, 0,1,0,0,1),
145: kwdef_token( "alias", T_AT_ALIAS, 0,0,1,1,5),
146: kwdef_token( "aligned", T_AT_ALIGNED, 0,0,1,1,5),
147: kwdef_token( "alignof", T_ALIGNOF, 0,0,0,0,4),
148: kwdef_token( "alloc_size", T_AT_ALLOC_SIZE, 0,0,1,1,5),
149: kwdef_token( "always_inline", T_AT_ALWAYS_INLINE, 0,0,1,1,5),
150: kwdef_token( "asm", T_ASM, 0,0,1,0,7),
151: kwdef_token( "attribute", T_ATTRIBUTE, 0,0,1,0,6),
152: kwdef_sclass( "auto", AUTO, 0,0,0,0,1),
153: kwdef_token( "bounded", T_AT_BOUNDED, 0,0,1,1,5),
154: kwdef_token( "break", T_BREAK, 0,0,0,0,1),
155: kwdef_token( "buffer", T_AT_BUFFER, 0,0,1,1,5),
156: kwdef_token( "builtin_offsetof", T_BUILTIN_OFFSETOF, 0,0,1,0,2),
157: kwdef_token( "case", T_CASE, 0,0,0,0,1),
158: kwdef_type( "char", CHAR, 0,0,0,0,1),
159: kwdef_token( "cold", T_AT_COLD, 0,0,1,1,5),
160: kwdef_tqual( "const", CONST, 1,0,0,0,7),
161: kwdef_token( "constructor", T_AT_CONSTRUCTOR, 0,0,1,1,5),
162: kwdef_token( "continue", T_CONTINUE, 0,0,0,0,1),
163: kwdef_token( "default", T_DEFAULT, 0,0,0,0,1),
164: kwdef_token( "deprecated", T_AT_DEPRECATED, 0,0,1,1,5),
165: kwdef_token( "destructor", T_AT_DESTRUCTOR, 0,0,1,1,5),
166: kwdef_token( "do", T_DO, 0,0,0,0,1),
167: kwdef_type( "double", DOUBLE, 0,0,0,0,1),
168: kwdef_token( "else", T_ELSE, 0,0,0,0,1),
169: kwdef_token( "enum", T_ENUM, 0,0,0,0,1),
170: kwdef_token( "extension", T_EXTENSION, 0,0,1,0,4),
171: kwdef_sclass( "extern", EXTERN, 0,0,0,0,1),
172: kwdef_type( "float", FLOAT, 0,0,0,0,1),
173: kwdef_token( "for", T_FOR, 0,0,0,0,1),
174: kwdef_token( "format", T_AT_FORMAT, 0,0,1,1,5),
175: kwdef_token( "format_arg", T_AT_FORMAT_ARG, 0,0,1,1,5),
176: kwdef_token( "gnu_inline", T_AT_GNU_INLINE, 0,0,1,1,5),
177: kwdef_token( "gnu_printf", T_AT_FORMAT_GNU_PRINTF, 0,0,1,1,5),
178: kwdef_token( "goto", T_GOTO, 0,0,0,0,1),
179: kwdef_token( "if", T_IF, 0,0,0,0,1),
180: kwdef_token( "imag", T_IMAG, 0,1,0,0,4),
181: kwdef_sclass( "inline", INLINE, 0,1,0,0,7),
182: kwdef_type( "int", INT, 0,0,0,0,1),
183: kwdef_type( "long", LONG, 0,0,0,0,1),
184: kwdef_token( "malloc", T_AT_MALLOC, 0,0,1,1,5),
185: kwdef_token( "may_alias", T_AT_MAY_ALIAS, 0,0,1,1,5),
186: kwdef_token( "minbytes", T_AT_MINBYTES, 0,0,1,1,5),
187: kwdef_token( "mode", T_AT_MODE, 0,0,1,1,5),
188: kwdef_token( "no_instrument_function",
189: T_AT_NO_INSTRUMENT_FUNCTION, 0,0,1,1,5),
190: kwdef_token( "nonnull", T_AT_NONNULL, 0,0,1,1,5),
191: kwdef_token( "noinline", T_AT_NOINLINE, 0,0,1,1,5),
192: kwdef_token( "noreturn", T_AT_NORETURN, 0,0,1,1,5),
193: kwdef_token( "nothrow", T_AT_NOTHROW, 0,0,1,1,5),
194: kwdef_token( "optimize", T_AT_OPTIMIZE, 0,0,1,1,5),
195: kwdef_token( "packed", T_AT_PACKED, 0,0,1,1,5),
196: kwdef_token( "packed", T_PACKED, 0,0,0,0,2),
197: kwdef_token( "pcs", T_AT_PCS, 0,0,0,0,5),
198: kwdef_token( "printf", T_AT_FORMAT_PRINTF, 0,0,1,1,5),
199: kwdef_token( "pure", T_AT_PURE, 0,0,1,1,5),
200: kwdef_token( "real", T_REAL, 0,1,0,0,4),
201: kwdef_sclass( "register", REG, 0,0,0,0,1),
202: kwdef_tqual( "restrict", RESTRICT, 0,1,0,0,5),
203: kwdef_token( "return", T_RETURN, 0,0,0,0,1),
204: kwdef_token( "returns_twice", T_AT_RETURNS_TWICE, 0,0,1,1,5),
205: kwdef_token( "scanf", T_AT_FORMAT_SCANF, 0,0,1,1,5),
206: kwdef_token( "section", T_AT_SECTION, 0,0,1,1,7),
207: kwdef_token( "sentinel", T_AT_SENTINEL, 0,0,1,1,5),
208: kwdef_type( "short", SHORT, 0,0,0,0,1),
209: kwdef_type( "signed", SIGNED, 1,0,0,0,3),
210: kwdef_token( "sizeof", T_SIZEOF, 0,0,0,0,1),
211: kwdef_sclass( "static", STATIC, 0,0,0,0,1),
212: kwdef_token( "strfmon", T_AT_FORMAT_STRFMON, 0,0,1,1,5),
213: kwdef_token( "strftime", T_AT_FORMAT_STRFTIME, 0,0,1,1,5),
214: kwdef_token( "string", T_AT_STRING, 0,0,1,1,5),
215: kwdef("struct", T_STRUCT_OR_UNION, 0, STRUCT, 0, 0,0,0,0,1),
216: kwdef_token( "switch", T_SWITCH, 0,0,0,0,1),
217: kwdef_token( "symbolrename", T_SYMBOLRENAME, 0,0,0,0,2),
218: kwdef_token( "syslog", T_AT_FORMAT_SYSLOG, 0,0,1,1,5),
219: kwdef_token( "transparent_union", T_AT_TUNION, 0,0,1,1,5),
220: kwdef_token( "tls_model", T_AT_TLS_MODEL, 0,0,1,1,5),
221: kwdef_sclass( "typedef", TYPEDEF, 0,0,0,0,1),
222: kwdef_token( "typeof", T_TYPEOF, 0,0,1,0,7),
223: kwdef("union", T_STRUCT_OR_UNION, 0, UNION, 0, 0,0,0,0,1),
224: kwdef_type( "unsigned", UNSIGN, 0,0,0,0,1),
225: kwdef_token( "unused", T_AT_UNUSED, 0,0,1,1,5),
226: kwdef_token( "used", T_AT_USED, 0,0,1,1,5),
227: kwdef_token( "visibility", T_AT_VISIBILITY, 0,0,1,1,5),
228: kwdef_type( "void", VOID, 0,0,0,0,1),
229: kwdef_tqual( "volatile", VOLATILE, 1,0,0,0,7),
230: kwdef_token("warn_unused_result", T_AT_WARN_UNUSED_RESULT, 0,0,1,1,5),
231: kwdef_token( "weak", T_AT_WEAK, 0,0,1,1,5),
232: kwdef_token( "while", T_WHILE, 0,0,0,0,1),
233: kwdef(NULL, 0, 0, 0, 0, 0, 0, 0, 0, 0),
234: #undef kwdef
235: #undef kwdef_token
236: #undef kwdef_sclass
237: #undef kwdef_type
238: #undef kwdef_tqual
1.1 rillig 239: };
240:
241: /* Symbol table */
242: static sym_t *symtab[HSHSIZ1];
243:
244: /* bit i of the entry with index i is set */
245: uint64_t qbmasks[64];
246:
247: /* least significant i bits are set in the entry with index i */
248: uint64_t qlmasks[64 + 1];
249:
250: /* least significant i bits are not set in the entry with index i */
251: uint64_t qumasks[64 + 1];
252:
253: /* free list for sbuf structures */
254: static sbuf_t *sbfrlst;
255:
256: /* type of next expected symbol */
257: symt_t symtyp;
258:
259:
260: static void
261: add_keyword(struct kwtab *kw, int deco)
262: {
263: sym_t *sym;
264: size_t h;
265: char buf[256];
266: const char *name;
267:
1.14 rillig 268: if ((kw->kw_deco & deco) == 0)
1.1 rillig 269: return;
270:
271: switch (deco) {
272: case 1:
273: name = kw->kw_name;
274: break;
275: case 2:
1.17 rillig 276: snprintf(buf, sizeof buf, "__%s", kw->kw_name);
1.1 rillig 277: name = strdup(buf);
278: break;
279: default:
280: lint_assert(deco == 4);
1.17 rillig 281: snprintf(buf, sizeof buf, "__%s__", kw->kw_name);
1.1 rillig 282: name = strdup(buf);
283: break;
284: }
285:
286: if (name == NULL)
287: err(1, "Can't init symbol table");
288:
1.17 rillig 289: sym = getblk(sizeof *sym);
1.1 rillig 290: sym->s_name = name;
291: sym->s_keyword = kw;
292: sym->s_value.v_quad = kw->kw_token;
293: if (kw->kw_token == T_TYPE || kw->kw_token == T_STRUCT_OR_UNION) {
294: sym->s_tspec = kw->kw_tspec;
295: } else if (kw->kw_token == T_SCLASS) {
296: sym->s_scl = kw->kw_scl;
297: } else if (kw->kw_token == T_QUAL) {
298: sym->s_tqual = kw->kw_tqual;
299: }
300: h = hash(sym->s_name);
301: if ((sym->s_link = symtab[h]) != NULL)
302: symtab[h]->s_rlink = &sym->s_link;
303: sym->s_rlink = &symtab[h];
304: symtab[h] = sym;
305: }
306:
307: /*
308: * All keywords are written to the symbol table. This saves us looking
309: * in a extra table for each name we found.
310: */
311: void
312: initscan(void)
313: {
314: struct kwtab *kw;
315: size_t i;
316: uint64_t uq;
317:
318: for (kw = kwtab; kw->kw_name != NULL; kw++) {
319: if ((kw->kw_c89 || kw->kw_c99) && tflag)
320: continue;
321: if (kw->kw_c99 && !(Sflag || gflag))
322: continue;
323: if (kw->kw_gcc && !gflag)
324: continue;
325: add_keyword(kw, 1);
326: add_keyword(kw, 2);
327: add_keyword(kw, 4);
328: }
329:
330: /* initialize bit-masks for quads */
331: for (i = 0; i < 64; i++) {
332: qbmasks[i] = (uint64_t)1 << i;
333: uq = ~(uint64_t)0 << i;
334: qumasks[i] = uq;
335: qlmasks[i] = ~uq;
336: }
337: qumasks[i] = 0;
338: qlmasks[i] = ~(uint64_t)0;
339: }
340:
341: /*
342: * Get a free sbuf structure, if possible from the free list
343: */
344: static sbuf_t *
345: allocsb(void)
346: {
347: sbuf_t *sb;
348:
349: if ((sb = sbfrlst) != NULL) {
350: sbfrlst = sb->sb_next;
351: #ifdef BLKDEBUG
1.17 rillig 352: (void)memset(sb, 0, sizeof *sb);
1.1 rillig 353: #else
354: sb->sb_next = NULL;
355: #endif
356: } else {
1.17 rillig 357: sb = xmalloc(sizeof *sb);
358: (void)memset(sb, 0, sizeof *sb);
1.1 rillig 359: }
360: return sb;
361: }
362:
363: /*
364: * Put a sbuf structure to the free list
365: */
366: static void
367: freesb(sbuf_t *sb)
368: {
369:
1.17 rillig 370: (void)memset(sb, ZERO, sizeof *sb);
1.1 rillig 371: sb->sb_next = sbfrlst;
372: sbfrlst = sb;
373: }
374:
375: /*
376: * Read a character and ensure that it is positive (except EOF).
377: * Increment line count(s) if necessary.
378: */
379: static int
380: inpc(void)
381: {
382: int c;
383:
384: if ((c = lex_input()) != EOF && (c &= CHAR_MASK) == '\n')
1.6 rillig 385: lex_next_line();
1.1 rillig 386: return c;
387: }
388:
389: static int
390: hash(const char *s)
391: {
392: u_int v;
393: const u_char *us;
394:
395: v = 0;
396: for (us = (const u_char *)s; *us != '\0'; us++) {
1.17 rillig 397: v = (v << sizeof v) + *us;
398: v ^= v >> (sizeof v * CHAR_BIT - sizeof v);
1.1 rillig 399: }
400: return v % HSHSIZ1;
401: }
402:
403: /*
404: * Lex has found a letter followed by zero or more letters or digits.
405: * It looks for a symbol in the symbol table with the same name. This
406: * symbol must either be a keyword or a symbol of the type required by
407: * symtyp (label, member, tag, ...).
408: *
409: * If it is a keyword, the token is returned. In some cases it is described
410: * more deeply by data written to yylval.
411: *
412: * If it is a symbol, T_NAME is returned and the pointer to a sbuf struct
413: * is stored in yylval. This struct contains the name of the symbol, its
414: * length and hash value. If there is already a symbol of the same name
415: * and type in the symbol table, the sbuf struct also contains a pointer
416: * to the symbol table entry.
417: */
418: extern int
419: lex_name(const char *yytext, size_t yyleng)
420: {
421: char *s;
422: sbuf_t *sb;
423: sym_t *sym;
424: int tok;
425:
426: sb = allocsb();
427: sb->sb_name = yytext;
428: sb->sb_len = yyleng;
429: sb->sb_hash = hash(yytext);
1.3 rillig 430: if ((sym = search(sb)) != NULL && sym->s_keyword != NULL) {
1.1 rillig 431: freesb(sb);
432: return keyw(sym);
433: }
434:
435: sb->sb_sym = sym;
436:
437: if (sym != NULL) {
1.12 rillig 438: lint_assert(block_level >= sym->s_block_level);
1.1 rillig 439: sb->sb_name = sym->s_name;
440: sb->sb_len = strlen(sym->s_name);
441: tok = sym->s_scl == TYPEDEF ? T_TYPENAME : T_NAME;
442: } else {
443: s = getblk(yyleng + 1);
444: (void)memcpy(s, yytext, yyleng + 1);
445: sb->sb_name = s;
446: sb->sb_len = yyleng;
447: tok = T_NAME;
448: }
449:
450: yylval.y_sb = sb;
451: return tok;
452: }
453:
454: static sym_t *
455: search(sbuf_t *sb)
456: {
457: sym_t *sym;
458:
459: for (sym = symtab[sb->sb_hash]; sym != NULL; sym = sym->s_link) {
460: if (strcmp(sym->s_name, sb->sb_name) == 0) {
1.3 rillig 461: if (sym->s_keyword != NULL) {
1.1 rillig 462: struct kwtab *kw = sym->s_keyword;
463: if (!kw->kw_attr || attron)
464: return sym;
465: } else if (!attron && sym->s_kind == symtyp)
466: return sym;
467: }
468: }
469:
470: return NULL;
471: }
472:
473: static int
474: keyw(sym_t *sym)
475: {
476: int t;
477:
478: if ((t = (int)sym->s_value.v_quad) == T_SCLASS) {
479: yylval.y_scl = sym->s_scl;
480: } else if (t == T_TYPE || t == T_STRUCT_OR_UNION) {
481: yylval.y_tspec = sym->s_tspec;
482: } else if (t == T_QUAL) {
483: yylval.y_tqual = sym->s_tqual;
484: }
485: return t;
486: }
487:
488: /*
489: * Convert a string representing an integer into internal representation.
490: * The value is returned in yylval. icon() (and yylex()) returns T_CON.
491: */
492: int
1.6 rillig 493: lex_integer_constant(const char *yytext, size_t yyleng, int base)
1.1 rillig 494: {
495: int l_suffix, u_suffix;
496: int len;
497: const char *cp;
498: char c, *eptr;
499: tspec_t typ;
500: bool ansiu;
1.11 rillig 501: bool warned = false;
1.1 rillig 502: #ifdef TARG_INT128_MAX
503: __uint128_t uq = 0;
504: static tspec_t contypes[2][4] = {
505: { INT, LONG, QUAD, INT128, },
506: { UINT, ULONG, UQUAD, UINT128, }
507: };
508: #else
509: uint64_t uq = 0;
510: static tspec_t contypes[2][3] = {
511: { INT, LONG, QUAD, },
512: { UINT, ULONG, UQUAD, }
513: };
514: #endif
515:
516: cp = yytext;
517: len = yyleng;
518:
519: /* skip 0[xX] or 0[bB] */
520: if (base == 16 || base == 2) {
521: cp += 2;
522: len -= 2;
523: }
524:
525: /* read suffixes */
526: l_suffix = u_suffix = 0;
527: for (;;) {
528: if ((c = cp[len - 1]) == 'l' || c == 'L') {
529: l_suffix++;
530: } else if (c == 'u' || c == 'U') {
531: u_suffix++;
532: } else {
533: break;
534: }
535: len--;
536: }
537: if (l_suffix > 2 || u_suffix > 1) {
538: /* malformed integer constant */
539: warning(251);
540: if (l_suffix > 2)
541: l_suffix = 2;
542: if (u_suffix > 1)
543: u_suffix = 1;
544: }
545: if (tflag && u_suffix != 0) {
546: /* suffix U is illegal in traditional C */
547: warning(97);
548: }
549: typ = contypes[u_suffix][l_suffix];
550:
551: errno = 0;
552:
553: uq = strtouq(cp, &eptr, base);
554: lint_assert(eptr == cp + len);
1.11 rillig 555: if (errno != 0) {
1.1 rillig 556: /* integer constant out of range */
557: warning(252);
1.11 rillig 558: warned = true;
559: }
1.1 rillig 560:
561: /*
562: * If the value is too big for the current type, we must choose
563: * another type.
564: */
565: ansiu = false;
566: switch (typ) {
567: case INT:
568: if (uq <= TARG_INT_MAX) {
569: /* ok */
570: } else if (uq <= TARG_UINT_MAX && base != 10) {
571: typ = UINT;
572: } else if (uq <= TARG_LONG_MAX) {
573: typ = LONG;
574: } else {
575: typ = ULONG;
1.11 rillig 576: if (uq > TARG_ULONG_MAX && !warned) {
1.1 rillig 577: /* integer constant out of range */
578: warning(252);
579: }
580: }
581: if (typ == UINT || typ == ULONG) {
582: if (tflag) {
583: typ = LONG;
584: } else if (!sflag) {
585: /*
586: * Remember that the constant is unsigned
587: * only in ANSI C
588: */
589: ansiu = true;
590: }
591: }
592: break;
593: case UINT:
594: if (uq > TARG_UINT_MAX) {
595: typ = ULONG;
1.11 rillig 596: if (uq > TARG_ULONG_MAX && !warned) {
1.1 rillig 597: /* integer constant out of range */
598: warning(252);
599: }
600: }
601: break;
602: case LONG:
603: if (uq > TARG_LONG_MAX && !tflag) {
604: typ = ULONG;
605: if (!sflag)
606: ansiu = true;
1.11 rillig 607: if (uq > TARG_ULONG_MAX && !warned) {
1.1 rillig 608: /* integer constant out of range */
609: warning(252);
610: }
611: }
612: break;
613: case ULONG:
1.11 rillig 614: if (uq > TARG_ULONG_MAX && !warned) {
1.1 rillig 615: /* integer constant out of range */
616: warning(252);
617: }
618: break;
619: case QUAD:
620: if (uq > TARG_QUAD_MAX && !tflag) {
621: typ = UQUAD;
622: if (!sflag)
623: ansiu = true;
624: }
625: break;
626: case UQUAD:
1.11 rillig 627: if (uq > TARG_UQUAD_MAX && !warned) {
1.1 rillig 628: /* integer constant out of range */
629: warning(252);
630: }
631: break;
632: #ifdef INT128_SIZE
633: case INT128:
634: #ifdef TARG_INT128_MAX
635: if (uq > TARG_INT128_MAX && !tflag) {
636: typ = UINT128;
637: if (!sflag)
638: ansiu = true;
639: }
640: #endif
641: break;
642: case UINT128:
643: #ifdef TARG_INT128_MAX
1.11 rillig 644: if (uq > TARG_UINT128_MAX && !warned) {
1.1 rillig 645: /* integer constant out of range */
646: warning(252);
647: }
648: #endif
649: break;
650: #endif
651: /* LINTED206: (enumeration values not handled in switch) */
652: case STRUCT:
653: case VOID:
654: case LDOUBLE:
655: case FUNC:
656: case ARRAY:
657: case PTR:
658: case ENUM:
659: case UNION:
660: case SIGNED:
661: case NOTSPEC:
662: case DOUBLE:
663: case FLOAT:
664: case USHORT:
665: case SHORT:
666: case UCHAR:
667: case SCHAR:
668: case CHAR:
669: case BOOL:
670: case UNSIGN:
671: case FCOMPLEX:
672: case DCOMPLEX:
673: case LCOMPLEX:
674: case COMPLEX:
675: break;
676: }
677:
678: uq = (uint64_t)xsign((int64_t)uq, typ, -1);
679:
1.17 rillig 680: yylval.y_val = xcalloc(1, sizeof *yylval.y_val);
681: yylval.y_val->v_tspec = typ;
1.1 rillig 682: yylval.y_val->v_ansiu = ansiu;
683: yylval.y_val->v_quad = (int64_t)uq;
684:
685: return T_CON;
686: }
687:
688: /*
1.3 rillig 689: * Returns whether t is a signed type and the value is negative.
1.1 rillig 690: *
691: * len is the number of significant bits. If len is -1, len is set
692: * to the width of type t.
693: */
1.3 rillig 694: static bool
1.1 rillig 695: sign(int64_t q, tspec_t t, int len)
696: {
697:
698: if (t == PTR || is_uinteger(t))
1.3 rillig 699: return false;
700: return msb(q, t, len) != 0;
1.1 rillig 701: }
702:
703: int
704: msb(int64_t q, tspec_t t, int len)
705: {
706:
707: if (len <= 0)
1.10 rillig 708: len = size_in_bits(t);
1.3 rillig 709: return (q & qbmasks[len - 1]) != 0 ? 1 : 0;
1.1 rillig 710: }
711:
712: /*
713: * Extends the sign of q.
714: */
715: int64_t
716: xsign(int64_t q, tspec_t t, int len)
717: {
718:
719: if (len <= 0)
1.10 rillig 720: len = size_in_bits(t);
1.1 rillig 721:
722: if (t == PTR || is_uinteger(t) || !sign(q, t, len)) {
723: q &= qlmasks[len];
724: } else {
725: q |= qumasks[len];
726: }
727: return q;
728: }
729:
730: /*
731: * Convert a string representing a floating point value into its integral
732: * representation. Type and value are returned in yylval. fcon()
733: * (and yylex()) returns T_CON.
734: * XXX Currently it is not possible to convert constants of type
735: * long double which are greater than DBL_MAX.
736: */
737: int
1.6 rillig 738: lex_floating_constant(const char *yytext, size_t yyleng)
1.1 rillig 739: {
740: const char *cp;
741: int len;
742: tspec_t typ;
743: char c, *eptr;
744: double d;
745: float f = 0;
746:
747: cp = yytext;
748: len = yyleng;
749:
750: if (cp[len - 1] == 'i') {
751: /* imaginary, do nothing for now */
752: len--;
753: }
754: if ((c = cp[len - 1]) == 'f' || c == 'F') {
755: typ = FLOAT;
756: len--;
757: } else if (c == 'l' || c == 'L') {
758: typ = LDOUBLE;
759: len--;
760: } else {
761: if (c == 'd' || c == 'D')
762: len--;
763: typ = DOUBLE;
764: }
765:
766: if (tflag && typ != DOUBLE) {
767: /* suffixes F and L are illegal in traditional C */
768: warning(98);
769: }
770:
771: errno = 0;
772: d = strtod(cp, &eptr);
773: if (eptr != cp + len) {
774: switch (*eptr) {
775: /*
776: * XXX: non-native non-current strtod() may not handle hex
777: * floats, ignore the rest if we find traces of hex float
778: * syntax...
779: */
780: case 'p':
781: case 'P':
782: case 'x':
783: case 'X':
784: d = 0;
785: errno = 0;
786: break;
787: default:
1.20 rillig 788: INTERNAL_ERROR("fcon(%s->%s)", cp, eptr);
1.1 rillig 789: }
790: }
791: if (errno != 0)
792: /* floating-point constant out of range */
793: warning(248);
794:
795: if (typ == FLOAT) {
796: f = (float)d;
1.5 rillig 797: if (finite(f) == 0) {
1.1 rillig 798: /* floating-point constant out of range */
799: warning(248);
800: f = f > 0 ? FLT_MAX : -FLT_MAX;
801: }
802: }
803:
1.17 rillig 804: yylval.y_val = xcalloc(1, sizeof *yylval.y_val);
805: yylval.y_val->v_tspec = typ;
1.1 rillig 806: if (typ == FLOAT) {
807: yylval.y_val->v_ldbl = f;
808: } else {
809: yylval.y_val->v_ldbl = d;
810: }
811:
812: return T_CON;
813: }
814:
815: int
816: lex_operator(int t, op_t o)
817: {
818:
819: yylval.y_op = o;
820: return t;
821: }
822:
823: /*
824: * Called if lex found a leading \'.
825: */
826: int
1.6 rillig 827: lex_character_constant(void)
1.1 rillig 828: {
829: size_t n;
830: int val, c;
831: char cv;
832:
833: n = 0;
834: val = 0;
1.6 rillig 835: while ((c = get_escaped_char('\'')) >= 0) {
1.1 rillig 836: val = (val << CHAR_SIZE) + c;
837: n++;
838: }
839: if (c == -2) {
840: /* unterminated character constant */
841: error(253);
842: } else {
1.17 rillig 843: /* XXX: should rather be sizeof(TARG_INT) */
1.18 rillig 844: if (n > sizeof(int) || (n > 1 && (pflag || hflag))) {
1.1 rillig 845: /* too many characters in character constant */
846: error(71);
847: } else if (n > 1) {
848: /* multi-character character constant */
849: warning(294);
850: } else if (n == 0) {
851: /* empty character constant */
852: error(73);
853: }
854: }
855: if (n == 1) {
856: cv = (char)val;
857: val = cv;
858: }
859:
1.17 rillig 860: yylval.y_val = xcalloc(1, sizeof *yylval.y_val);
1.1 rillig 861: yylval.y_val->v_tspec = INT;
862: yylval.y_val->v_quad = val;
863:
864: return T_CON;
865: }
866:
867: /*
868: * Called if lex found a leading L\'
869: */
870: int
1.6 rillig 871: lex_wide_character_constant(void)
1.1 rillig 872: {
873: static char buf[MB_LEN_MAX + 1];
1.7 rillig 874: size_t i, imax;
1.1 rillig 875: int c;
876: wchar_t wc;
877:
1.7 rillig 878: imax = MB_CUR_MAX;
879:
1.1 rillig 880: i = 0;
1.6 rillig 881: while ((c = get_escaped_char('\'')) >= 0) {
1.7 rillig 882: if (i < imax)
1.1 rillig 883: buf[i] = (char)c;
884: i++;
885: }
886:
887: wc = 0;
888:
889: if (c == -2) {
890: /* unterminated character constant */
891: error(253);
892: } else if (c == 0) {
893: /* empty character constant */
894: error(73);
895: } else {
1.7 rillig 896: if (i > imax) {
897: i = imax;
1.1 rillig 898: /* too many characters in character constant */
899: error(71);
900: } else {
901: buf[i] = '\0';
902: (void)mbtowc(NULL, NULL, 0);
1.7 rillig 903: if (mbtowc(&wc, buf, imax) < 0)
1.1 rillig 904: /* invalid multibyte character */
905: error(291);
906: }
907: }
908:
1.17 rillig 909: yylval.y_val = xcalloc(1, sizeof *yylval.y_val);
1.1 rillig 910: yylval.y_val->v_tspec = WCHAR;
911: yylval.y_val->v_quad = wc;
912:
913: return T_CON;
914: }
915:
916: /*
917: * Read a character which is part of a character constant or of a string
918: * and handle escapes.
919: *
1.2 rillig 920: * The argument is the character which delimits the character constant or
1.1 rillig 921: * string.
922: *
923: * Returns -1 if the end of the character constant or string is reached,
924: * -2 if the EOF is reached, and the character otherwise.
925: */
926: static int
1.6 rillig 927: get_escaped_char(int delim)
1.1 rillig 928: {
929: static int pbc = -1;
930: int n, c, v;
931:
932: if (pbc == -1) {
933: c = inpc();
934: } else {
935: c = pbc;
936: pbc = -1;
937: }
1.2 rillig 938: if (c == delim)
1.1 rillig 939: return -1;
940: switch (c) {
941: case '\n':
942: if (tflag) {
943: /* newline in string or char constant */
944: error(254);
945: return -2;
946: }
947: return c;
948: case EOF:
949: return -2;
950: case '\\':
951: switch (c = inpc()) {
952: case '"':
1.2 rillig 953: if (tflag && delim == '\'')
1.1 rillig 954: /* \" inside character constants undef... */
955: warning(262);
956: return '"';
957: case '\'':
958: return '\'';
959: case '?':
960: if (tflag)
961: /* \? undefined in traditional C */
962: warning(263);
963: return '?';
964: case '\\':
965: return '\\';
966: case 'a':
967: if (tflag)
968: /* \a undefined in traditional C */
969: warning(81);
970: return '\a';
971: case 'b':
972: return '\b';
973: case 'f':
974: return '\f';
975: case 'n':
976: return '\n';
977: case 'r':
978: return '\r';
979: case 't':
980: return '\t';
981: case 'v':
982: if (tflag)
983: /* \v undefined in traditional C */
984: warning(264);
985: return '\v';
986: case '8': case '9':
987: /* bad octal digit %c */
988: warning(77, c);
989: /* FALLTHROUGH */
990: case '0': case '1': case '2': case '3':
991: case '4': case '5': case '6': case '7':
992: n = 3;
993: v = 0;
994: do {
995: v = (v << 3) + (c - '0');
996: c = inpc();
1.3 rillig 997: } while (--n > 0 && isdigit(c) && (tflag || c <= '7'));
1.1 rillig 998: if (tflag && n > 0 && isdigit(c))
999: /* bad octal digit %c */
1000: warning(77, c);
1001: pbc = c;
1002: if (v > TARG_UCHAR_MAX) {
1003: /* character escape does not fit in character */
1004: warning(76);
1005: v &= CHAR_MASK;
1006: }
1007: return v;
1008: case 'x':
1009: if (tflag)
1010: /* \x undefined in traditional C */
1011: warning(82);
1012: v = 0;
1013: n = 0;
1014: while ((c = inpc()) >= 0 && isxdigit(c)) {
1015: c = isdigit(c) ?
1016: c - '0' : toupper(c) - 'A' + 10;
1017: v = (v << 4) + c;
1018: if (n >= 0) {
1019: if ((v & ~CHAR_MASK) != 0) {
1020: /* overflow in hex escape */
1021: warning(75);
1022: n = -1;
1023: } else {
1024: n++;
1025: }
1026: }
1027: }
1028: pbc = c;
1029: if (n == 0) {
1030: /* no hex digits follow \x */
1031: error(74);
1032: } if (n == -1) {
1033: v &= CHAR_MASK;
1034: }
1035: return v;
1036: case '\n':
1.6 rillig 1037: return get_escaped_char(delim);
1.1 rillig 1038: case EOF:
1039: return -2;
1040: default:
1041: if (isprint(c)) {
1042: /* dubious escape \%c */
1043: warning(79, c);
1044: } else {
1045: /* dubious escape \%o */
1046: warning(80, c);
1047: }
1048: }
1049: }
1050: return c;
1051: }
1052:
1053: /* See https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html */
1054: static void
1055: parse_line_directive_flags(const char *p)
1056: {
1057:
1058: in_system_header = false;
1059:
1.8 rillig 1060: while (*p != '\0') {
1.1 rillig 1061: while (ch_isspace(*p))
1062: p++;
1.8 rillig 1063:
1064: const char *word_start = p;
1065: while (*p != '\0' && !ch_isspace(*p))
1066: p++;
1067: const char *word_end = p;
1068:
1069: if (word_end - word_start == 1 && word_start[0] == '3')
1070: in_system_header = true;
1.1 rillig 1071: }
1072:
1073: #if 0
1074: if (c != '\0')
1075: warning("extra character(s) after directive");
1076: #endif
1077: }
1078:
1079: /*
1080: * Called for preprocessor directives. Currently implemented are:
1081: * # lineno
1082: * # lineno "filename"
1.2 rillig 1083: * # lineno "filename" GCC-flag...
1.1 rillig 1084: */
1085: void
1086: lex_directive(const char *yytext)
1087: {
1088: const char *cp, *fn;
1089: char c, *eptr;
1090: size_t fnl;
1091: long ln;
1092: static bool first = true;
1093:
1094: /* Go to first non-whitespace after # */
1095: for (cp = yytext + 1; (c = *cp) == ' ' || c == '\t'; cp++)
1096: continue;
1097:
1098: if (!ch_isdigit(c)) {
1099: if (strncmp(cp, "pragma", 6) == 0 && ch_isspace(cp[6]))
1100: return;
1101: error:
1102: /* undefined or invalid # directive */
1103: warning(255);
1104: return;
1105: }
1106: ln = strtol(--cp, &eptr, 10);
1107: if (cp == eptr)
1108: goto error;
1109: if ((c = *(cp = eptr)) != ' ' && c != '\t' && c != '\0')
1110: goto error;
1111: while ((c = *cp++) == ' ' || c == '\t')
1112: continue;
1113: if (c != '\0') {
1114: if (c != '"')
1115: goto error;
1116: fn = cp;
1117: while ((c = *cp) != '"' && c != '\0')
1118: cp++;
1119: if (c != '"')
1120: goto error;
1121: if ((fnl = cp++ - fn) > PATH_MAX)
1122: goto error;
1123: parse_line_directive_flags(cp);
1124:
1125: /* empty string means stdin */
1126: if (fnl == 0) {
1127: fn = "{standard input}";
1128: fnl = 16; /* strlen (fn) */
1129: }
1.19 rillig 1130: curr_pos.p_file = record_filename(fn, fnl);
1.1 rillig 1131: /*
1132: * If this is the first directive, the name is the name
1133: * of the C source file as specified at the command line.
1134: * It is written to the output file.
1135: */
1136: if (first) {
1137: csrc_pos.p_file = curr_pos.p_file;
1.19 rillig 1138: outsrc(transform_filename(curr_pos.p_file,
1.1 rillig 1139: strlen(curr_pos.p_file)));
1140: first = false;
1141: }
1142: }
1143: curr_pos.p_line = (int)ln - 1;
1144: curr_pos.p_uniq = 0;
1145: if (curr_pos.p_file == csrc_pos.p_file) {
1146: csrc_pos.p_line = (int)ln - 1;
1147: csrc_pos.p_uniq = 0;
1148: }
1149: }
1150:
1151: /*
1.2 rillig 1152: * Handle lint comments such as ARGSUSED.
1153: *
1.1 rillig 1154: * If one of these comments is recognized, the argument, if any, is
1155: * parsed and a function which handles this comment is called.
1156: */
1157: void
1158: lex_comment(void)
1159: {
1160: int c, lc;
1161: static const struct {
1162: const char *keywd;
1.3 rillig 1163: bool arg;
1.1 rillig 1164: void (*func)(int);
1165: } keywtab[] = {
1.3 rillig 1166: { "ARGSUSED", true, argsused },
1167: { "BITFIELDTYPE", false, bitfieldtype },
1168: { "CONSTCOND", false, constcond },
1169: { "CONSTANTCOND", false, constcond },
1170: { "CONSTANTCONDITION", false, constcond },
1171: { "FALLTHRU", false, fallthru },
1172: { "FALLTHROUGH", false, fallthru },
1173: { "LINTLIBRARY", false, lintlib },
1174: { "LINTED", true, linted },
1175: { "LONGLONG", false, longlong },
1176: { "NOSTRICT", true, linted },
1.15 rillig 1177: { "NOTREACHED", false, not_reached },
1.3 rillig 1178: { "PRINTFLIKE", true, printflike },
1179: { "PROTOLIB", true, protolib },
1180: { "SCANFLIKE", true, scanflike },
1181: { "VARARGS", true, varargs },
1.1 rillig 1182: };
1183: char keywd[32];
1184: char arg[32];
1185: size_t l, i;
1186: int a;
1187: bool eoc;
1188:
1189: eoc = false;
1190:
1191: /* Skip whitespace after the start of the comment */
1192: while ((c = inpc()) != EOF && isspace(c))
1193: continue;
1194:
1195: /* Read the potential keyword to keywd */
1196: l = 0;
1.17 rillig 1197: while (c != EOF && isupper(c) && l < sizeof keywd - 1) {
1.1 rillig 1198: keywd[l++] = (char)c;
1199: c = inpc();
1200: }
1201: keywd[l] = '\0';
1202:
1203: /* look for the keyword */
1.17 rillig 1204: for (i = 0; i < sizeof keywtab / sizeof keywtab[0]; i++) {
1.1 rillig 1205: if (strcmp(keywtab[i].keywd, keywd) == 0)
1206: break;
1207: }
1.17 rillig 1208: if (i == sizeof keywtab / sizeof keywtab[0])
1.1 rillig 1209: goto skip_rest;
1210:
1211: /* skip whitespace after the keyword */
1212: while (c != EOF && isspace(c))
1213: c = inpc();
1214:
1215: /* read the argument, if the keyword accepts one and there is one */
1216: l = 0;
1217: if (keywtab[i].arg) {
1.17 rillig 1218: while (c != EOF && isdigit(c) && l < sizeof arg - 1) {
1.1 rillig 1219: arg[l++] = (char)c;
1220: c = inpc();
1221: }
1222: }
1223: arg[l] = '\0';
1224: a = l != 0 ? atoi(arg) : -1;
1225:
1226: /* skip whitespace after the argument */
1227: while (c != EOF && isspace(c))
1228: c = inpc();
1229:
1230: if (c != '*' || (c = inpc()) != '/') {
1231: if (keywtab[i].func != linted)
1232: /* extra characters in lint comment */
1233: warning(257);
1234: } else {
1235: /*
1236: * remember that we have already found the end of the
1237: * comment
1238: */
1239: eoc = true;
1240: }
1241:
1242: if (keywtab[i].func != NULL)
1243: (*keywtab[i].func)(a);
1244:
1245: skip_rest:
1246: while (!eoc) {
1247: lc = c;
1248: if ((c = inpc()) == EOF) {
1249: /* unterminated comment */
1250: error(256);
1251: break;
1252: }
1253: if (lc == '*' && c == '/')
1254: eoc = true;
1255: }
1256: }
1257:
1258: /*
1259: * Handle // style comments
1260: */
1261: void
1.6 rillig 1262: lex_slash_slash_comment(void)
1.1 rillig 1263: {
1264: int c;
1265:
1266: if (!Sflag && !gflag)
1267: /* %s C does not support // comments */
1268: gnuism(312, tflag ? "traditional" : "ANSI");
1269:
1270: while ((c = inpc()) != EOF && c != '\n')
1271: continue;
1272: }
1273:
1274: /*
1275: * Clear flags for lint comments LINTED, LONGLONG and CONSTCOND.
1276: * clear_warn_flags() is called after function definitions and global and
1277: * local declarations and definitions. It is also called between
1278: * the controlling expression and the body of control statements
1279: * (if, switch, for, while).
1280: */
1281: void
1282: clear_warn_flags(void)
1283: {
1284:
1285: lwarn = LWARN_ALL;
1286: quadflg = false;
1287: constcond_flag = false;
1288: }
1289:
1290: /*
1291: * Strings are stored in a dynamically allocated buffer and passed
1292: * in yylval.y_xstrg to the parser. The parser or the routines called
1293: * by the parser are responsible for freeing this buffer.
1294: */
1295: int
1296: lex_string(void)
1297: {
1298: u_char *s;
1299: int c;
1300: size_t len, max;
1301: strg_t *strg;
1302:
1303: s = xmalloc(max = 64);
1304:
1305: len = 0;
1.6 rillig 1306: while ((c = get_escaped_char('"')) >= 0) {
1.1 rillig 1307: /* +1 to reserve space for a trailing NUL character */
1308: if (len + 1 == max)
1309: s = xrealloc(s, max *= 2);
1310: s[len++] = (char)c;
1311: }
1312: s[len] = '\0';
1313: if (c == -2)
1314: /* unterminated string constant */
1315: error(258);
1316:
1.17 rillig 1317: strg = xcalloc(1, sizeof *strg);
1.1 rillig 1318: strg->st_tspec = CHAR;
1319: strg->st_len = len;
1320: strg->st_cp = s;
1321:
1322: yylval.y_string = strg;
1323: return T_STRING;
1324: }
1325:
1326: int
1.6 rillig 1327: lex_wide_string(void)
1.1 rillig 1328: {
1329: char *s;
1330: int c, n;
1331: size_t i, wi;
1332: size_t len, max, wlen;
1333: wchar_t *ws;
1334: strg_t *strg;
1335:
1336: s = xmalloc(max = 64);
1337: len = 0;
1.6 rillig 1338: while ((c = get_escaped_char('"')) >= 0) {
1.1 rillig 1339: /* +1 to save space for a trailing NUL character */
1340: if (len + 1 >= max)
1341: s = xrealloc(s, max *= 2);
1342: s[len++] = (char)c;
1343: }
1344: s[len] = '\0';
1345: if (c == -2)
1346: /* unterminated string constant */
1347: error(258);
1348:
1349: /* get length of wide-character string */
1350: (void)mblen(NULL, 0);
1351: for (i = 0, wlen = 0; i < len; i += n, wlen++) {
1352: if ((n = mblen(&s[i], MB_CUR_MAX)) == -1) {
1353: /* invalid multibyte character */
1354: error(291);
1355: break;
1356: }
1357: if (n == 0)
1358: n = 1;
1359: }
1360:
1.17 rillig 1361: ws = xmalloc((wlen + 1) * sizeof *ws);
1.1 rillig 1362:
1363: /* convert from multibyte to wide char */
1364: (void)mbtowc(NULL, NULL, 0);
1365: for (i = 0, wi = 0; i < len; i += n, wi++) {
1366: if ((n = mbtowc(&ws[wi], &s[i], MB_CUR_MAX)) == -1)
1367: break;
1368: if (n == 0)
1369: n = 1;
1370: }
1371: ws[wi] = 0;
1372: free(s);
1373:
1.17 rillig 1374: strg = xcalloc(1, sizeof *strg);
1.1 rillig 1375: strg->st_tspec = WCHAR;
1376: strg->st_len = wlen;
1377: strg->st_wcp = ws;
1378:
1379: yylval.y_string = strg;
1380: return T_STRING;
1381: }
1382:
1383: /*
1384: * As noted above the scanner does not create new symbol table entries
1385: * for symbols it cannot find in the symbol table. This is to avoid
1386: * putting undeclared symbols into the symbol table if a syntax error
1387: * occurs.
1388: *
1389: * getsym() is called as soon as it is probably ok to put the symbol to
1390: * the symbol table. This does not mean that it is not possible that
1391: * symbols are put to the symbol table which are not completely
1392: * declared due to syntax errors. To avoid too many problems in this
1393: * case, symbols get type int in getsym().
1394: *
1.2 rillig 1395: * XXX calls to getsym() should be delayed until decl1*() is called.
1.1 rillig 1396: */
1397: sym_t *
1398: getsym(sbuf_t *sb)
1399: {
1400: dinfo_t *di;
1401: char *s;
1402: sym_t *sym;
1403:
1404: sym = sb->sb_sym;
1405:
1406: /*
1407: * During member declaration it is possible that name() looked
1408: * for symbols of type FVFT, although it should have looked for
1409: * symbols of type FTAG. Same can happen for labels. Both cases
1410: * are compensated here.
1411: */
1412: if (symtyp == FMEMBER || symtyp == FLABEL) {
1413: if (sym == NULL || sym->s_kind == FVFT)
1414: sym = search(sb);
1415: }
1416:
1417: if (sym != NULL) {
1418: if (sym->s_kind != symtyp)
1.20 rillig 1419: INTERNAL_ERROR("getsym(%d, %d)", sym->s_kind, symtyp);
1.1 rillig 1420: symtyp = FVFT;
1421: freesb(sb);
1422: return sym;
1423: }
1424:
1425: /* create a new symbol table entry */
1426:
1427: /* labels must always be allocated at level 1 (outermost block) */
1428: if (symtyp == FLABEL) {
1.17 rillig 1429: sym = getlblk(1, sizeof *sym);
1.1 rillig 1430: s = getlblk(1, sb->sb_len + 1);
1431: (void)memcpy(s, sb->sb_name, sb->sb_len + 1);
1432: sym->s_name = s;
1.12 rillig 1433: sym->s_block_level = 1;
1.1 rillig 1434: di = dcs;
1435: while (di->d_next != NULL && di->d_next->d_next != NULL)
1436: di = di->d_next;
1437: lint_assert(di->d_ctx == AUTO);
1438: } else {
1.17 rillig 1439: sym = getblk(sizeof *sym);
1.1 rillig 1440: sym->s_name = sb->sb_name;
1.12 rillig 1441: sym->s_block_level = block_level;
1.1 rillig 1442: di = dcs;
1443: }
1444:
1445: UNIQUE_CURR_POS(sym->s_def_pos);
1446: if ((sym->s_kind = symtyp) != FLABEL)
1447: sym->s_type = gettyp(INT);
1448:
1449: symtyp = FVFT;
1450:
1451: if ((sym->s_link = symtab[sb->sb_hash]) != NULL)
1452: symtab[sb->sb_hash]->s_rlink = &sym->s_link;
1453: sym->s_rlink = &symtab[sb->sb_hash];
1454: symtab[sb->sb_hash] = sym;
1455:
1456: *di->d_ldlsym = sym;
1457: di->d_ldlsym = &sym->s_dlnxt;
1458:
1459: freesb(sb);
1460: return sym;
1461: }
1462:
1463: /*
1464: * Construct a temporary symbol. The symbol starts with a digit, so that
1465: * it is illegal.
1466: */
1467: sym_t *
1468: mktempsym(type_t *t)
1469: {
1470: static int n = 0;
1471: int h;
1.12 rillig 1472: char *s = getlblk(block_level, 64);
1.17 rillig 1473: sym_t *sym = getblk(sizeof *sym);
1.1 rillig 1474:
1475: (void)snprintf(s, 64, "%.8d_tmp", n++);
1476: h = hash(s);
1477:
1478: sym->s_name = s;
1479: sym->s_type = t;
1.12 rillig 1480: sym->s_block_level = block_level;
1.1 rillig 1481: sym->s_scl = AUTO;
1482: sym->s_kind = FVFT;
1483: sym->s_used = true;
1484: sym->s_set = true;
1485:
1486: if ((sym->s_link = symtab[h]) != NULL)
1487: symtab[h]->s_rlink = &sym->s_link;
1488: sym->s_rlink = &symtab[h];
1489: symtab[h] = sym;
1490:
1491: *dcs->d_ldlsym = sym;
1492: dcs->d_ldlsym = &sym->s_dlnxt;
1493:
1494: return sym;
1495: }
1496:
1497: /*
1.12 rillig 1498: * Remove a symbol forever from the symbol table. s_block_level
1.1 rillig 1499: * is set to -1 to avoid that the symbol will later be put
1500: * back to the symbol table.
1501: */
1502: void
1503: rmsym(sym_t *sym)
1504: {
1505:
1506: if ((*sym->s_rlink = sym->s_link) != NULL)
1507: sym->s_link->s_rlink = sym->s_rlink;
1.12 rillig 1508: sym->s_block_level = -1;
1.1 rillig 1509: sym->s_link = NULL;
1510: }
1511:
1512: /*
1513: * Remove a list of symbols declared at one level from the symbol
1514: * table.
1515: */
1516: void
1517: rmsyms(sym_t *syms)
1518: {
1519: sym_t *sym;
1520:
1521: for (sym = syms; sym != NULL; sym = sym->s_dlnxt) {
1.12 rillig 1522: if (sym->s_block_level != -1) {
1.1 rillig 1523: if ((*sym->s_rlink = sym->s_link) != NULL)
1524: sym->s_link->s_rlink = sym->s_rlink;
1525: sym->s_link = NULL;
1526: sym->s_rlink = NULL;
1527: }
1528: }
1529: }
1530:
1531: /*
1.2 rillig 1532: * Put a symbol into the symbol table.
1.1 rillig 1533: */
1534: void
1535: inssym(int bl, sym_t *sym)
1536: {
1537: int h;
1538:
1539: h = hash(sym->s_name);
1540: if ((sym->s_link = symtab[h]) != NULL)
1541: symtab[h]->s_rlink = &sym->s_link;
1542: sym->s_rlink = &symtab[h];
1543: symtab[h] = sym;
1.12 rillig 1544: sym->s_block_level = bl;
1.1 rillig 1545: lint_assert(sym->s_link == NULL ||
1.12 rillig 1546: sym->s_block_level >= sym->s_link->s_block_level);
1.1 rillig 1547: }
1548:
1549: /*
1.2 rillig 1550: * Called at level 0 after syntax errors.
1551: *
1.1 rillig 1552: * Removes all symbols which are not declared at level 0 from the
1553: * symbol table. Also frees all memory which is not associated with
1554: * level 0.
1555: */
1556: void
1557: cleanup(void)
1558: {
1559: sym_t *sym, *nsym;
1560: int i;
1561:
1562: for (i = 0; i < HSHSIZ1; i++) {
1563: for (sym = symtab[i]; sym != NULL; sym = nsym) {
1564: nsym = sym->s_link;
1.12 rillig 1565: if (sym->s_block_level >= 1) {
1.1 rillig 1566: if ((*sym->s_rlink = nsym) != NULL)
1567: nsym->s_rlink = sym->s_rlink;
1568: }
1569: }
1570: }
1571:
1.12 rillig 1572: for (i = mem_block_level; i > 0; i--)
1.1 rillig 1573: freelblk(i);
1574: }
1575:
1576: /*
1577: * Create a new symbol with the name of an existing symbol.
1578: */
1579: sym_t *
1.21 ! rillig 1580: pushdown(const sym_t *sym)
1.1 rillig 1581: {
1582: int h;
1583: sym_t *nsym;
1584:
1585: h = hash(sym->s_name);
1.17 rillig 1586: nsym = getblk(sizeof *nsym);
1.12 rillig 1587: lint_assert(sym->s_block_level <= block_level);
1.1 rillig 1588: nsym->s_name = sym->s_name;
1589: UNIQUE_CURR_POS(nsym->s_def_pos);
1590: nsym->s_kind = sym->s_kind;
1.12 rillig 1591: nsym->s_block_level = block_level;
1.1 rillig 1592:
1593: if ((nsym->s_link = symtab[h]) != NULL)
1594: symtab[h]->s_rlink = &nsym->s_link;
1595: nsym->s_rlink = &symtab[h];
1596: symtab[h] = nsym;
1597:
1598: *dcs->d_ldlsym = nsym;
1599: dcs->d_ldlsym = &nsym->s_dlnxt;
1600:
1601: return nsym;
1602: }
1603:
1604: /*
1605: * Free any dynamically allocated memory referenced by
1606: * the value stack or yylval.
1607: * The type of information in yylval is described by tok.
1608: */
1609: void
1610: freeyyv(void *sp, int tok)
1611: {
1612: if (tok == T_NAME || tok == T_TYPENAME) {
1613: sbuf_t *sb = *(sbuf_t **)sp;
1614: freesb(sb);
1615: } else if (tok == T_CON) {
1616: val_t *val = *(val_t **)sp;
1617: free(val);
1618: } else if (tok == T_STRING) {
1619: strg_t *strg = *(strg_t **)sp;
1620: if (strg->st_tspec == CHAR) {
1621: free(strg->st_cp);
1622: } else {
1623: lint_assert(strg->st_tspec == WCHAR);
1624: free(strg->st_wcp);
1625: }
1626: free(strg);
1627: }
1628: }
CVSweb <webmaster@jp.NetBSD.org>