Annotation of src/usr.bin/indent/indent.c, Revision 1.15
1.15 ! agc 1: /* $NetBSD: indent.c,v 1.14 2003/06/19 15:45:22 christos Exp $ */
1.4 tls 2:
1.1 cgd 3: /*
1.5 mrg 4: * Copyright (c) 1980, 1993
5: * The Regents of the University of California. All rights reserved.
1.15 ! agc 6: *
! 7: * Redistribution and use in source and binary forms, with or without
! 8: * modification, are permitted provided that the following conditions
! 9: * are met:
! 10: * 1. Redistributions of source code must retain the above copyright
! 11: * notice, this list of conditions and the following disclaimer.
! 12: * 2. Redistributions in binary form must reproduce the above copyright
! 13: * notice, this list of conditions and the following disclaimer in the
! 14: * documentation and/or other materials provided with the distribution.
! 15: * 3. Neither the name of the University nor the names of its contributors
! 16: * may be used to endorse or promote products derived from this software
! 17: * without specific prior written permission.
! 18: *
! 19: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
! 20: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 21: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 22: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
! 23: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 24: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 25: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 26: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 27: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 28: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 29: * SUCH DAMAGE.
! 30: */
! 31:
! 32: /*
1.5 mrg 33: * Copyright (c) 1976 Board of Trustees of the University of Illinois.
1.1 cgd 34: * Copyright (c) 1985 Sun Microsystems, Inc.
35: * All rights reserved.
36: *
37: * Redistribution and use in source and binary forms, with or without
38: * modification, are permitted provided that the following conditions
39: * are met:
40: * 1. Redistributions of source code must retain the above copyright
41: * notice, this list of conditions and the following disclaimer.
42: * 2. Redistributions in binary form must reproduce the above copyright
43: * notice, this list of conditions and the following disclaimer in the
44: * documentation and/or other materials provided with the distribution.
45: * 3. All advertising materials mentioning features or use of this software
46: * must display the following acknowledgement:
47: * This product includes software developed by the University of
48: * California, Berkeley and its contributors.
49: * 4. Neither the name of the University nor the names of its contributors
50: * may be used to endorse or promote products derived from this software
51: * without specific prior written permission.
52: *
53: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63: * SUCH DAMAGE.
64: */
65:
1.6 lukem 66: #include <sys/cdefs.h>
1.1 cgd 67: #ifndef lint
1.6 lukem 68: __COPYRIGHT("@(#) Copyright (c) 1985 Sun Microsystems, Inc.\n\
1.5 mrg 69: @(#) Copyright (c) 1976 Board of Trustees of the University of Illinois.\n\
70: @(#) Copyright (c) 1980, 1993\n\
1.6 lukem 71: The Regents of the University of California. All rights reserved.\n");
72: #endif /* not lint */
1.1 cgd 73:
74: #ifndef lint
1.5 mrg 75: #if 0
76: static char sccsid[] = "@(#)indent.c 5.17 (Berkeley) 6/7/93";
77: #else
1.15 ! agc 78: __RCSID("$NetBSD: indent.c,v 1.14 2003/06/19 15:45:22 christos Exp $");
1.5 mrg 79: #endif
1.6 lukem 80: #endif /* not lint */
1.1 cgd 81:
82: #include <sys/param.h>
1.6 lukem 83: #include <ctype.h>
84: #include <err.h>
85: #include <errno.h>
1.1 cgd 86: #include <fcntl.h>
87: #include <stdio.h>
88: #include <stdlib.h>
89: #include <string.h>
1.6 lukem 90: #include <unistd.h>
1.14 christos 91: #include <locale.h>
1.9 wsanchez 92: #define EXTERN
1.1 cgd 93: #include "indent_globs.h"
1.9 wsanchez 94: #undef EXTERN
1.1 cgd 95: #include "indent_codes.h"
96:
1.6 lukem 97: char *in_name = "Standard Input"; /* name of the input file; main() replaces
98: * this default with the first non-option argument */
99: char *out_name = "Standard Output"; /* name of the output file; main() replaces
1.1 cgd 100: * this default with the second non-option argument, or with in_name when no output file is given and bakcopy() is used */
1.6 lukem 101: char bakfile[MAXPATHLEN] = ""; /* starts out empty; presumably the path of the backup copy made by bakcopy() -- not visible here, confirm */
1.1 cgd 102:
1.13 wiz 103: int main(int, char **);
1.6 lukem 104:
105: int
1.13 wiz 106: main(int argc, char **argv)
1.1 cgd 107: {
108:
1.6 lukem 109: extern int found_err; /* flag set in diag() on error */
110: int dec_ind; /* current indentation for declarations */
111: int di_stack[20]; /* a stack of structure indentation levels */
112: int flushed_nl; /* used when buffering up comments to remember
1.1 cgd 113: * that a newline was passed over */
1.6 lukem 114: int force_nl; /* when true, code must be broken */
115: int hd_type; /* used to store type of stmt for if (...),
1.1 cgd 116: * for (...), etc */
1.6 lukem 117: int i; /* local loop counter */
118: int scase; /* set to true when we see a case, so we will
1.1 cgd 119: * know what to do with the following colon */
1.6 lukem 120: int sp_sw; /* when true, we are in the expression of
1.1 cgd 121: * if(...), while(...), etc. */
1.6 lukem 122: int squest; /* when this is positive, we have seen a ?
1.1 cgd 123: * without the matching : in a <c>?<s>:<s>
124: * construct */
1.6 lukem 125: char *t_ptr; /* used for copying tokens */
126: int type_code; /* the type of token, returned by lexi */
1.1 cgd 127:
1.6 lukem 128: int last_else = 0; /* true iff last keyword was an else */
1.1 cgd 129:
130:
1.6 lukem 131: /*-----------------------------------------------*\
132: | INITIALIZATION |
133: \*-----------------------------------------------*/
1.1 cgd 134:
1.14 christos 135: if (!setlocale(LC_ALL, ""))
136: fprintf(stderr, "indent: can't set locale.\n");
1.1 cgd 137:
1.6 lukem 138: hd_type = 0;
139: ps.p_stack[0] = stmt; /* this is the parser's stack */
140: ps.last_nl = true; /* this is true if the last thing scanned was
1.1 cgd 141: * a newline */
1.6 lukem 142: ps.last_token = semicolon;
143: combuf = (char *) malloc(bufsize);
144: labbuf = (char *) malloc(bufsize);
145: codebuf = (char *) malloc(bufsize);
146: tokenbuf = (char *) malloc(bufsize);
147: l_com = combuf + bufsize - 5;
148: l_lab = labbuf + bufsize - 5;
149: l_code = codebuf + bufsize - 5;
150: l_token = tokenbuf + bufsize - 5;
151: combuf[0] = codebuf[0] = labbuf[0] = ' '; /* set up code, label,
152: * and comment buffers */
153: combuf[1] = codebuf[1] = labbuf[1] = '\0';
154: ps.else_if = 1; /* Default else-if special processing to on */
155: s_lab = e_lab = labbuf + 1;
156: s_code = e_code = codebuf + 1;
157: s_com = e_com = combuf + 1;
158: s_token = e_token = tokenbuf + 1;
159:
160: in_buffer = (char *) malloc(10);
161: in_buffer_limit = in_buffer + 8;
162: buf_ptr = buf_end = in_buffer;
163: line_no = 1;
164: had_eof = ps.in_decl = ps.decl_on_line = break_comma = false;
165: sp_sw = force_nl = false;
166: ps.in_or_st = false;
167: ps.bl_line = true;
168: dec_ind = 0;
169: di_stack[ps.dec_nest = 0] = 0;
170: ps.want_blank = ps.in_stmt = ps.ind_stmt = false;
171:
172:
173: scase = ps.pcase = false;
174: squest = 0;
175: sc_end = 0;
176: bp_save = 0;
177: be_save = 0;
178:
179: output = 0;
1.1 cgd 180:
181:
182:
1.6 lukem 183: /*--------------------------------------------------*\
184: | COMMAND LINE SCAN |
185: \*--------------------------------------------------*/
1.1 cgd 186:
187: #ifdef undef
1.6 lukem 188: max_col = 78; /* -l78 */
189: lineup_to_parens = 1; /* -lp */
190: ps.ljust_decl = 0; /* -ndj */
191: ps.com_ind = 33; /* -c33 */
192: star_comment_cont = 1; /* -sc */
193: ps.ind_size = 8; /* -i8 */
194: verbose = 0;
195: ps.decl_indent = 16; /* -di16 */
196: ps.indent_parameters = 1; /* -ip */
197: ps.decl_com_ind = 0; /* if this is not set to some positive value
1.1 cgd 198: * by an arg, we will set this equal to
199: * ps.com_ind */
1.6 lukem 200: btype_2 = 1; /* -br */
201: cuddle_else = 1; /* -ce */
202: ps.unindent_displace = 0; /* -d0 */
203: ps.case_indent = 0; /* -cli0 */
204: format_col1_comments = 1; /* -fc1 */
205: procnames_start_line = 1; /* -psl */
206: proc_calls_space = 0; /* -npcs */
207: comment_delimiter_on_blankline = 1; /* -cdb */
208: ps.leave_comma = 1; /* -nbc */
1.1 cgd 209: #endif
210:
1.6 lukem 211: for (i = 1; i < argc; ++i)
212: if (strcmp(argv[i], "-npro") == 0)
213: break;
214: set_defaults();
215: if (i >= argc)
216: set_profile();
1.1 cgd 217:
1.6 lukem 218: for (i = 1; i < argc; ++i) {
1.1 cgd 219:
1.6 lukem 220: /*
221: * look thru args (if any) for changes to defaults
222: */
223: if (argv[i][0] != '-') { /* no flag on parameter */
224: if (input == 0) { /* we must have the input file */
225: in_name = argv[i]; /* remember name of
226: * input file */
227: input = fopen(in_name, "r");
228: if (input == 0) /* check for open error */
229: err(1, "%s", in_name);
230: continue;
231: } else
232: if (output == 0) { /* we have the output
233: * file */
234: out_name = argv[i]; /* remember name of
235: * output file */
236: if (strcmp(in_name, out_name) == 0) { /* attempt to overwrite
237: * the file */
238: fprintf(stderr, "indent: input and output files must be different\n");
239: exit(1);
240: }
241: output = fopen(out_name, "w");
242: if (output == 0) /* check for create
243: * error */
244: err(1, "%s", out_name);
245: continue;
246: }
247: fprintf(stderr, "indent: unknown parameter: %s\n", argv[i]);
248: exit(1);
249: } else
250: set_option(argv[i]);
251: } /* end of for */
252: if (input == 0) {
1.8 mellon 253: input = stdin;
1.6 lukem 254: }
1.7 ross 255: if (output == 0) {
1.8 mellon 256: if (troff || input == stdin)
1.6 lukem 257: output = stdout;
258: else {
259: out_name = in_name;
260: bakcopy();
1.1 cgd 261: }
1.7 ross 262: }
1.6 lukem 263: if (ps.com_ind <= 1)
264: ps.com_ind = 2; /* dont put normal comments before column 2 */
265: if (troff) {
266: if (bodyf.font[0] == 0)
267: parsefont(&bodyf, "R");
268: if (scomf.font[0] == 0)
269: parsefont(&scomf, "I");
270: if (blkcomf.font[0] == 0)
271: blkcomf = scomf, blkcomf.size += 2;
272: if (boxcomf.font[0] == 0)
273: boxcomf = blkcomf;
274: if (stringf.font[0] == 0)
275: parsefont(&stringf, "L");
276: if (keywordf.font[0] == 0)
277: parsefont(&keywordf, "B");
278: writefdef(&bodyf, 'B');
279: writefdef(&scomf, 'C');
280: writefdef(&blkcomf, 'L');
281: writefdef(&boxcomf, 'X');
282: writefdef(&stringf, 'S');
283: writefdef(&keywordf, 'K');
1.1 cgd 284: }
1.6 lukem 285: if (block_comment_max_col <= 0)
286: block_comment_max_col = max_col;
287: if (ps.decl_com_ind <= 0) /* if not specified by user, set this */
288: ps.decl_com_ind = ps.ljust_decl ? (ps.com_ind <= 10 ? 2 : ps.com_ind - 8) : ps.com_ind;
289: if (continuation_indent == 0)
290: continuation_indent = ps.ind_size;
291: fill_buffer(); /* get first batch of stuff into input buffer */
292:
293: parse(semicolon);
294: {
295: char *p = buf_ptr;
296: int col = 1;
297:
298: while (1) {
299: if (*p == ' ')
300: col++;
301: else
302: if (*p == '\t')
303: col = ((col - 1) & ~7) + 9;
304: else
305: break;
306: p++;
307: }
308: if (col > ps.ind_size)
309: ps.ind_level = ps.i_l_follow = col / ps.ind_size;
1.1 cgd 310: }
1.6 lukem 311: if (troff) {
312: char *p = in_name, *beg = in_name;
313:
314: while (*p)
315: if (*p++ == '/')
316: beg = p;
317: fprintf(output, ".Fn \"%s\"\n", beg);
1.1 cgd 318: }
1.6 lukem 319: /*
320: * START OF MAIN LOOP
321: */
1.1 cgd 322:
1.6 lukem 323: while (1) { /* this is the main loop. it will go until we
1.1 cgd 324: * reach eof */
1.6 lukem 325: int is_procname;
1.1 cgd 326:
1.6 lukem 327: type_code = lexi(); /* lexi reads one token. The actual
328: * characters read are stored in
329: * "token". lexi returns a code
330: * indicating the type of token */
331: is_procname = ps.procname[0];
1.1 cgd 332:
1.6 lukem 333: /*
334: * The following code moves everything following an if (), while (),
335: * else, etc. up to the start of the following stmt to a buffer. This
336: * allows proper handling of both kinds of brace placement.
337: */
338:
339: flushed_nl = false;
340: while (ps.search_brace) { /* if we scanned an if(),
341: * while(), etc., we might
342: * need to copy stuff into a
343: * buffer we must loop,
344: * copying stuff into
345: * save_com, until we find the
346: * start of the stmt which
347: * follows the if, or whatever */
348: switch (type_code) {
349: case newline:
350: ++line_no;
351: flushed_nl = true;
352: case form_feed:
353: break; /* form feeds and newlines found here
354: * will be ignored */
355:
356: case lbrace: /* this is a brace that starts the
357: * compound stmt */
358: if (sc_end == 0) { /* ignore buffering if a
359: * comment wasnt stored
360: * up */
361: ps.search_brace = false;
362: goto check_type;
363: }
364: if (btype_2) {
365: save_com[0] = '{'; /* we either want to put
366: * the brace right after
367: * the if */
368: goto sw_buffer; /* go to common code to
369: * get out of this loop */
370: }
371: case comment: /* we have a comment, so we must copy
372: * it into the buffer */
373: if (!flushed_nl || sc_end != 0) {
374: if (sc_end == 0) { /* if this is the first
375: * comment, we must set
376: * up the buffer */
377: save_com[0] = save_com[1] = ' ';
378: sc_end = &(save_com[2]);
379: } else {
380: *sc_end++ = '\n'; /* add newline between
381: * comments */
382: *sc_end++ = ' ';
383: --line_no;
384: }
385: *sc_end++ = '/'; /* copy in start of
386: * comment */
387: *sc_end++ = '*';
388:
389: for (;;) { /* loop until we get to
390: * the end of the
391: * comment */
392: *sc_end = *buf_ptr++;
393: if (buf_ptr >= buf_end)
394: fill_buffer();
395:
396: if (*sc_end++ == '*' && *buf_ptr == '/')
397: break; /* we are at end of
398: * comment */
399:
400: if (sc_end >= &(save_com[sc_size])) { /* check for temp buffer
401: * overflow */
402: diag(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever.");
403: fflush(output);
404: exit(1);
405: }
406: }
407: *sc_end++ = '/'; /* add ending slash */
408: if (++buf_ptr >= buf_end) /* get past / in buffer */
409: fill_buffer();
410: break;
411: }
412: default: /* it is the start of a normal
413: * statement */
414: if (flushed_nl) /* if we flushed a newline,
415: * make sure it is put back */
416: force_nl = true;
417: if ((type_code == sp_paren && *token == 'i'
418: && last_else && ps.else_if) ||
419: (type_code == sp_nparen && *token == 'e'
420: && e_code != s_code && e_code[-1] == '}'))
421: force_nl = false;
422:
423: if (sc_end == 0) { /* ignore buffering if
424: * comment wasnt saved
425: * up */
426: ps.search_brace = false;
427: goto check_type;
428: }
429: if (force_nl) { /* if we should insert a nl
430: * here, put it into the
431: * buffer */
432: force_nl = false;
433: --line_no; /* this will be
434: * re-increased when the
435: * nl is read from the
436: * buffer */
437: *sc_end++ = '\n';
438: *sc_end++ = ' ';
439: if (verbose && !flushed_nl) /* print error msg if
440: * the line was not
441: * already broken */
442: diag(0, "Line broken");
443: flushed_nl = false;
444: }
445: for (t_ptr = token; *t_ptr; ++t_ptr)
446: *sc_end++ = *t_ptr; /* copy token into temp
447: * buffer */
448: ps.procname[0] = 0;
449:
450: sw_buffer:
451: ps.search_brace = false; /* stop looking for
452: * start of stmt */
453: bp_save = buf_ptr; /* save current input
454: * buffer */
455: be_save = buf_end;
456: buf_ptr = save_com; /* fix so that
457: * subsequent calls to
458: * lexi will take tokens
459: * out of save_com */
460: *sc_end++ = ' '; /* add trailing blank,
461: * just in case */
462: buf_end = sc_end;
463: sc_end = 0;
464: break;
465: } /* end of switch */
466: if (type_code != 0) /* we must make this check,
467: * just in case there was an
468: * unexpected EOF */
469: type_code = lexi(); /* read another token */
470: /* if (ps.search_brace) ps.procname[0] = 0; */
471: if ((is_procname = ps.procname[0]) && flushed_nl
472: && !procnames_start_line && ps.in_decl
473: && type_code == ident)
474: flushed_nl = 0;
475: } /* end of while (search_brace) */
476: last_else = 0;
1.1 cgd 477: check_type:
1.6 lukem 478: if (type_code == 0) { /* we got eof */
479: if (s_lab != e_lab || s_code != e_code
480: || s_com != e_com) /* must dump end of line */
481: dump_line();
482: if (ps.tos > 1) /* check for balanced braces */
483: diag(1, "Stuff missing from end of file.");
484:
485: if (verbose) {
486: printf("There were %d output lines and %d comments\n",
487: ps.out_lines, ps.out_coms);
488: printf("(Lines with comments)/(Lines with code): %6.3f\n",
489: (1.0 * ps.com_lines) / code_lines);
490: }
491: fflush(output);
492: exit(found_err);
1.1 cgd 493: }
1.6 lukem 494: if (
495: (type_code != comment) &&
496: (type_code != newline) &&
497: (type_code != preesc) &&
498: (type_code != form_feed)) {
499: if (force_nl &&
500: (type_code != semicolon) &&
501: (type_code != lbrace || !btype_2)) {
502: /* we should force a broken line here */
503: if (verbose && !flushed_nl)
504: diag(0, "Line broken");
505: flushed_nl = false;
506: dump_line();
507: ps.want_blank = false; /* dont insert blank at
508: * line start */
509: force_nl = false;
510: }
511: ps.in_stmt = true; /* turn on flag which causes
512: * an extra level of
513: * indentation. this is turned
514: * off by a ; or '}' */
515: if (s_com != e_com) { /* the turkey has embedded a
516: * comment in a line. fix it */
517: *e_code++ = ' ';
518: for (t_ptr = s_com; *t_ptr; ++t_ptr) {
519: CHECK_SIZE_CODE;
520: *e_code++ = *t_ptr;
521: }
522: *e_code++ = ' ';
523: *e_code = '\0'; /* null terminate code sect */
524: ps.want_blank = false;
525: e_com = s_com;
526: }
527: } else
528: if (type_code != comment) /* preserve force_nl
529: * thru a comment */
530: force_nl = false; /* cancel forced newline
531: * after newline, form
532: * feed, etc */
1.1 cgd 533:
534:
535:
1.6 lukem 536: /*-----------------------------------------------------*\
537: | do switch on type of token scanned |
538: \*-----------------------------------------------------*/
539: CHECK_SIZE_CODE;
540: switch (type_code) { /* now, decide what to do with the
541: * token */
542:
543: case form_feed:/* found a form feed in line */
544: ps.use_ff = true; /* a form feed is treated much
545: * like a newline */
546: dump_line();
547: ps.want_blank = false;
548: break;
549:
550: case newline:
551: if (ps.last_token != comma || ps.p_l_follow > 0
552: || !ps.leave_comma || ps.block_init || !break_comma || s_com != e_com) {
553: dump_line();
554: ps.want_blank = false;
555: }
556: ++line_no; /* keep track of input line number */
557: break;
1.1 cgd 558:
1.6 lukem 559: case lparen: /* got a '(' or '[' */
560: ++ps.p_l_follow; /* count parens to make Healy
561: * happy */
562: if (ps.want_blank && *token != '[' &&
563: (ps.last_token != ident || proc_calls_space
564: || (ps.its_a_keyword && (!ps.sizeof_keyword || Bill_Shannon))))
565: *e_code++ = ' ';
1.10 christos 566: if (ps.in_decl && !ps.block_init) {
1.6 lukem 567: if (troff && !ps.dumped_decl_indent && !is_procname && ps.last_token == decl) {
568: ps.dumped_decl_indent = 1;
569: sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token);
570: e_code += strlen(e_code);
571: } else {
572: while ((e_code - s_code) < dec_ind) {
573: CHECK_SIZE_CODE;
574: *e_code++ = ' ';
575: }
576: *e_code++ = token[0];
577: }
1.10 christos 578: } else
1.6 lukem 579: *e_code++ = token[0];
580: ps.paren_indents[ps.p_l_follow - 1] = e_code - s_code;
581: if (sp_sw && ps.p_l_follow == 1 && extra_expression_indent
582: && ps.paren_indents[0] < 2 * ps.ind_size)
583: ps.paren_indents[0] = 2 * ps.ind_size;
584: ps.want_blank = false;
585: if (ps.in_or_st && *token == '(' && ps.tos <= 2) {
586: /*
587: * this is a kluge to make sure that declarations will be
588: * aligned right if proc decl has an explicit type on it, i.e.
589: * "int a(x) {..."
590: */
591: parse(semicolon); /* I said this was a
592: * kluge... */
593: ps.in_or_st = false; /* turn off flag for
594: * structure decl or
595: * initialization */
596: }
597: if (ps.sizeof_keyword)
598: ps.sizeof_mask |= 1 << ps.p_l_follow;
1.1 cgd 599: break;
1.6 lukem 600:
601: case rparen: /* got a ')' or ']' */
602: rparen_count--;
603: if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.sizeof_mask) {
604: ps.last_u_d = true;
605: ps.cast_mask &= (1 << ps.p_l_follow) - 1;
606: }
607: ps.sizeof_mask &= (1 << ps.p_l_follow) - 1;
608: if (--ps.p_l_follow < 0) {
609: ps.p_l_follow = 0;
610: diag(0, "Extra %c", *token);
611: }
612: if (e_code == s_code) /* if the paren starts the
613: * line */
614: ps.paren_level = ps.p_l_follow; /* then indent it */
615:
616: *e_code++ = token[0];
617: ps.want_blank = true;
618:
619: if (sp_sw && (ps.p_l_follow == 0)) { /* check for end of if
620: * (...), or some such */
621: sp_sw = false;
622: force_nl = true; /* must force newline
623: * after if */
624: ps.last_u_d = true; /* inform lexi that a
625: * following operator is
626: * unary */
627: ps.in_stmt = false; /* dont use stmt
628: * continuation
629: * indentation */
630:
631: parse(hd_type); /* let parser worry about if,
632: * or whatever */
633: }
634: ps.search_brace = btype_2; /* this should insure
635: * that constructs such
636: * as main(){...} and
637: * int[]{...} have their
638: * braces put in the
639: * right place */
1.1 cgd 640: break;
1.6 lukem 641:
642: case unary_op: /* this could be any unary operation */
643: if (ps.want_blank)
644: *e_code++ = ' ';
645:
646: if (troff && !ps.dumped_decl_indent && ps.in_decl && !is_procname) {
647: sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token);
648: ps.dumped_decl_indent = 1;
649: e_code += strlen(e_code);
650: } else {
651: char *res = token;
652:
653: if (ps.in_decl && !ps.block_init) { /* if this is a unary op
654: * in a declaration, we
655: * should indent this
656: * token */
657: for (i = 0; token[i]; ++i); /* find length of token */
658: while ((e_code - s_code) < (dec_ind - i)) {
659: CHECK_SIZE_CODE;
660: *e_code++ = ' '; /* pad it */
661: }
662: }
663: if (troff && token[0] == '-' && token[1] == '>')
664: res = "\\(->";
665: for (t_ptr = res; *t_ptr; ++t_ptr) {
666: CHECK_SIZE_CODE;
667: *e_code++ = *t_ptr;
668: }
669: }
670: ps.want_blank = false;
1.1 cgd 671: break;
1.6 lukem 672:
673: case binary_op:/* any binary operation */
674: if (ps.want_blank)
675: *e_code++ = ' ';
676: {
677: char *res = token;
678:
679: if (troff)
680: switch (token[0]) {
681: case '<':
682: if (token[1] == '=')
683: res = "\\(<=";
684: break;
685: case '>':
686: if (token[1] == '=')
687: res = "\\(>=";
688: break;
689: case '!':
690: if (token[1] == '=')
691: res = "\\(!=";
692: break;
693: case '|':
694: if (token[1] == '|')
695: res = "\\(br\\(br";
696: else
697: if (token[1] == 0)
698: res = "\\(br";
699: break;
700: }
701: for (t_ptr = res; *t_ptr; ++t_ptr) {
702: CHECK_SIZE_CODE;
703: *e_code++ = *t_ptr; /* move the operator */
704: }
705: }
706: ps.want_blank = true;
1.1 cgd 707: break;
708:
1.6 lukem 709: case postop: /* got a trailing ++ or -- */
710: *e_code++ = token[0];
711: *e_code++ = token[1];
712: ps.want_blank = true;
713: break;
1.1 cgd 714:
1.6 lukem 715: case question: /* got a ? */
716: squest++; /* this will be used when a later
717: * colon appears so we can distinguish
718: * the <c>?<n>:<n> construct */
719: if (ps.want_blank)
720: *e_code++ = ' ';
721: *e_code++ = '?';
722: ps.want_blank = true;
723: break;
1.1 cgd 724:
1.6 lukem 725: case casestmt: /* got word 'case' or 'default' */
726: scase = true; /* so we can process the later colon
727: * properly */
728: goto copy_id;
729:
730: case colon: /* got a ':' */
731: if (squest > 0) { /* it is part of the <c>?<n>:
732: * <n> construct */
733: --squest;
734: if (ps.want_blank)
735: *e_code++ = ' ';
736: *e_code++ = ':';
737: ps.want_blank = true;
738: break;
739: }
1.11 kleink 740: if (ps.in_or_st) {
1.6 lukem 741: *e_code++ = ':';
742: ps.want_blank = false;
743: break;
744: }
745: ps.in_stmt = false; /* seeing a label does not
746: * imply we are in a stmt */
747: for (t_ptr = s_code; *t_ptr; ++t_ptr)
748: *e_lab++ = *t_ptr; /* turn everything so
749: * far into a label */
750: e_code = s_code;
751: *e_lab++ = ':';
752: *e_lab++ = ' ';
753: *e_lab = '\0';
754:
755: force_nl = ps.pcase = scase; /* ps.pcase will be used
756: * by dump_line to
757: * decide how to indent
758: * the label. force_nl
759: * will force a case n:
760: * to be on a line by
761: * itself */
762: scase = false;
763: ps.want_blank = false;
764: break;
1.1 cgd 765:
1.6 lukem 766: case semicolon:/* got a ';' */
767: ps.in_or_st = false; /* we are not in an
768: * initialization or structure
769: * declaration */
770: scase = false; /* these will only need resetting in a
771: * error */
772: squest = 0;
773: if (ps.last_token == rparen && rparen_count == 0)
774: ps.in_parameter_declaration = 0;
775: ps.cast_mask = 0;
776: ps.sizeof_mask = 0;
777: ps.block_init = 0;
778: ps.block_init_level = 0;
779: ps.just_saw_decl--;
780:
781: if (ps.in_decl && s_code == e_code && !ps.block_init)
782: while ((e_code - s_code) < (dec_ind - 1)) {
783: CHECK_SIZE_CODE;
784: *e_code++ = ' ';
785: }
786:
787: ps.in_decl = (ps.dec_nest > 0); /* if we were in a first
788: * level structure
789: * declaration, we arent
790: * any more */
791:
792: if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) {
793:
794: /*
795: * This should be true iff there were unbalanced parens in the
796: * stmt. It is a bit complicated, because the semicolon might
797: * be in a for stmt
798: */
799: diag(1, "Unbalanced parens");
800: ps.p_l_follow = 0;
801: if (sp_sw) { /* this is a check for a if,
802: * while, etc. with unbalanced
803: * parens */
804: sp_sw = false;
805: parse(hd_type); /* dont lose the if, or
806: * whatever */
807: }
808: }
809: *e_code++ = ';';
810: ps.want_blank = true;
811: ps.in_stmt = (ps.p_l_follow > 0); /* we are no longer in
812: * the middle of a stmt */
813:
814: if (!sp_sw) { /* if not if for (;;) */
815: parse(semicolon); /* let parser know about
816: * end of stmt */
817: force_nl = true; /* force newline after a
818: * end of stmt */
1.1 cgd 819: }
1.6 lukem 820: break;
821:
822: case lbrace: /* got a '{' */
823: ps.in_stmt = false; /* dont indent the {} */
824: if (!ps.block_init)
825: force_nl = true; /* force other stuff on
826: * same line as '{' onto
827: * new line */
1.1 cgd 828: else
1.6 lukem 829: if (ps.block_init_level <= 0)
830: ps.block_init_level = 1;
831: else
832: ps.block_init_level++;
833:
834: if (s_code != e_code && !ps.block_init) {
835: if (!btype_2) {
836: dump_line();
837: ps.want_blank = false;
838: } else
839: if (ps.in_parameter_declaration && !ps.in_or_st) {
840: ps.i_l_follow = 0;
841: dump_line();
842: ps.want_blank = false;
843: }
844: }
845: if (ps.in_parameter_declaration)
846: prefix_blankline_requested = 0;
847:
1.12 wiz 848: if (ps.p_l_follow > 0) { /* check for preceding
1.6 lukem 849: * unbalanced parens */
850: diag(1, "Unbalanced parens");
851: ps.p_l_follow = 0;
852: if (sp_sw) { /* check for unclosed if, for,
853: * etc. */
854: sp_sw = false;
855: parse(hd_type);
856: ps.ind_level = ps.i_l_follow;
857: }
858: }
859: if (s_code == e_code)
860: ps.ind_stmt = false; /* dont put extra
861: * indentation on line
862: * with '{' */
863: if (ps.in_decl && ps.in_or_st) { /* this is either a
864: * structure declaration
865: * or an init */
866: di_stack[ps.dec_nest++] = dec_ind;
867: /* ? dec_ind = 0; */
868: } else {
869: ps.decl_on_line = false; /* we cant be in the
870: * middle of a
871: * declaration, so dont
872: * do special
873: * indentation of
874: * comments */
875: if (blanklines_after_declarations_at_proctop
876: && ps.in_parameter_declaration)
877: postfix_blankline_requested = 1;
878: ps.in_parameter_declaration = 0;
879: }
880: dec_ind = 0;
881: parse(lbrace); /* let parser know about this */
882: if (ps.want_blank) /* put a blank before '{' if
883: * '{' is not at start of line */
1.1 cgd 884: *e_code++ = ' ';
1.6 lukem 885: ps.want_blank = false;
886: *e_code++ = '{';
887: ps.just_saw_decl = 0;
888: break;
889:
890: case rbrace: /* got a '}' */
891: if (ps.p_stack[ps.tos] == decl && !ps.block_init) /* semicolons can be
892: * omitted in
893: * declarations */
894: parse(semicolon);
895: if (ps.p_l_follow) { /* check for unclosed if, for,
896: * else. */
897: diag(1, "Unbalanced parens");
898: ps.p_l_follow = 0;
899: sp_sw = false;
900: }
901: ps.just_saw_decl = 0;
902: ps.block_init_level--;
903: if (s_code != e_code && !ps.block_init) { /* '}' must be first on
904: * line */
905: if (verbose)
906: diag(0, "Line broken");
907: dump_line();
908: }
909: *e_code++ = '}';
910: ps.want_blank = true;
911: ps.in_stmt = ps.ind_stmt = false;
912: if (ps.dec_nest > 0) { /* we are in multi-level
913: * structure declaration */
914: dec_ind = di_stack[--ps.dec_nest];
915: if (ps.dec_nest == 0 && !ps.in_parameter_declaration)
916: ps.just_saw_decl = 2;
917: ps.in_decl = true;
918: }
919: prefix_blankline_requested = 0;
920: parse(rbrace); /* let parser know about this */
921: ps.search_brace = cuddle_else && ps.p_stack[ps.tos] == ifhead
922: && ps.il[ps.tos] >= ps.ind_level;
923: if (ps.tos <= 1 && blanklines_after_procs && ps.dec_nest <= 0)
924: postfix_blankline_requested = 1;
925: break;
1.1 cgd 926:
1.6 lukem 927: case swstmt: /* got keyword "switch" */
928: sp_sw = true;
929: hd_type = swstmt; /* keep this for when we have
930: * seen the expression */
931: goto copy_id; /* go move the token into buffer */
932:
933: case sp_paren: /* token is if, while, for */
934: sp_sw = true; /* the interesting stuff is done after
935: * the expression is scanned */
936: hd_type = (*token == 'i' ? ifstmt :
937: (*token == 'w' ? whilestmt : forstmt));
938:
939: /*
940: * remember the type of header for later use by parser
941: */
942: goto copy_id; /* copy the token into line */
943:
944: case sp_nparen:/* got else, do */
945: ps.in_stmt = false;
946: if (*token == 'e') {
947: if (e_code != s_code && (!cuddle_else || e_code[-1] != '}')) {
948: if (verbose)
949: diag(0, "Line broken");
950: dump_line(); /* make sure this starts
951: * a line */
952: ps.want_blank = false;
953: }
954: force_nl = true; /* also, following stuff
955: * must go onto new line */
956: last_else = 1;
957: parse(elselit);
958: } else {
959: if (e_code != s_code) { /* make sure this starts
960: * a line */
961: if (verbose)
962: diag(0, "Line broken");
963: dump_line();
964: ps.want_blank = false;
965: }
966: force_nl = true; /* also, following stuff
967: * must go onto new line */
968: last_else = 0;
969: parse(dolit);
970: }
971: goto copy_id; /* move the token into line */
1.1 cgd 972:
1.6 lukem 973: case decl: /* we have a declaration type (int, register,
974: * etc.) */
975: parse(decl); /* let parser worry about indentation */
976: if (ps.last_token == rparen && ps.tos <= 1) {
977: ps.in_parameter_declaration = 1;
978: if (s_code != e_code) {
979: dump_line();
980: ps.want_blank = 0;
981: }
1.1 cgd 982: }
1.6 lukem 983: if (ps.in_parameter_declaration && ps.indent_parameters && ps.dec_nest == 0) {
984: ps.ind_level = ps.i_l_follow = 1;
985: ps.ind_stmt = 0;
1.1 cgd 986: }
1.6 lukem 987: ps.in_or_st = true; /* this might be a structure
988: * or initialization
989: * declaration */
990: ps.in_decl = ps.decl_on_line = true;
991: if ( /* !ps.in_or_st && */ ps.dec_nest <= 0)
992: ps.just_saw_decl = 2;
993: prefix_blankline_requested = 0;
994: for (i = 0; token[i++];); /* get length of token */
995:
996: /*
997: * dec_ind = e_code - s_code + (ps.decl_indent>i ? ps.decl_indent
998: * : i);
999: */
1000: dec_ind = ps.decl_indent > 0 ? ps.decl_indent : i;
1001: goto copy_id;
1002:
1003: case ident: /* got an identifier or constant */
1004: if (ps.in_decl) { /* if we are in a declaration,
1005: * we must indent identifier */
1006: if (ps.want_blank)
1007: *e_code++ = ' ';
1008: ps.want_blank = false;
1009: if (is_procname == 0 || !procnames_start_line) {
1.7 ross 1010: if (!ps.block_init) {
1.6 lukem 1011: if (troff && !ps.dumped_decl_indent) {
1012: sprintf(e_code, "\n.De %dp+\200p\n", dec_ind * 7);
1013: ps.dumped_decl_indent = 1;
1014: e_code += strlen(e_code);
1015: } else
1016: while ((e_code - s_code) < dec_ind) {
1017: CHECK_SIZE_CODE;
1018: *e_code++ = ' ';
1019: }
1.7 ross 1020: }
1.6 lukem 1021: } else {
1022: if (dec_ind && s_code != e_code)
1023: dump_line();
1024: dec_ind = 0;
1025: ps.want_blank = false;
1026: }
1027: } else
1028: if (sp_sw && ps.p_l_follow == 0) {
1029: sp_sw = false;
1030: force_nl = true;
1031: ps.last_u_d = true;
1032: ps.in_stmt = false;
1033: parse(hd_type);
1034: }
1035: copy_id:
1036: if (ps.want_blank)
1037: *e_code++ = ' ';
1038: if (troff && ps.its_a_keyword) {
1039: e_code = chfont(&bodyf, &keywordf, e_code);
1040: for (t_ptr = token; *t_ptr; ++t_ptr) {
1041: CHECK_SIZE_CODE;
1.10 christos 1042: *e_code++ = keywordf.allcaps && islower((unsigned char)*t_ptr)
1.6 lukem 1043: ? toupper(*t_ptr) : *t_ptr;
1044: }
1045: e_code = chfont(&keywordf, &bodyf, e_code);
1046: } else
1047: for (t_ptr = token; *t_ptr; ++t_ptr) {
1048: CHECK_SIZE_CODE;
1049: *e_code++ = *t_ptr;
1050: }
1051: ps.want_blank = true;
1.1 cgd 1052: break;
1.6 lukem 1053:
1054: case period: /* treat a period kind of like a binary
1055: * operation */
1056: *e_code++ = '.'; /* move the period into line */
1057: ps.want_blank = false; /* dont put a blank after a
1058: * period */
1.1 cgd 1059: break;
1.6 lukem 1060:
1061: case comma:
1062: ps.want_blank = (s_code != e_code); /* only put blank after
1063: * comma if comma does
1064: * not start the line */
1065: if (ps.in_decl && is_procname == 0 && !ps.block_init)
1066: while ((e_code - s_code) < (dec_ind - 1)) {
1067: CHECK_SIZE_CODE;
1068: *e_code++ = ' ';
1069: }
1070:
1071: *e_code++ = ',';
1072: if (ps.p_l_follow == 0) {
1073: if (ps.block_init_level <= 0)
1074: ps.block_init = 0;
1075: if (break_comma && (!ps.leave_comma || compute_code_target() + (e_code - s_code) > max_col - 8))
1076: force_nl = true;
1077: }
1.1 cgd 1078: break;
1.6 lukem 1079:
1080: case preesc: /* got the character '#' */
1081: if ((s_com != e_com) ||
1082: (s_lab != e_lab) ||
1083: (s_code != e_code))
1084: dump_line();
1085: *e_lab++ = '#'; /* move whole line to 'label' buffer */
1086: {
1087: int in_comment = 0;
1088: int com_start = 0;
1089: char quote = 0;
1090: int com_end = 0;
1091:
1092: while (*buf_ptr == ' ' || *buf_ptr == '\t') {
1093: buf_ptr++;
1094: if (buf_ptr >= buf_end)
1095: fill_buffer();
1096: }
1097: while (*buf_ptr != '\n' || in_comment) {
1098: CHECK_SIZE_LAB;
1099: *e_lab = *buf_ptr++;
1100: if (buf_ptr >= buf_end)
1101: fill_buffer();
1102: switch (*e_lab++) {
1103: case BACKSLASH:
1104: if (troff)
1105: *e_lab++ = BACKSLASH;
1106: if (!in_comment) {
1107: *e_lab++ = *buf_ptr++;
1108: if (buf_ptr >= buf_end)
1109: fill_buffer();
1110: }
1111: break;
1112: case '/':
1113: if (*buf_ptr == '*' && !in_comment && !quote) {
1114: in_comment = 1;
1115: *e_lab++ = *buf_ptr++;
1116: com_start = e_lab - s_lab - 2;
1117: }
1118: break;
1119: case '"':
1120: if (quote == '"')
1121: quote = 0;
1122: break;
1123: case '\'':
1124: if (quote == '\'')
1125: quote = 0;
1126: break;
1127: case '*':
1128: if (*buf_ptr == '/' && in_comment) {
1129: in_comment = 0;
1130: *e_lab++ = *buf_ptr++;
1131: com_end = e_lab - s_lab;
1132: }
1133: break;
1134: }
1135: }
1136:
1137: while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t'))
1138: e_lab--;
1139: if (e_lab - s_lab == com_end && bp_save == 0) { /* comment on
1140: * preprocessor line */
1141: if (sc_end == 0) /* if this is the first
1142: * comment, we must set
1143: * up the buffer */
1144: sc_end = &(save_com[0]);
1145: else {
1146: *sc_end++ = '\n'; /* add newline between
1147: * comments */
1148: *sc_end++ = ' ';
1149: --line_no;
1150: }
1151: memmove(sc_end, s_lab + com_start, com_end - com_start);
1152: sc_end += com_end - com_start;
1153: if (sc_end >= &save_com[sc_size])
1154: abort();
1155: e_lab = s_lab + com_start;
1156: while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t'))
1157: e_lab--;
1158: bp_save = buf_ptr; /* save current input
1159: * buffer */
1160: be_save = buf_end;
1161: buf_ptr = save_com; /* fix so that
1162: * subsequent calls to
1163: * lexi will take tokens
1164: * out of save_com */
1165: *sc_end++ = ' '; /* add trailing blank,
1166: * just in case */
1167: buf_end = sc_end;
1168: sc_end = 0;
1169: }
1170: *e_lab = '\0'; /* null terminate line */
1171: ps.pcase = false;
1.1 cgd 1172: }
1173:
1.6 lukem 1174: if (strncmp(s_lab, "#if", 3) == 0) {
1175: if (blanklines_around_conditional_compilation) {
1176: int c;
1177: prefix_blankline_requested++;
1178: while ((c = getc(input)) == '\n');
1179: ungetc(c, input);
1180: }
1181: if (ifdef_level < sizeof state_stack / sizeof state_stack[0]) {
1182: match_state[ifdef_level].tos = -1;
1183: state_stack[ifdef_level++] = ps;
1184: } else
1185: diag(1, "#if stack overflow");
1186: } else
1.10 christos 1187: if (strncmp(s_lab, "#else", 5) == 0) {
1.6 lukem 1188: if (ifdef_level <= 0)
1189: diag(1, "Unmatched #else");
1190: else {
1191: match_state[ifdef_level - 1] = ps;
1192: ps = state_stack[ifdef_level - 1];
1193: }
1.10 christos 1194: } else
1.6 lukem 1195: if (strncmp(s_lab, "#endif", 6) == 0) {
1196: if (ifdef_level <= 0)
1197: diag(1, "Unmatched #endif");
1198: else {
1199: ifdef_level--;
1.1 cgd 1200:
1201: #ifdef undef
1.6 lukem 1202: /*
1203: * This match needs to be more intelligent before the
1204: * message is useful
1205: */
1206: if (match_state[ifdef_level].tos >= 0
1207: && memcmp(&ps, &match_state[ifdef_level], sizeof ps))
1208: diag(0, "Syntactically inconsistant #ifdef alternatives.");
1.1 cgd 1209: #endif
1.6 lukem 1210: }
1211: if (blanklines_around_conditional_compilation) {
1212: postfix_blankline_requested++;
1213: n_real_blanklines = 0;
1214: }
1215: }
1216: break; /* subsequent processing of the newline
1.1 cgd 1217: * character will cause the line to be printed */
1218:
1.6 lukem 1219: case comment: /* we have gotten a start comment */
1220: /* this is a biggie */
1221: if (flushed_nl) { /* we should force a broken
1222: * line here */
1223: flushed_nl = false;
1224: dump_line();
1225: ps.want_blank = false; /* dont insert blank at
1226: * line start */
1227: force_nl = false;
1228: }
1229: pr_comment();
1230: break;
1231: } /* end of big switch stmt */
1232:
1233: *e_code = '\0'; /* make sure code section is null terminated */
1234: if (type_code != comment && type_code != newline && type_code != preesc)
1235: ps.last_token = type_code;
1236: } /* end of main while (1) loop */
1.1 cgd 1237: }
1238: /*
1239: * copy input file to backup file if in_name is /blah/blah/blah/file, then
1240: * backup file will be ".Bfile" then make the backup file the input and
1241: * original input file the output
1242: */
1.6 lukem 1243: void
1.13 wiz 1244: bakcopy(void)
1.1 cgd 1245: {
1.6 lukem 1246: int n, bakchn;
1247: char buff[8 * 1024];
1248: char *p;
1249:
1250: /* construct file name .Bfile */
1251: for (p = in_name; *p; p++); /* skip to end of string */
1252: while (p > in_name && *p != '/') /* find last '/' */
1253: p--;
1254: if (*p == '/')
1255: p++;
1256: sprintf(bakfile, "%s.BAK", p);
1257:
1258: /* copy in_name to backup file */
1259: bakchn = creat(bakfile, 0600);
1260: if (bakchn < 0)
1261: err(1, "%s", bakfile);
1262: while ((n = read(fileno(input), buff, sizeof buff)) > 0)
1263: if (write(bakchn, buff, n) != n)
1264: err(1, "%s", bakfile);
1265: if (n < 0)
1266: err(1, "%s", in_name);
1267: close(bakchn);
1268: fclose(input);
1269:
1270: /* re-open backup file as the input file */
1271: input = fopen(bakfile, "r");
1272: if (input == 0)
1273: err(1, "%s", bakfile);
1274: /* now the original input file will be the output */
1275: output = fopen(in_name, "w");
1276: if (output == 0) {
1277: unlink(bakfile);
1278: err(1, "%s", in_name);
1279: }
1.1 cgd 1280: }
CVSweb <webmaster@jp.NetBSD.org>