Annotation of src/lib/libc/stdio/vfscanf.c, Revision 1.2
1.1 cgd 1: /*-
2: * Copyright (c) 1990 The Regents of the University of California.
3: * All rights reserved.
4: *
5: * This code is derived from software contributed to Berkeley by
6: * Chris Torek.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: * 3. All advertising materials mentioning features or use of this software
17: * must display the following acknowledgement:
18: * This product includes software developed by the University of
19: * California, Berkeley and its contributors.
20: * 4. Neither the name of the University nor the names of its contributors
21: * may be used to endorse or promote products derived from this software
22: * without specific prior written permission.
23: *
24: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34: * SUCH DAMAGE.
35: */
36:
37: #if defined(LIBC_SCCS) && !defined(lint)
38: static char sccsid[] = "@(#)vfscanf.c 5.6 (Berkeley) 2/24/91";
39: #endif /* LIBC_SCCS and not lint */
40:
41: #include <stdio.h>
42: #include <stdlib.h>
43: #include <ctype.h>
44: #if __STDC__
45: #include <stdarg.h>
46: #else
47: #include <varargs.h>
48: #endif
49: #include "local.h"
50:
/*
 * Build-time switch: when FLOATING_POINT is defined the floating point
 * support (the code guarded by #ifdef FLOATING_POINT) is compiled in.
 */
51: #define FLOATING_POINT
52:
/*
 * BUF is the size of the scratch buffer (`buf') in which __svfscanf
 * collects the text of a numeric field before converting it.
 * MAXEXP and MAXFRACT are presumably supplied by floatio.h.
 */
53: #ifdef FLOATING_POINT
54: #include "floatio.h"
55: #define BUF (MAXEXP+MAXFRACT+3) /* 3 = sign + decimal point + NUL */
56: #else
57: #define BUF 40
58: #endif
59:
60: /*
61: * Flags used during conversion.
 * They are OR-ed together in the `flags' variable of __svfscanf,
 * which is reset to 0 at the start of every `%' directive.
62: */
63: #define LONG 0x01 /* l: long or double */
64: #define LONGDBL 0x02 /* L: long double; unimplemented */
65: #define SHORT 0x04 /* h: short */
66: #define SUPPRESS 0x08 /* *: suppress assignment */
67: #define POINTER 0x10 /* weird %p pointer (`fake hex') */
68: #define NOSKIP 0x20 /* do not skip blanks (set by %c and %[) */
69:
70: /*
71: * The following are used in numeric conversions only:
72: * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
73: * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
 *
 * Note that DPTOK/PFXOK share the value 0x100 and EXPOK/NZDIGITS share
 * the value 0x200; as stated above, the first of each pair is used
 * only for floating point and the second only for integral conversions.
74: */
75: #define SIGNOK 0x40 /* +/- is (still) legal */
76: #define NDIGITS 0x80 /* no digits detected (yet) */
77:
78: #define DPTOK 0x100 /* (float) decimal point is still legal */
79: #define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */
80:
81: #define PFXOK 0x100 /* (int) 0x prefix is (still) legal */
82: #define NZDIGITS 0x200 /* (int) no zero digits detected (yet) */
83:
84: /*
85: * Conversion types.
 * Once a directive has been parsed, `c' in __svfscanf holds one of
 * these values and selects the conversion code to run.
86: */
87: #define CT_CHAR 0 /* %c conversion */
88: #define CT_CCL 1 /* %[...] conversion */
89: #define CT_STRING 2 /* %s conversion */
90: #define CT_INT 3 /* integer, i.e., strtol or strtoul */
91: #define CT_FLOAT 4 /* floating, i.e., strtod */
92:
/* Local shorthand for the unsigned types used throughout this file. */
93: #define u_char unsigned char
94: #define u_long unsigned long
95:
/* Builds the %[...] membership table; defined at the end of this file. */
96: static u_char *__sccl();
97:
98: /*
99: * vfscanf
100: */
101: __svfscanf(fp, fmt0, ap)
102: register FILE *fp;
103: char const *fmt0;
1.2 ! mycroft 104: _VA_LIST_ ap;
1.1 cgd 105: {
106: register u_char *fmt = (u_char *)fmt0;
107: register int c; /* character from format, or conversion */
108: register size_t width; /* field width, or 0 */
109: register char *p; /* points into all kinds of strings */
110: register int n; /* handy integer */
111: register int flags; /* flags as defined above */
112: register char *p0; /* saves original value of p when necessary */
113: int nassigned; /* number of fields assigned */
114: int nread; /* number of characters consumed from fp */
115: int base; /* base argument to strtol/strtoul */
116: u_long (*ccfn)(); /* conversion function (strtol/strtoul) */
117: char ccltab[256]; /* character class table for %[...] */
118: char buf[BUF]; /* buffer for numeric conversions */
119:
120: /* `basefix' is used to avoid `if' tests in the integer scanner */
121: static short basefix[17] =
122: { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
123:
124: nassigned = 0;
125: nread = 0;
126: base = 0; /* XXX just to keep gcc happy */
127: ccfn = NULL; /* XXX just to keep gcc happy */
128: for (;;) {
129: c = *fmt++;
130: if (c == 0)
131: return (nassigned);
132: if (isspace(c)) {
133: for (;;) {
134: if (fp->_r <= 0 && __srefill(fp))
135: return (nassigned);
136: if (!isspace(*fp->_p))
137: break;
138: nread++, fp->_r--, fp->_p++;
139: }
140: continue;
141: }
142: if (c != '%')
143: goto literal;
144: width = 0;
145: flags = 0;
146: /*
147: * switch on the format. continue if done;
148: * break once format type is derived.
149: */
150: again: c = *fmt++;
151: switch (c) {
152: case '%':
153: literal:
154: if (fp->_r <= 0 && __srefill(fp))
155: goto input_failure;
156: if (*fp->_p != c)
157: goto match_failure;
158: fp->_r--, fp->_p++;
159: nread++;
160: continue;
161:
162: case '*':
163: flags |= SUPPRESS;
164: goto again;
165: case 'l':
166: flags |= LONG;
167: goto again;
168: case 'L':
169: flags |= LONGDBL;
170: goto again;
171: case 'h':
172: flags |= SHORT;
173: goto again;
174:
175: case '0': case '1': case '2': case '3': case '4':
176: case '5': case '6': case '7': case '8': case '9':
177: width = width * 10 + c - '0';
178: goto again;
179:
180: /*
181: * Conversions.
182: * Those marked `compat' are for 4.[123]BSD compatibility.
183: *
184: * (According to ANSI, E and X formats are supposed
185: * to the same as e and x. Sorry about that.)
186: */
187: case 'D': /* compat */
188: flags |= LONG;
189: /* FALLTHROUGH */
190: case 'd':
191: c = CT_INT;
192: ccfn = (u_long (*)())strtol;
193: base = 10;
194: break;
195:
196: case 'i':
197: c = CT_INT;
198: ccfn = (u_long (*)())strtol;
199: base = 0;
200: break;
201:
202: case 'O': /* compat */
203: flags |= LONG;
204: /* FALLTHROUGH */
205: case 'o':
206: c = CT_INT;
207: ccfn = strtoul;
208: base = 8;
209: break;
210:
211: case 'u':
212: c = CT_INT;
213: ccfn = strtoul;
214: base = 10;
215: break;
216:
217: case 'X': /* compat XXX */
218: flags |= LONG;
219: /* FALLTHROUGH */
220: case 'x':
221: flags |= PFXOK; /* enable 0x prefixing */
222: c = CT_INT;
223: ccfn = strtoul;
224: base = 16;
225: break;
226:
227: #ifdef FLOATING_POINT
228: case 'E': /* compat XXX */
229: case 'F': /* compat */
230: flags |= LONG;
231: /* FALLTHROUGH */
232: case 'e': case 'f': case 'g':
233: c = CT_FLOAT;
234: break;
235: #endif
236:
237: case 's':
238: c = CT_STRING;
239: break;
240:
241: case '[':
242: fmt = __sccl(ccltab, fmt);
243: flags |= NOSKIP;
244: c = CT_CCL;
245: break;
246:
247: case 'c':
248: flags |= NOSKIP;
249: c = CT_CHAR;
250: break;
251:
252: case 'p': /* pointer format is like hex */
253: flags |= POINTER | PFXOK;
254: c = CT_INT;
255: ccfn = strtoul;
256: base = 16;
257: break;
258:
259: case 'n':
260: if (flags & SUPPRESS) /* ??? */
261: continue;
262: if (flags & SHORT)
263: *va_arg(ap, short *) = nread;
264: else if (flags & LONG)
265: *va_arg(ap, long *) = nread;
266: else
267: *va_arg(ap, int *) = nread;
268: continue;
269:
270: /*
271: * Disgusting backwards compatibility hacks. XXX
272: */
273: case '\0': /* compat */
274: return (EOF);
275:
276: default: /* compat */
277: if (isupper(c))
278: flags |= LONG;
279: c = CT_INT;
280: ccfn = (u_long (*)())strtol;
281: base = 10;
282: break;
283: }
284:
285: /*
286: * We have a conversion that requires input.
287: */
288: if (fp->_r <= 0 && __srefill(fp))
289: goto input_failure;
290:
291: /*
292: * Consume leading white space, except for formats
293: * that suppress this.
294: */
295: if ((flags & NOSKIP) == 0) {
296: while (isspace(*fp->_p)) {
297: nread++;
298: if (--fp->_r > 0)
299: fp->_p++;
300: else if (__srefill(fp))
301: goto input_failure;
302: }
303: /*
304: * Note that there is at least one character in
305: * the buffer, so conversions that do not set NOSKIP
306: * ca no longer result in an input failure.
307: */
308: }
309:
310: /*
311: * Do the conversion.
312: */
313: switch (c) {
314:
315: case CT_CHAR:
316: /* scan arbitrary characters (sets NOSKIP) */
317: if (width == 0)
318: width = 1;
319: if (flags & SUPPRESS) {
320: size_t sum = 0;
321: for (;;) {
322: if ((n = fp->_r) < width) {
323: sum += n;
324: width -= n;
325: fp->_p += n;
326: if (__srefill(fp)) {
327: if (sum == 0)
328: goto input_failure;
329: break;
330: }
331: } else {
332: sum += width;
333: fp->_r -= width;
334: fp->_p += width;
335: break;
336: }
337: }
338: nread += sum;
339: } else {
340: size_t r = fread((void *)va_arg(ap, char *), 1,
341: width, fp);
342:
343: if (r == 0)
344: goto input_failure;
345: nread += r;
346: nassigned++;
347: }
348: break;
349:
350: case CT_CCL:
351: /* scan a (nonempty) character class (sets NOSKIP) */
352: if (width == 0)
353: width = ~0; /* `infinity' */
354: /* take only those things in the class */
355: if (flags & SUPPRESS) {
356: n = 0;
357: while (ccltab[*fp->_p]) {
358: n++, fp->_r--, fp->_p++;
359: if (--width == 0)
360: break;
361: if (fp->_r <= 0 && __srefill(fp)) {
362: if (n == 0)
363: goto input_failure;
364: break;
365: }
366: }
367: if (n == 0)
368: goto match_failure;
369: } else {
370: p0 = p = va_arg(ap, char *);
371: while (ccltab[*fp->_p]) {
372: fp->_r--;
373: *p++ = *fp->_p++;
374: if (--width == 0)
375: break;
376: if (fp->_r <= 0 && __srefill(fp)) {
377: if (p == p0)
378: goto input_failure;
379: break;
380: }
381: }
382: n = p - p0;
383: if (n == 0)
384: goto match_failure;
385: *p = 0;
386: nassigned++;
387: }
388: nread += n;
389: break;
390:
391: case CT_STRING:
392: /* like CCL, but zero-length string OK, & no NOSKIP */
393: if (width == 0)
394: width = ~0;
395: if (flags & SUPPRESS) {
396: n = 0;
397: while (!isspace(*fp->_p)) {
398: n++, fp->_r--, fp->_p++;
399: if (--width == 0)
400: break;
401: if (fp->_r <= 0 && __srefill(fp))
402: break;
403: }
404: nread += n;
405: } else {
406: p0 = p = va_arg(ap, char *);
407: while (!isspace(*fp->_p)) {
408: fp->_r--;
409: *p++ = *fp->_p++;
410: if (--width == 0)
411: break;
412: if (fp->_r <= 0 && __srefill(fp))
413: break;
414: }
415: *p = 0;
416: nread += p - p0;
417: nassigned++;
418: }
419: continue;
420:
421: case CT_INT:
422: /* scan an integer as if by strtol/strtoul */
423: #ifdef hardway
424: if (width == 0 || width > sizeof(buf) - 1)
425: width = sizeof(buf) - 1;
426: #else
427: /* size_t is unsigned, hence this optimisation */
428: if (--width > sizeof(buf) - 2)
429: width = sizeof(buf) - 2;
430: width++;
431: #endif
432: flags |= SIGNOK | NDIGITS | NZDIGITS;
433: for (p = buf; width; width--) {
434: c = *fp->_p;
435: /*
436: * Switch on the character; `goto ok'
437: * if we accept it as a part of number.
438: */
439: switch (c) {
440:
441: /*
442: * The digit 0 is always legal, but is
443: * special. For %i conversions, if no
444: * digits (zero or nonzero) have been
445: * scanned (only signs), we will have
446: * base==0. In that case, we should set
447: * it to 8 and enable 0x prefixing.
448: * Also, if we have not scanned zero digits
449: * before this, do not turn off prefixing
450: * (someone else will turn it off if we
451: * have scanned any nonzero digits).
452: */
453: case '0':
454: if (base == 0) {
455: base = 8;
456: flags |= PFXOK;
457: }
458: if (flags & NZDIGITS)
459: flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
460: else
461: flags &= ~(SIGNOK|PFXOK|NDIGITS);
462: goto ok;
463:
464: /* 1 through 7 always legal */
465: case '1': case '2': case '3':
466: case '4': case '5': case '6': case '7':
467: base = basefix[base];
468: flags &= ~(SIGNOK | PFXOK | NDIGITS);
469: goto ok;
470:
471: /* digits 8 and 9 ok iff decimal or hex */
472: case '8': case '9':
473: base = basefix[base];
474: if (base <= 8)
475: break; /* not legal here */
476: flags &= ~(SIGNOK | PFXOK | NDIGITS);
477: goto ok;
478:
479: /* letters ok iff hex */
480: case 'A': case 'B': case 'C':
481: case 'D': case 'E': case 'F':
482: case 'a': case 'b': case 'c':
483: case 'd': case 'e': case 'f':
484: /* no need to fix base here */
485: if (base <= 10)
486: break; /* not legal here */
487: flags &= ~(SIGNOK | PFXOK | NDIGITS);
488: goto ok;
489:
490: /* sign ok only as first character */
491: case '+': case '-':
492: if (flags & SIGNOK) {
493: flags &= ~SIGNOK;
494: goto ok;
495: }
496: break;
497:
498: /* x ok iff flag still set & 2nd char */
499: case 'x': case 'X':
500: if (flags & PFXOK && p == buf + 1) {
501: base = 16; /* if %i */
502: flags &= ~PFXOK;
503: goto ok;
504: }
505: break;
506: }
507:
508: /*
509: * If we got here, c is not a legal character
510: * for a number. Stop accumulating digits.
511: */
512: break;
513: ok:
514: /*
515: * c is legal: store it and look at the next.
516: */
517: *p++ = c;
518: if (--fp->_r > 0)
519: fp->_p++;
520: else if (__srefill(fp))
521: break; /* EOF */
522: }
523: /*
524: * If we had only a sign, it is no good; push
525: * back the sign. If the number ends in `x',
526: * it was [sign] '0' 'x', so push back the x
527: * and treat it as [sign] '0'.
528: */
529: if (flags & NDIGITS) {
530: if (p > buf)
531: (void) ungetc(*(u_char *)--p, fp);
532: goto match_failure;
533: }
534: c = ((u_char *)p)[-1];
535: if (c == 'x' || c == 'X') {
536: --p;
537: (void) ungetc(c, fp);
538: }
539: if ((flags & SUPPRESS) == 0) {
540: u_long res;
541:
542: *p = 0;
543: res = (*ccfn)(buf, (char **)NULL, base);
544: if (flags & POINTER)
545: *va_arg(ap, void **) = (void *)res;
546: else if (flags & SHORT)
547: *va_arg(ap, short *) = res;
548: else if (flags & LONG)
549: *va_arg(ap, long *) = res;
550: else
551: *va_arg(ap, int *) = res;
552: nassigned++;
553: }
554: nread += p - buf;
555: break;
556:
557: #ifdef FLOATING_POINT
558: case CT_FLOAT:
559: /* scan a floating point number as if by strtod */
560: #ifdef hardway
561: if (width == 0 || width > sizeof(buf) - 1)
562: width = sizeof(buf) - 1;
563: #else
564: /* size_t is unsigned, hence this optimisation */
565: if (--width > sizeof(buf) - 2)
566: width = sizeof(buf) - 2;
567: width++;
568: #endif
569: flags |= SIGNOK | NDIGITS | DPTOK | EXPOK;
570: for (p = buf; width; width--) {
571: c = *fp->_p;
572: /*
573: * This code mimicks the integer conversion
574: * code, but is much simpler.
575: */
576: switch (c) {
577:
578: case '0': case '1': case '2': case '3':
579: case '4': case '5': case '6': case '7':
580: case '8': case '9':
581: flags &= ~(SIGNOK | NDIGITS);
582: goto fok;
583:
584: case '+': case '-':
585: if (flags & SIGNOK) {
586: flags &= ~SIGNOK;
587: goto fok;
588: }
589: break;
590: case '.':
591: if (flags & DPTOK) {
592: flags &= ~(SIGNOK | DPTOK);
593: goto fok;
594: }
595: break;
596: case 'e': case 'E':
597: /* no exponent without some digits */
598: if ((flags&(NDIGITS|EXPOK)) == EXPOK) {
599: flags =
600: (flags & ~(EXPOK|DPTOK)) |
601: SIGNOK | NDIGITS;
602: goto fok;
603: }
604: break;
605: }
606: break;
607: fok:
608: *p++ = c;
609: if (--fp->_r > 0)
610: fp->_p++;
611: else if (__srefill(fp))
612: break; /* EOF */
613: }
614: /*
615: * If no digits, might be missing exponent digits
616: * (just give back the exponent) or might be missing
617: * regular digits, but had sign and/or decimal point.
618: */
619: if (flags & NDIGITS) {
620: if (flags & EXPOK) {
621: /* no digits at all */
622: while (p > buf)
623: ungetc(*(u_char *)--p, fp);
624: goto match_failure;
625: }
626: /* just a bad exponent (e and maybe sign) */
627: c = *(u_char *)--p;
628: if (c != 'e' && c != 'E') {
629: (void) ungetc(c, fp);/* sign */
630: c = *(u_char *)--p;
631: }
632: (void) ungetc(c, fp);
633: }
634: if ((flags & SUPPRESS) == 0) {
635: double res;
636:
637: *p = 0;
638: res = atof(buf);
639: if (flags & LONG)
640: *va_arg(ap, double *) = res;
641: else
642: *va_arg(ap, float *) = res;
643: nassigned++;
644: }
645: nread += p - buf;
646: break;
647: #endif /* FLOATING_POINT */
648: }
649: }
650: input_failure:
651: return (nassigned ? nassigned : -1);
652: match_failure:
653: return (nassigned);
654: }
655:
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]'.  The table has a 1 wherever characters should be
 * considered part of the scanset.
 *
 *	tab	table of 256 entries indexed by character value; every
 *		entry is written (0 = not in the set, 1 = in the set).
 *	fmt	format text immediately following the `['.
 *
 * If the format ends before a closing `]' is seen, the returned
 * pointer addresses the terminating NUL instead.
 */
static unsigned char *
__sccl(tab, fmt)
	register char *tab;
	register unsigned char *fmt;
{
	register int c, n, v;

	/* first `clear' the whole table */
	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */
	for (n = 0; n < 256; n++)
		tab[n] = v;
	if (c == 0)
		return (fmt - 1);/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {

		case 0:			/* format ended too soon */
			return (fmt - 1);

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The comparison must be `<=': were an equal end
			 * point let through, the fill loop below would
			 * store at c + 1, adding a character that is not
			 * in the set and, for c == 255, writing past the
			 * end of the table.
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;	/* resume the for(;;) */
			}
			fmt++;
			do {		/* fill in the range; here c < n */
				tab[++c] = v;
			} while (c < n);
			/*
			 * XXX another disgusting compatibility hack:
			 * alas, the V7 Unix scanf also treats formats
			 * such as [a-c-e] as `the letters a through e'.
			 * This too is permitted by the standard, so keep
			 * c at the end of the range and look at the next
			 * format character without storing anything.
			 */
			goto doswitch;

		case ']':		/* end of scanset */
			return (fmt);

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}
CVSweb <webmaster@jp.NetBSD.org>