Annotation of src/lib/libc/gen/glob.c, Revision 1.1
1.1 ! cgd 1: /*
! 2: * Copyright (c) 1989 The Regents of the University of California.
! 3: * All rights reserved.
! 4: *
! 5: * This code is derived from software contributed to Berkeley by
! 6: * Guido van Rossum.
! 7: *
! 8: * Redistribution and use in source and binary forms, with or without
! 9: * modification, are permitted provided that the following conditions
! 10: * are met:
! 11: * 1. Redistributions of source code must retain the above copyright
! 12: * notice, this list of conditions and the following disclaimer.
! 13: * 2. Redistributions in binary form must reproduce the above copyright
! 14: * notice, this list of conditions and the following disclaimer in the
! 15: * documentation and/or other materials provided with the distribution.
! 16: * 3. All advertising materials mentioning features or use of this software
! 17: * must display the following acknowledgement:
! 18: * This product includes software developed by the University of
! 19: * California, Berkeley and its contributors.
! 20: * 4. Neither the name of the University nor the names of its contributors
! 21: * may be used to endorse or promote products derived from this software
! 22: * without specific prior written permission.
! 23: *
! 24: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
! 25: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 26: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 27: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
! 28: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 29: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 30: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 31: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 32: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 33: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 34: * SUCH DAMAGE.
! 35: */
! 36:
! 37: #if defined(LIBC_SCCS) && !defined(lint)
! 38: static char sccsid[] = "@(#)glob.c 5.12 (Berkeley) 6/24/91";
! 39: #endif /* LIBC_SCCS and not lint */
! 40:
! 41: /*
! 42: * glob(3) -- a superset of the one defined in POSIX 1003.2.
! 43: *
! 44: * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
! 45: *
! 46: * Optional extra services, controlled by flags not defined by POSIX:
! 47: *
! 48: * GLOB_QUOTE:
! 49: * Escaping convention: \ inhibits any special meaning the following
! 50: * character might have (except \ at end of string is retained).
! 51: * GLOB_MAGCHAR:
! 52: * Set in gl_flags if pattern contained a globbing character.
! 53: * gl_matchc:
! 54: * Number of matches in the current invocation of glob.
! 55: */
! 56:
! 57: #include <sys/cdefs.h>
! 58: #include <sys/param.h>
! 59: #include <sys/stat.h>
! 60: #include <dirent.h>
! 61: #include <glob.h>
! 62: #include <ctype.h>
! 63: #include <errno.h>
! 64: #include <string.h>
! 65: #include <stdio.h>
! 66: #include <stdlib.h>
! 67:
! 68: #define DOLLAR '$'
! 69: #define DOT '.'
! 70: #define EOS '\0'
! 71: #define LBRACKET '['
! 72: #define NOT '!'
! 73: #define QUESTION '?'
! 74: #define QUOTE '\\'
! 75: #define RANGE '-'
! 76: #define RBRACKET ']'
! 77: #define SEP '/'
! 78: #define STAR '*'
! 79: #define TILDE '~'
! 80: #define UNDERSCORE '_'
! 81:
! 82: #define M_QUOTE 0x8000
! 83: #define M_PROTECT 0x4000
! 84: #define M_MASK 0xffff
! 85: #define M_ASCII 0x00ff
! 86:
! 87: #define CHAR(c) ((c)&M_ASCII)
! 88: #define META(c) ((c)|M_QUOTE)
! 89: #define M_ALL META('*')
! 90: #define M_END META(']')
! 91: #define M_NOT META('!')
! 92: #define M_ONE META('?')
! 93: #define M_RNG META('-')
! 94: #define M_SET META('[')
! 95: #define ismeta(c) (((c)&M_QUOTE) != 0)
! 96:
! 97: typedef u_short Char;
! 98:
! 99: static int compare __P((const void *, const void *));
! 100: static void g_Ctoc __P((Char *, char *));
! 101: static int g_lstat __P((Char *, struct stat *));
! 102: static DIR *g_opendir __P((Char *));
! 103: static Char *g_strchr __P((Char *, int));
! 104: static int g_stat __P((Char *, struct stat *));
! 105: static int glob1 __P((Char *, glob_t *));
! 106: static int glob2 __P((Char *, Char *, Char *, glob_t *));
! 107: static int glob3 __P((Char *, Char *, Char *, Char *, glob_t *));
! 108: static int globextend __P((Char *, glob_t *));
! 109: static int match __P((Char *, Char *, Char *));
! 110: #ifdef DEBUG
! 111: static void qprintf __P((Char *));
! 112: #endif
! 113:
! 114: /*
! 115: * The main glob() routine: compiles the pattern (optionally processing
! 116: * quotes), calls glob1() to do the real pattern matching, and finally
! 117: * sorts the list (unless unsorted operation is requested). Returns 0
! 118: * if things went well, nonzero if errors occurred. It is not an error
! 119: * to find no matches.
! 120: */
! 121: glob(pattern, flags, errfunc, pglob)
! 122: const char *pattern;
! 123: int flags, (*errfunc) __P((char *, int));
! 124: glob_t *pglob;
! 125: {
! 126: const u_char *compilepat, *patnext;
! 127: int c, err, oldpathc;
! 128: Char *bufnext, *bufend, *compilebuf, *qpatnext, patbuf[MAXPATHLEN+1];
! 129:
! 130: patnext = (u_char *) pattern;
! 131: if (!(flags & GLOB_APPEND)) {
! 132: pglob->gl_pathc = 0;
! 133: pglob->gl_pathv = NULL;
! 134: if (!(flags & GLOB_DOOFFS))
! 135: pglob->gl_offs = 0;
! 136: }
! 137: pglob->gl_flags = flags & ~GLOB_MAGCHAR;
! 138: pglob->gl_errfunc = errfunc;
! 139: oldpathc = pglob->gl_pathc;
! 140: pglob->gl_matchc = 0;
! 141:
! 142: bufnext = patbuf;
! 143: bufend = bufnext + MAXPATHLEN;
! 144: compilebuf = bufnext;
! 145: compilepat = patnext;
! 146: if (flags & GLOB_QUOTE) {
! 147: /* Protect the quoted characters. */
! 148: while (bufnext < bufend && (c = *patnext++) != EOS)
! 149: if (c == QUOTE) {
! 150: if ((c = *patnext++) == EOS) {
! 151: c = QUOTE;
! 152: --patnext;
! 153: }
! 154: *bufnext++ = c | M_PROTECT;
! 155: }
! 156: else
! 157: *bufnext++ = c;
! 158: }
! 159: else
! 160: while (bufnext < bufend && (c = *patnext++) != EOS)
! 161: *bufnext++ = c;
! 162: *bufnext = EOS;
! 163:
! 164: bufnext = patbuf;
! 165: qpatnext = patbuf;
! 166: /* We don't need to check for buffer overflow any more. */
! 167: while ((c = *qpatnext++) != EOS) {
! 168: switch (c) {
! 169: case LBRACKET:
! 170: pglob->gl_flags |= GLOB_MAGCHAR;
! 171: c = *qpatnext;
! 172: if (c == NOT)
! 173: ++qpatnext;
! 174: if (*qpatnext == EOS ||
! 175: g_strchr(qpatnext+1, RBRACKET) == NULL) {
! 176: *bufnext++ = LBRACKET;
! 177: if (c == NOT)
! 178: --qpatnext;
! 179: break;
! 180: }
! 181: *bufnext++ = M_SET;
! 182: if (c == NOT)
! 183: *bufnext++ = M_NOT;
! 184: c = *qpatnext++;
! 185: do {
! 186: *bufnext++ = CHAR(c);
! 187: if (*qpatnext == RANGE &&
! 188: (c = qpatnext[1]) != RBRACKET) {
! 189: *bufnext++ = M_RNG;
! 190: *bufnext++ = CHAR(c);
! 191: qpatnext += 2;
! 192: }
! 193: } while ((c = *qpatnext++) != RBRACKET);
! 194: *bufnext++ = M_END;
! 195: break;
! 196: case QUESTION:
! 197: pglob->gl_flags |= GLOB_MAGCHAR;
! 198: *bufnext++ = M_ONE;
! 199: break;
! 200: case STAR:
! 201: pglob->gl_flags |= GLOB_MAGCHAR;
! 202: *bufnext++ = M_ALL;
! 203: break;
! 204: default:
! 205: *bufnext++ = CHAR(c);
! 206: break;
! 207: }
! 208: }
! 209: *bufnext = EOS;
! 210: #ifdef DEBUG
! 211: qprintf(patbuf);
! 212: #endif
! 213:
! 214: if ((err = glob1(patbuf, pglob)) != 0)
! 215: return(err);
! 216:
! 217: if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) {
! 218: if (!(flags & GLOB_QUOTE)) {
! 219: Char *dp = compilebuf;
! 220: const u_char *sp = compilepat;
! 221: while (*dp++ = *sp++);
! 222: }
! 223: else {
! 224: /*
! 225: * Copy pattern, interpreting quotes; this is slightly
! 226: * different than the interpretation of quotes above
! 227: * -- which should prevail?
! 228: */
! 229: while (*compilepat != EOS) {
! 230: if (*compilepat == QUOTE) {
! 231: if (*++compilepat == EOS)
! 232: --compilepat;
! 233: }
! 234: *compilebuf++ = (u_char)*compilepat++;
! 235: }
! 236: *compilebuf = EOS;
! 237: }
! 238: return(globextend(patbuf, pglob));
! 239: } else if (!(flags & GLOB_NOSORT))
! 240: qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
! 241: pglob->gl_pathc - oldpathc, sizeof(char *), compare);
! 242: return(0);
! 243: }
! 244:
! 245: static int
! 246: compare(p, q)
! 247: const void *p, *q;
! 248: {
! 249: return(strcmp(*(char **)p, *(char **)q));
! 250: }
! 251:
! 252: static
! 253: glob1(pattern, pglob)
! 254: Char *pattern;
! 255: glob_t *pglob;
! 256: {
! 257: Char pathbuf[MAXPATHLEN+1];
! 258:
! 259: /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
! 260: if (*pattern == EOS)
! 261: return(0);
! 262: return(glob2(pathbuf, pathbuf, pattern, pglob));
! 263: }
! 264:
! 265: /*
! 266: * The functions glob2 and glob3 are mutually recursive; there is one level
! 267: * of recursion for each segment in the pattern that contains one or more
! 268: * meta characters.
! 269: */
! 270: static
! 271: glob2(pathbuf, pathend, pattern, pglob)
! 272: Char *pathbuf, *pathend, *pattern;
! 273: glob_t *pglob;
! 274: {
! 275: struct stat sb;
! 276: Char *p, *q;
! 277: int anymeta;
! 278:
! 279: /*
! 280: * Loop over pattern segments until end of pattern or until
! 281: * segment with meta character found.
! 282: */
! 283: for (anymeta = 0;;) {
! 284: if (*pattern == EOS) { /* End of pattern? */
! 285: *pathend = EOS;
! 286: if (g_stat(pathbuf, &sb))
! 287: return(0);
! 288:
! 289: if (((pglob->gl_flags & GLOB_MARK) &&
! 290: pathend[-1] != SEP) && (S_ISDIR(sb.st_mode)
! 291: || (S_ISLNK(sb.st_mode) &&
! 292: (g_stat(pathbuf, &sb) == 0) &&
! 293: S_ISDIR(sb.st_mode)))) {
! 294: *pathend++ = SEP;
! 295: *pathend = EOS;
! 296: }
! 297: ++pglob->gl_matchc;
! 298: return(globextend(pathbuf, pglob));
! 299: }
! 300:
! 301: /* Find end of next segment, copy tentatively to pathend. */
! 302: q = pathend;
! 303: p = pattern;
! 304: while (*p != EOS && *p != SEP) {
! 305: if (ismeta(*p))
! 306: anymeta = 1;
! 307: *q++ = *p++;
! 308: }
! 309:
! 310: if (!anymeta) { /* No expansion, do next segment. */
! 311: pathend = q;
! 312: pattern = p;
! 313: while (*pattern == SEP)
! 314: *pathend++ = *pattern++;
! 315: } else /* Need expansion, recurse. */
! 316: return(glob3(pathbuf, pathend, pattern, p, pglob));
! 317: }
! 318: /* NOTREACHED */
! 319: }
! 320:
! 321: static
! 322: glob3(pathbuf, pathend, pattern, restpattern, pglob)
! 323: Char *pathbuf, *pathend, *pattern, *restpattern;
! 324: glob_t *pglob;
! 325: {
! 326: register struct dirent *dp;
! 327: DIR *dirp;
! 328: int len, err;
! 329:
! 330: *pathend = EOS;
! 331: errno = 0;
! 332:
! 333: if (!(dirp = g_opendir(pathbuf)))
! 334: /* TODO: don't call for ENOENT or ENOTDIR? */
! 335: if (pglob->gl_errfunc &&
! 336: (*pglob->gl_errfunc)(pathbuf, errno) ||
! 337: (pglob->gl_flags & GLOB_ERR))
! 338: return(GLOB_ABEND);
! 339: else
! 340: return(0);
! 341:
! 342: err = 0;
! 343:
! 344: /* Search directory for matching names. */
! 345: while ((dp = readdir(dirp))) {
! 346: register u_char *sc;
! 347: register Char *dc;
! 348:
! 349: /* Initial DOT must be matched literally. */
! 350: if (dp->d_name[0] == DOT && *pattern != DOT)
! 351: continue;
! 352: for (sc = (u_char *) dp->d_name, dc = pathend;
! 353: *dc++ = *sc++;);
! 354: if (!match(pathend, pattern, restpattern)) {
! 355: *pathend = EOS;
! 356: continue;
! 357: }
! 358: err = glob2(pathbuf, --dc, restpattern, pglob);
! 359: if (err)
! 360: break;
! 361: }
! 362:
! 363: /* TODO: check error from readdir? */
! 364: (void)closedir(dirp);
! 365: return(err);
! 366: }
! 367:
! 368:
! 369: /*
! 370: * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
! 371: * add the new item, and update gl_pathc.
! 372: *
! 373: * This assumes the BSD realloc, which only copies the block when its size
! 374: * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
! 375: * behavior.
! 376: *
! 377: * Return 0 if new item added, error code if memory couldn't be allocated.
! 378: *
! 379: * Invariant of the glob_t structure:
! 380: * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
! 381: * gl_pathv points to (gl_offs + gl_pathc + 1) items.
! 382: */
! 383: static int
! 384: globextend(path, pglob)
! 385: Char *path;
! 386: glob_t *pglob;
! 387: {
! 388: register char **pathv;
! 389: register int i;
! 390: u_int newsize;
! 391: char *copy;
! 392: Char *p;
! 393:
! 394: newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
! 395: pathv = (char **)realloc((char *)pglob->gl_pathv, newsize);
! 396: if (pathv == NULL)
! 397: return(GLOB_NOSPACE);
! 398:
! 399: if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
! 400: /* first time around -- clear initial gl_offs items */
! 401: pathv += pglob->gl_offs;
! 402: for (i = pglob->gl_offs; --i >= 0; )
! 403: *--pathv = NULL;
! 404: }
! 405: pglob->gl_pathv = pathv;
! 406:
! 407: for (p = path; *p++;);
! 408: if ((copy = malloc(p - path)) != NULL) {
! 409: g_Ctoc(path, copy);
! 410: pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
! 411: }
! 412: pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
! 413: return(copy == NULL ? GLOB_NOSPACE : 0);
! 414: }
! 415:
! 416:
! 417: /*
! 418: * pattern matching function for filenames. Each occurrence of the *
! 419: * pattern causes a recursion level.
! 420: */
! 421: static
! 422: match(name, pat, patend)
! 423: register Char *name, *pat, *patend;
! 424: {
! 425: int ok, negate_range;
! 426: Char c, k;
! 427:
! 428: while (pat < patend) {
! 429: c = *pat++;
! 430: switch (c & M_MASK) {
! 431: case M_ALL:
! 432: if (pat == patend)
! 433: return(1);
! 434: for (; *name != EOS; ++name)
! 435: if (match(name, pat, patend))
! 436: return(1);
! 437: return(0);
! 438: case M_ONE:
! 439: if (*name++ == EOS)
! 440: return(0);
! 441: break;
! 442: case M_SET:
! 443: ok = 0;
! 444: k = *name++;
! 445: if (negate_range = ((*pat & M_MASK) == M_NOT))
! 446: ++pat;
! 447: while (((c = *pat++) & M_MASK) != M_END)
! 448: if ((*pat & M_MASK) == M_RNG) {
! 449: if (c <= k && k <= pat[1])
! 450: ok = 1;
! 451: pat += 2;
! 452: } else if (c == k)
! 453: ok = 1;
! 454: if (ok == negate_range)
! 455: return(0);
! 456: break;
! 457: default:
! 458: if (*name++ != c)
! 459: return(0);
! 460: break;
! 461: }
! 462: }
! 463: return(*name == EOS);
! 464: }
! 465:
! 466: /* Free allocated data belonging to a glob_t structure. */
! 467: void
! 468: globfree(pglob)
! 469: glob_t *pglob;
! 470: {
! 471: register int i;
! 472: register char **pp;
! 473:
! 474: if (pglob->gl_pathv != NULL) {
! 475: pp = pglob->gl_pathv + pglob->gl_offs;
! 476: for (i = pglob->gl_pathc; i--; ++pp)
! 477: if (*pp)
! 478: free(*pp);
! 479: free(pglob->gl_pathv);
! 480: }
! 481: }
! 482:
! 483: static DIR *
! 484: g_opendir(str)
! 485: register Char *str;
! 486: {
! 487: char buf[MAXPATHLEN];
! 488:
! 489: if (!*str)
! 490: return(opendir("."));
! 491: g_Ctoc(str, buf);
! 492: return(opendir(buf));
! 493: }
! 494:
! 495: static int
! 496: g_lstat(fn, sb)
! 497: register Char *fn;
! 498: struct stat *sb;
! 499: {
! 500: char buf[MAXPATHLEN];
! 501:
! 502: g_Ctoc(fn, buf);
! 503: return(lstat(buf, sb));
! 504: }
! 505:
! 506: static int
! 507: g_stat(fn, sb)
! 508: register Char *fn;
! 509: struct stat *sb;
! 510: {
! 511: char buf[MAXPATHLEN];
! 512:
! 513: g_Ctoc(fn, buf);
! 514: return(stat(buf, sb));
! 515: }
! 516:
! 517: static Char *
! 518: g_strchr(str, ch)
! 519: Char *str;
! 520: int ch;
! 521: {
! 522: do {
! 523: if (*str == ch)
! 524: return (str);
! 525: } while (*str++);
! 526: return (NULL);
! 527: }
! 528:
! 529: static void
! 530: g_Ctoc(str, buf)
! 531: register Char *str;
! 532: char *buf;
! 533: {
! 534: register char *dc;
! 535:
! 536: for (dc = buf; *dc++ = *str++;);
! 537: }
! 538:
! 539: #ifdef DEBUG
! 540: static void
! 541: qprintf(s)
! 542: register Char *s;
! 543: {
! 544: register Char *p;
! 545:
! 546: for (p = s; *p; p++)
! 547: (void)printf("%c", *p & 0xff);
! 548: (void)printf("\n");
! 549: for (p = s; *p; p++)
! 550: (void)printf("%c", *p & M_PROTECT ? '"' : ' ');
! 551: (void)printf("\n");
! 552: for (p = s; *p; p++)
! 553: (void)printf("%c", *p & M_META ? '_' : ' ');
! 554: (void)printf("\n");
! 555: }
! 556: #endif
CVSweb <webmaster@jp.NetBSD.org>