Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/lib/libc/gen/glob.c,v retrieving revision 1.1.1.1 retrieving revision 1.1.1.2 diff -u -p -r1.1.1.1 -r1.1.1.2 --- src/lib/libc/gen/glob.c 1993/03/21 09:45:37 1.1.1.1 +++ src/lib/libc/gen/glob.c 1995/02/25 09:12:07 1.1.1.2 @@ -1,6 +1,6 @@ /* - * Copyright (c) 1989 The Regents of the University of California. - * All rights reserved. + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Guido van Rossum. @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)glob.c 5.12 (Berkeley) 6/24/91"; +static char sccsid[] = "@(#)glob.c 8.3 (Berkeley) 10/13/93"; #endif /* LIBC_SCCS and not lint */ /* @@ -50,20 +50,31 @@ static char sccsid[] = "@(#)glob.c 5.12 * character might have (except \ at end of string is retained). * GLOB_MAGCHAR: * Set in gl_flags if pattern contained a globbing character. + * GLOB_NOMAGIC: + * Same as GLOB_NOCHECK, but it will only append pattern if it did + * not contain any magic characters. [Used in csh style globbing] + * GLOB_ALTDIRFUNC: + * Use alternately specified directory access functions. + * GLOB_TILDE: + * expand ~user/foo to the /home/dir/of/user/foo + * GLOB_BRACE: + * expand {1,2}{a,b} to 1a 1b 2a 2b * gl_matchc: * Number of matches in the current invocation of glob. */ -#include #include #include -#include -#include + #include +#include #include -#include +#include +#include #include #include +#include +#include #define DOLLAR '$' #define DOT '.' @@ -78,14 +89,34 @@ static char sccsid[] = "@(#)glob.c 5.12 #define STAR '*' #define TILDE '~' #define UNDERSCORE '_' +#define LBRACE '{' +#define RBRACE '}' +#define SLASH '/' +#define COMMA ',' + +#ifndef DEBUG #define M_QUOTE 0x8000 #define M_PROTECT 0x4000 #define M_MASK 0xffff #define M_ASCII 0x00ff -#define CHAR(c) ((c)&M_ASCII) -#define META(c) ((c)|M_QUOTE) +typedef u_short Char; + +#else + +#define M_QUOTE 0x80 +#define M_PROTECT 0x40 +#define M_MASK 0xff +#define M_ASCII 0x7f + +typedef char Char; + +#endif + + +#define CHAR(c) ((Char)((c)&M_ASCII)) +#define META(c) ((Char)((c)|M_QUOTE)) #define M_ALL META('*') #define M_END META(']') #define M_NOT META('!') @@ -94,38 +125,38 @@ static char sccsid[] = "@(#)glob.c 5.12 #define M_SET META('[') #define ismeta(c) (((c)&M_QUOTE) != 0) -typedef u_short Char; static int compare __P((const void *, const void *)); -static void g_Ctoc __P((Char *, char *)); -static int g_lstat __P((Char *, struct stat *)); -static DIR *g_opendir __P((Char *)); +static void g_Ctoc __P((const Char *, char *)); +static int g_lstat __P((Char *, struct stat *, glob_t *)); +static DIR *g_opendir __P((Char *, glob_t *)); static Char *g_strchr __P((Char *, int)); -static int g_stat __P((Char *, struct stat *)); +#ifdef notdef +static Char *g_strcat __P((Char *, const Char *)); +#endif +static int g_stat __P((Char *, struct stat *, glob_t *)); +static int glob0 __P((const Char *, glob_t *)); static int glob1 __P((Char *, glob_t *)); static int glob2 __P((Char *, Char *, Char *, glob_t *)); static int glob3 __P((Char *, Char *, Char *, Char *, glob_t *)); -static int globextend __P((Char *, glob_t *)); +static int globextend __P((const Char *, glob_t *)); +static const Char * globtilde __P((const Char *, Char *, glob_t *)); +static int globexp1 __P((const Char *, glob_t *)); +static int globexp2 __P((const Char *, const Char *, glob_t *, int *)); static int match __P((Char *, Char *, Char *)); #ifdef DEBUG -static void qprintf __P((Char *)); +static void qprintf __P((const char *, Char *)); #endif -/* - * The main glob() routine: compiles the pattern (optionally processing - * quotes), calls glob1() to do the real pattern matching, and finally - * sorts the list (unless unsorted operation is requested). Returns 0 - * if things went well, nonzero if errors occurred. It is not an error - * to find no matches. - */ +int glob(pattern, flags, errfunc, pglob) const char *pattern; - int flags, (*errfunc) __P((char *, int)); + int flags, (*errfunc) __P((const char *, int)); glob_t *pglob; { - const u_char *compilepat, *patnext; - int c, err, oldpathc; - Char *bufnext, *bufend, *compilebuf, *qpatnext, patbuf[MAXPATHLEN+1]; + const u_char *patnext; + int c; + Char *bufnext, *bufend, patbuf[MAXPATHLEN+1]; patnext = (u_char *) pattern; if (!(flags & GLOB_APPEND)) { @@ -136,13 +167,10 @@ glob(pattern, flags, errfunc, pglob) } pglob->gl_flags = flags & ~GLOB_MAGCHAR; pglob->gl_errfunc = errfunc; - oldpathc = pglob->gl_pathc; pglob->gl_matchc = 0; bufnext = patbuf; bufend = bufnext + MAXPATHLEN; - compilebuf = bufnext; - compilepat = patnext; if (flags & GLOB_QUOTE) { /* Protect the quoted characters. */ while (bufnext < bufend && (c = *patnext++) != EOS) @@ -161,18 +189,230 @@ glob(pattern, flags, errfunc, pglob) *bufnext++ = c; *bufnext = EOS; + if (flags & GLOB_BRACE) + return globexp1(patbuf, pglob); + else + return glob0(patbuf, pglob); +} + +/* + * Expand recursively a glob {} pattern. When there is no more expansion + * invoke the standard globbing routine to glob the rest of the magic + * characters + */ +static int globexp1(pattern, pglob) + const Char *pattern; + glob_t *pglob; +{ + const Char* ptr = pattern; + int rv; + + /* Protect a single {}, for find(1), like csh */ + if (pattern[0] == LBRACE && pattern[1] == RBRACE && pattern[2] == EOS) + return glob0(pattern, pglob); + + while ((ptr = (const Char *) g_strchr((Char *) ptr, LBRACE)) != NULL) + if (!globexp2(ptr, pattern, pglob, &rv)) + return rv; + + return glob0(pattern, pglob); +} + + +/* + * Recursive brace globbing helper. Tries to expand a single brace. + * If it succeeds then it invokes globexp1 with the new pattern. + * If it fails then it tries to glob the rest of the pattern and returns. + */ +static int globexp2(ptr, pattern, pglob, rv) + const Char *ptr, *pattern; + glob_t *pglob; + int *rv; +{ + int i; + Char *lm, *ls; + const Char *pe, *pm, *pl; + Char patbuf[MAXPATHLEN + 1]; + + /* copy part up to the brace */ + for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++) + continue; + ls = lm; + + /* Find the balanced brace */ + for (i = 0, pe = ++ptr; *pe; pe++) + if (*pe == LBRACKET) { + /* Ignore everything between [] */ + for (pm = pe++; *pe != RBRACKET && *pe != EOS; pe++) + continue; + if (*pe == EOS) { + /* + * We could not find a matching RBRACKET. + * Ignore and just look for RBRACE + */ + pe = pm; + } + } + else if (*pe == LBRACE) + i++; + else if (*pe == RBRACE) { + if (i == 0) + break; + i--; + } + + /* Non matching braces; just glob the pattern */ + if (i != 0 || *pe == EOS) { + *rv = glob0(patbuf, pglob); + return 0; + } + + for (i = 0, pl = pm = ptr; pm <= pe; pm++) + switch (*pm) { + case LBRACKET: + /* Ignore everything between [] */ + for (pl = pm++; *pm != RBRACKET && *pm != EOS; pm++) + continue; + if (*pm == EOS) { + /* + * We could not find a matching RBRACKET. + * Ignore and just look for RBRACE + */ + pm = pl; + } + break; + + case LBRACE: + i++; + break; + + case RBRACE: + if (i) { + i--; + break; + } + /* FALLTHROUGH */ + case COMMA: + if (i && *pm == COMMA) + break; + else { + /* Append the current string */ + for (lm = ls; (pl < pm); *lm++ = *pl++) + continue; + /* + * Append the rest of the pattern after the + * closing brace + */ + for (pl = pe + 1; (*lm++ = *pl++) != EOS;) + continue; + + /* Expand the current pattern */ +#ifdef DEBUG + qprintf("globexp2:", patbuf); +#endif + *rv = globexp1(patbuf, pglob); + + /* move after the comma, to the next string */ + pl = pm + 1; + } + break; + + default: + break; + } + *rv = 0; + return 0; +} + + + +/* + * expand tilde from the passwd file. + */ +static const Char * +globtilde(pattern, patbuf, pglob) + const Char *pattern; + Char *patbuf; + glob_t *pglob; +{ + struct passwd *pwd; + char *h; + const Char *p; + Char *b; + + if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE)) + return pattern; + + /* Copy up to the end of the string or / */ + for (p = pattern + 1, h = (char *) patbuf; *p && *p != SLASH; + *h++ = *p++) + continue; + + *h = EOS; + + if (((char *) patbuf)[0] == EOS) { + /* + * handle a plain ~ or ~/ by expanding $HOME + * first and then trying the password file + */ + if ((h = getenv("HOME")) == NULL) { + if ((pwd = getpwuid(getuid())) == NULL) + return pattern; + else + h = pwd->pw_dir; + } + } + else { + /* + * Expand a ~user + */ + if ((pwd = getpwnam((char*) patbuf)) == NULL) + return pattern; + else + h = pwd->pw_dir; + } + + /* Copy the home directory */ + for (b = patbuf; *h; *b++ = *h++) + continue; + + /* Append the rest of the pattern */ + while ((*b++ = *p++) != EOS) + continue; + + return patbuf; +} + + +/* + * The main glob() routine: compiles the pattern (optionally processing + * quotes), calls glob1() to do the real pattern matching, and finally + * sorts the list (unless unsorted operation is requested). Returns 0 + * if things went well, nonzero if errors occurred. It is not an error + * to find no matches. + */ +static int +glob0(pattern, pglob) + const Char *pattern; + glob_t *pglob; +{ + const Char *qpatnext; + int c, err, oldpathc; + Char *bufnext, patbuf[MAXPATHLEN+1]; + + qpatnext = globtilde(pattern, patbuf, pglob); + oldpathc = pglob->gl_pathc; bufnext = patbuf; - qpatnext = patbuf; + /* We don't need to check for buffer overflow any more. */ while ((c = *qpatnext++) != EOS) { switch (c) { case LBRACKET: - pglob->gl_flags |= GLOB_MAGCHAR; c = *qpatnext; if (c == NOT) ++qpatnext; if (*qpatnext == EOS || - g_strchr(qpatnext+1, RBRACKET) == NULL) { + g_strchr((Char *) qpatnext+1, RBRACKET) == NULL) { *bufnext++ = LBRACKET; if (c == NOT) --qpatnext; @@ -191,6 +431,7 @@ glob(pattern, flags, errfunc, pglob) qpatnext += 2; } } while ((c = *qpatnext++) != RBRACKET); + pglob->gl_flags |= GLOB_MAGCHAR; *bufnext++ = M_END; break; case QUESTION: @@ -199,7 +440,11 @@ glob(pattern, flags, errfunc, pglob) break; case STAR: pglob->gl_flags |= GLOB_MAGCHAR; - *bufnext++ = M_ALL; + /* collapse adjacent stars to one, + * to avoid exponential behavior + */ + if (bufnext == patbuf || bufnext[-1] != M_ALL) + *bufnext++ = M_ALL; break; default: *bufnext++ = CHAR(c); @@ -208,35 +453,24 @@ glob(pattern, flags, errfunc, pglob) } *bufnext = EOS; #ifdef DEBUG - qprintf(patbuf); + qprintf("glob0:", patbuf); #endif if ((err = glob1(patbuf, pglob)) != 0) return(err); - if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) { - if (!(flags & GLOB_QUOTE)) { - Char *dp = compilebuf; - const u_char *sp = compilepat; - while (*dp++ = *sp++); - } - else { - /* - * Copy pattern, interpreting quotes; this is slightly - * different than the interpretation of quotes above - * -- which should prevail? - */ - while (*compilepat != EOS) { - if (*compilepat == QUOTE) { - if (*++compilepat == EOS) - --compilepat; - } - *compilebuf++ = (u_char)*compilepat++; - } - *compilebuf = EOS; - } - return(globextend(patbuf, pglob)); - } else if (!(flags & GLOB_NOSORT)) + /* + * If there was no match we are going to append the pattern + * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified + * and the pattern did not contain any magic characters + * GLOB_NOMAGIC is there just for compatibility with csh. + */ + if (pglob->gl_pathc == oldpathc && + ((pglob->gl_flags & GLOB_NOCHECK) || + ((pglob->gl_flags & GLOB_NOMAGIC) && + !(pglob->gl_flags & GLOB_MAGCHAR)))) + return(globextend(pattern, pglob)); + else if (!(pglob->gl_flags & GLOB_NOSORT)) qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc, pglob->gl_pathc - oldpathc, sizeof(char *), compare); return(0); @@ -249,7 +483,7 @@ compare(p, q) return(strcmp(*(char **)p, *(char **)q)); } -static +static int glob1(pattern, pglob) Char *pattern; glob_t *pglob; @@ -267,7 +501,7 @@ glob1(pattern, pglob) * of recursion for each segment in the pattern that contains one or more * meta characters. */ -static +static int glob2(pathbuf, pathend, pattern, pglob) Char *pathbuf, *pathend, *pattern; glob_t *pglob; @@ -283,13 +517,13 @@ glob2(pathbuf, pathend, pattern, pglob) for (anymeta = 0;;) { if (*pattern == EOS) { /* End of pattern? */ *pathend = EOS; - if (g_stat(pathbuf, &sb)) + if (g_lstat(pathbuf, &sb, pglob)) return(0); if (((pglob->gl_flags & GLOB_MARK) && pathend[-1] != SEP) && (S_ISDIR(sb.st_mode) || (S_ISLNK(sb.st_mode) && - (g_stat(pathbuf, &sb) == 0) && + (g_stat(pathbuf, &sb, pglob) == 0) && S_ISDIR(sb.st_mode)))) { *pathend++ = SEP; *pathend = EOS; @@ -318,31 +552,46 @@ glob2(pathbuf, pathend, pattern, pglob) /* NOTREACHED */ } -static +static int glob3(pathbuf, pathend, pattern, restpattern, pglob) Char *pathbuf, *pathend, *pattern, *restpattern; glob_t *pglob; { register struct dirent *dp; DIR *dirp; - int len, err; + int err; + char buf[MAXPATHLEN]; + + /* + * The readdirfunc declaration can't be prototyped, because it is + * assigned, below, to two functions which are prototyped in glob.h + * and dirent.h as taking pointers to differently typed opaque + * structures. + */ + struct dirent *(*readdirfunc)(); *pathend = EOS; errno = 0; - if (!(dirp = g_opendir(pathbuf))) + if ((dirp = g_opendir(pathbuf, pglob)) == NULL) { /* TODO: don't call for ENOENT or ENOTDIR? */ - if (pglob->gl_errfunc && - (*pglob->gl_errfunc)(pathbuf, errno) || - (pglob->gl_flags & GLOB_ERR)) - return(GLOB_ABEND); - else - return(0); + if (pglob->gl_errfunc) { + g_Ctoc(pathbuf, buf); + if (pglob->gl_errfunc(buf, errno) || + pglob->gl_flags & GLOB_ERR) + return (GLOB_ABEND); + } + return(0); + } err = 0; /* Search directory for matching names. */ - while ((dp = readdir(dirp))) { + if (pglob->gl_flags & GLOB_ALTDIRFUNC) + readdirfunc = pglob->gl_readdir; + else + readdirfunc = readdir; + while ((dp = (*readdirfunc)(dirp))) { register u_char *sc; register Char *dc; @@ -350,7 +599,8 @@ glob3(pathbuf, pathend, pattern, restpat if (dp->d_name[0] == DOT && *pattern != DOT) continue; for (sc = (u_char *) dp->d_name, dc = pathend; - *dc++ = *sc++;); + (*dc++ = *sc++) != EOS;) + continue; if (!match(pathend, pattern, restpattern)) { *pathend = EOS; continue; @@ -360,8 +610,10 @@ glob3(pathbuf, pathend, pattern, restpat break; } - /* TODO: check error from readdir? */ - (void)closedir(dirp); + if (pglob->gl_flags & GLOB_ALTDIRFUNC) + (*pglob->gl_closedir)(dirp); + else + closedir(dirp); return(err); } @@ -382,17 +634,19 @@ glob3(pathbuf, pathend, pattern, restpat */ static int globextend(path, pglob) - Char *path; + const Char *path; glob_t *pglob; { register char **pathv; register int i; u_int newsize; char *copy; - Char *p; + const Char *p; newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs); - pathv = (char **)realloc((char *)pglob->gl_pathv, newsize); + pathv = pglob->gl_pathv ? + realloc((char *)pglob->gl_pathv, newsize) : + malloc(newsize); if (pathv == NULL) return(GLOB_NOSPACE); @@ -404,7 +658,8 @@ globextend(path, pglob) } pglob->gl_pathv = pathv; - for (p = path; *p++;); + for (p = path; *p++;) + continue; if ((copy = malloc(p - path)) != NULL) { g_Ctoc(path, copy); pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; @@ -418,7 +673,7 @@ globextend(path, pglob) * pattern matching function for filenames. Each occurrence of the * * pattern causes a recursion level. */ -static +static int match(name, pat, patend) register Char *name, *pat, *patend; { @@ -431,9 +686,10 @@ match(name, pat, patend) case M_ALL: if (pat == patend) return(1); - for (; *name != EOS; ++name) - if (match(name, pat, patend)) - return(1); + do + if (match(name, pat, patend)) + return(1); + while (*name++ != EOS); return(0); case M_ONE: if (*name++ == EOS) @@ -441,8 +697,9 @@ match(name, pat, patend) break; case M_SET: ok = 0; - k = *name++; - if (negate_range = ((*pat & M_MASK) == M_NOT)) + if ((k = *name++) == EOS) + return(0); + if ((negate_range = ((*pat & M_MASK) == M_NOT)) != EOS) ++pat; while (((c = *pat++) & M_MASK) != M_END) if ((*pat & M_MASK) == M_RNG) { @@ -481,36 +738,48 @@ globfree(pglob) } static DIR * -g_opendir(str) +g_opendir(str, pglob) register Char *str; + glob_t *pglob; { char buf[MAXPATHLEN]; if (!*str) - return(opendir(".")); - g_Ctoc(str, buf); + strcpy(buf, "."); + else + g_Ctoc(str, buf); + + if (pglob->gl_flags & GLOB_ALTDIRFUNC) + return((*pglob->gl_opendir)(buf)); + return(opendir(buf)); } static int -g_lstat(fn, sb) +g_lstat(fn, sb, pglob) register Char *fn; struct stat *sb; + glob_t *pglob; { char buf[MAXPATHLEN]; g_Ctoc(fn, buf); + if (pglob->gl_flags & GLOB_ALTDIRFUNC) + return((*pglob->gl_lstat)(buf, sb)); return(lstat(buf, sb)); } static int -g_stat(fn, sb) +g_stat(fn, sb, pglob) register Char *fn; struct stat *sb; + glob_t *pglob; { char buf[MAXPATHLEN]; g_Ctoc(fn, buf); + if (pglob->gl_flags & GLOB_ALTDIRFUNC) + return((*pglob->gl_stat)(buf, sb)); return(stat(buf, sb)); } @@ -526,31 +795,52 @@ g_strchr(str, ch) return (NULL); } +#ifdef notdef +static Char * +g_strcat(dst, src) + Char *dst; + const Char* src; +{ + Char *sdst = dst; + + while (*dst++) + continue; + --dst; + while((*dst++ = *src++) != EOS) + continue; + + return (sdst); +} +#endif + static void g_Ctoc(str, buf) - register Char *str; + register const Char *str; char *buf; { register char *dc; - for (dc = buf; *dc++ = *str++;); + for (dc = buf; (*dc++ = *str++) != EOS;) + continue; } #ifdef DEBUG static void -qprintf(s) +qprintf(str, s) + const char *str; register Char *s; { register Char *p; + (void)printf("%s:\n", str); for (p = s; *p; p++) - (void)printf("%c", *p & 0xff); + (void)printf("%c", CHAR(*p)); (void)printf("\n"); for (p = s; *p; p++) (void)printf("%c", *p & M_PROTECT ? '"' : ' '); (void)printf("\n"); for (p = s; *p; p++) - (void)printf("%c", *p & M_META ? '_' : ' '); + (void)printf("%c", ismeta(*p) ? '_' : ' '); (void)printf("\n"); } #endif