src/usr.bin/make/str.c - annotate

Return to str.c CVS log
Up to [cvs.NetBSD.org] / src / usr.bin / make
Annotation of src/usr.bin/make/str.c, Revision 1.75

1.75    ! rillig      1: /*     $NetBSD: str.c,v 1.74 2020/11/16 18:28:27 rillig Exp $  */
1.10      christos    2:
1.1       cgd         3: /*-
1.13      christos    4:  * Copyright (c) 1988, 1989, 1990, 1993
                      5:  *     The Regents of the University of California.  All rights reserved.
1.20      agc         6:  *
                      7:  * This code is derived from software contributed to Berkeley by
                      8:  * Adam de Boor.
                      9:  *
                     10:  * Redistribution and use in source and binary forms, with or without
                     11:  * modification, are permitted provided that the following conditions
                     12:  * are met:
                     13:  * 1. Redistributions of source code must retain the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer.
                     15:  * 2. Redistributions in binary form must reproduce the above copyright
                     16:  *    notice, this list of conditions and the following disclaimer in the
                     17:  *    documentation and/or other materials provided with the distribution.
                     18:  * 3. Neither the name of the University nor the names of its contributors
                     19:  *    may be used to endorse or promote products derived from this software
                     20:  *    without specific prior written permission.
                     21:  *
                     22:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     23:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     24:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     25:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     26:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     27:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     28:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     29:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     30:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     31:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     32:  * SUCH DAMAGE.
                     33:  */
                     34:
                     35: /*-
1.1       cgd        36:  * Copyright (c) 1989 by Berkeley Softworks
                     37:  * All rights reserved.
                     38:  *
                     39:  * This code is derived from software contributed to Berkeley by
                     40:  * Adam de Boor.
                     41:  *
                     42:  * Redistribution and use in source and binary forms, with or without
                     43:  * modification, are permitted provided that the following conditions
                     44:  * are met:
                     45:  * 1. Redistributions of source code must retain the above copyright
                     46:  *    notice, this list of conditions and the following disclaimer.
                     47:  * 2. Redistributions in binary form must reproduce the above copyright
                     48:  *    notice, this list of conditions and the following disclaimer in the
                     49:  *    documentation and/or other materials provided with the distribution.
                     50:  * 3. All advertising materials mentioning features or use of this software
                     51:  *    must display the following acknowledgement:
                     52:  *     This product includes software developed by the University of
                     53:  *     California, Berkeley and its contributors.
                     54:  * 4. Neither the name of the University nor the names of its contributors
                     55:  *    may be used to endorse or promote products derived from this software
                     56:  *    without specific prior written permission.
                     57:  *
                     58:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     59:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     60:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     61:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     62:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     63:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     64:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     65:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     66:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     67:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     68:  * SUCH DAMAGE.
                     69:  */
                     70:
1.65      rillig     71: #include "make.h"
1.1       cgd        72:
1.65      rillig     73: /*     "@(#)str.c      5.8 (Berkeley) 6/1/90"  */
1.75    ! rillig     74: MAKE_RCSID("$NetBSD: str.c,v 1.74 2020/11/16 18:28:27 rillig Exp $");
1.1       cgd        75:
/*
 * Build a new string that consists of s1 immediately followed by s2.
 * The result is allocated with bmake_malloc.
 */
char *
str_concat2(const char *s1, const char *s2)
{
	size_t n1 = strlen(s1);
	size_t n2 = strlen(s2);
	char *buf = bmake_malloc(n1 + n2 + 1);
	char *p = buf;

	memcpy(p, s1, n1);
	p += n1;
	memcpy(p, s2, n2);
	p[n2] = '\0';

	return buf;
}
1.1       cgd        87:
/*
 * Build a new string that consists of s1, s2 and s3, in this order.
 * The result is allocated with bmake_malloc.
 */
char *
str_concat3(const char *s1, const char *s2, const char *s3)
{
	size_t n1 = strlen(s1);
	size_t n2 = strlen(s2);
	size_t n3 = strlen(s3);
	char *buf = bmake_malloc(n1 + n2 + n3 + 1);
	char *p = buf;

	memcpy(p, s1, n1);
	p += n1;
	memcpy(p, s2, n2);
	p += n2;
	memcpy(p, s3, n3);
	p[n3] = '\0';

	return buf;
}
                    101:
/*
 * Build a new string that consists of s1, s2, s3 and s4, in this order.
 * The result is allocated with bmake_malloc.
 */
char *
str_concat4(const char *s1, const char *s2, const char *s3, const char *s4)
{
	size_t n1 = strlen(s1);
	size_t n2 = strlen(s2);
	size_t n3 = strlen(s3);
	size_t n4 = strlen(s4);
	char *buf = bmake_malloc(n1 + n2 + n3 + n4 + 1);
	char *p = buf;

	memcpy(p, s1, n1);
	p += n1;
	memcpy(p, s2, n2);
	p += n2;
	memcpy(p, s3, n3);
	p += n3;
	memcpy(p, s4, n4);
	p[n4] = '\0';

	return buf;
}
                    117:
1.64      rillig    118: /* Fracture a string into an array of words (as delineated by tabs or spaces)
1.73      rillig    119:  * taking quotation marks into account.
1.64      rillig    120:  *
                    121:  * If expand is TRUE, quotes are removed and escape sequences such as \r, \t,
1.73      rillig    122:  * etc... are expanded. In this case, return NULL on parse errors.
1.64      rillig    123:  *
1.73      rillig    124:  * Returns the fractured words, which must be freed later using Words_Free,
                    125:  * unless the returned Words.words was NULL.
1.1       cgd       126:  */
1.64      rillig    127: Words
                    128: Str_Words(const char *str, Boolean expand)
1.1       cgd       129: {
1.56      rillig    130:        size_t str_len;
                    131:        char *words_buf;
1.61      rillig    132:        size_t words_cap;
1.56      rillig    133:        char **words;
1.61      rillig    134:        size_t words_len;
1.56      rillig    135:        char inquote;
                    136:        char *word_start;
                    137:        char *word_end;
1.55      rillig    138:        const char *str_p;
                    139:
1.72      rillig    140:        /* XXX: why only hspace, not whitespace? */
                    141:        cpp_skip_hspace(&str);  /* skip leading space chars. */
1.1       cgd       142:
1.41      rillig    143:        /* words_buf holds the words, separated by '\0'. */
1.55      rillig    144:        str_len = strlen(str);
1.75    ! rillig    145:        words_buf = bmake_malloc(str_len + 1);
1.1       cgd       146:
1.70      rillig    147:        words_cap = str_len / 5 > 50 ? str_len / 5 : 50;
1.55      rillig    148:        words = bmake_malloc((words_cap + 1) * sizeof(char *));
1.35      sjg       149:
                    150:        /*
1.1       cgd       151:         * copy the string; at the same time, parse backslashes,
1.41      rillig    152:         * quotes and build the word list.
1.1       cgd       153:         */
1.55      rillig    154:        words_len = 0;
                    155:        inquote = '\0';
                    156:        word_start = words_buf;
                    157:        word_end = words_buf;
1.41      rillig    158:        for (str_p = str;; ++str_p) {
                    159:                char ch = *str_p;
1.56      rillig    160:                switch (ch) {
1.1       cgd       161:                case '"':
                    162:                case '\'':
1.17      christos  163:                        if (inquote) {
1.1       cgd       164:                                if (inquote == ch)
1.4       cgd       165:                                        inquote = '\0';
1.1       cgd       166:                                else
                    167:                                        break;
1.56      rillig    168:                        } else {
1.69      rillig    169:                                inquote = ch;
1.6       jtc       170:                                /* Don't miss "" or '' */
1.41      rillig    171:                                if (word_start == NULL && str_p[1] == inquote) {
1.30      christos  172:                                        if (!expand) {
1.41      rillig    173:                                                word_start = word_end;
                    174:                                                *word_end++ = ch;
1.30      christos  175:                                        } else
1.41      rillig    176:                                                word_start = word_end + 1;
                    177:                                        str_p++;
1.25      christos  178:                                        inquote = '\0';
1.6       jtc       179:                                        break;
                    180:                                }
                    181:                        }
1.8       jtc       182:                        if (!expand) {
1.41      rillig    183:                                if (word_start == NULL)
                    184:                                        word_start = word_end;
                    185:                                *word_end++ = ch;
1.8       jtc       186:                        }
1.1       cgd       187:                        continue;
                    188:                case ' ':
                    189:                case '\t':
1.8       jtc       190:                case '\n':
1.1       cgd       191:                        if (inquote)
                    192:                                break;
1.41      rillig    193:                        if (word_start == NULL)
1.1       cgd       194:                                continue;
                    195:                        /* FALLTHROUGH */
                    196:                case '\0':
                    197:                        /*
1.41      rillig    198:                         * end of a token -- make sure there's enough words
1.1       cgd       199:                         * space and save off a pointer.
                    200:                         */
1.41      rillig    201:                        if (word_start == NULL)
1.56      rillig    202:                                goto done;
1.8       jtc       203:
1.41      rillig    204:                        *word_end++ = '\0';
                    205:                        if (words_len == words_cap) {
1.56      rillig    206:                                size_t new_size;
1.41      rillig    207:                                words_cap *= 2;         /* ramp up fast */
1.56      rillig    208:                                new_size = (words_cap + 1) * sizeof(char *);
                    209:                                words = bmake_realloc(words, new_size);
1.1       cgd       210:                        }
1.41      rillig    211:                        words[words_len++] = word_start;
                    212:                        word_start = NULL;
1.31      christos  213:                        if (ch == '\n' || ch == '\0') {
                    214:                                if (expand && inquote) {
1.41      rillig    215:                                        free(words);
                    216:                                        free(words_buf);
1.64      rillig    217:                                        return (Words){ NULL, 0, NULL };
1.31      christos  218:                                }
1.1       cgd       219:                                goto done;
1.31      christos  220:                        }
1.1       cgd       221:                        continue;
                    222:                case '\\':
1.8       jtc       223:                        if (!expand) {
1.41      rillig    224:                                if (word_start == NULL)
                    225:                                        word_start = word_end;
                    226:                                *word_end++ = '\\';
                    227:                                /* catch '\' at end of line */
                    228:                                if (str_p[1] == '\0')
1.26      erh       229:                                        continue;
1.41      rillig    230:                                ch = *++str_p;
1.8       jtc       231:                                break;
                    232:                        }
1.13      christos  233:
1.41      rillig    234:                        switch (ch = *++str_p) {
1.1       cgd       235:                        case '\0':
                    236:                        case '\n':
                    237:                                /* hmmm; fix it up as best we can */
                    238:                                ch = '\\';
1.74      rillig    239:                                str_p--;
1.1       cgd       240:                                break;
                    241:                        case 'b':
                    242:                                ch = '\b';
                    243:                                break;
                    244:                        case 'f':
                    245:                                ch = '\f';
                    246:                                break;
                    247:                        case 'n':
                    248:                                ch = '\n';
                    249:                                break;
                    250:                        case 'r':
                    251:                                ch = '\r';
                    252:                                break;
                    253:                        case 't':
                    254:                                ch = '\t';
                    255:                                break;
                    256:                        }
                    257:                        break;
                    258:                }
1.41      rillig    259:                if (word_start == NULL)
                    260:                        word_start = word_end;
                    261:                *word_end++ = ch;
1.1       cgd       262:        }
1.56      rillig    263: done:
1.73      rillig    264:        words[words_len] = NULL;        /* useful for argv */
1.64      rillig    265:        return (Words){ words, words_len, words_buf };
1.1       cgd       266: }
                    267:
                    268: /*
1.51      rillig    269:  * Str_Match -- Test if a string matches a pattern like "*.[ch]".
1.73      rillig    270:  * The following special characters are known *?\[] (as in fnmatch(3)).
1.13      christos  271:  *
1.73      rillig    272:  * XXX: this function does not detect or report malformed patterns.
1.1       cgd       273:  */
1.51      rillig    274: Boolean
                    275: Str_Match(const char *str, const char *pat)
1.1       cgd       276: {
                    277:        for (;;) {
                    278:                /*
                    279:                 * See if we're at the end of both the pattern and the
1.73      rillig    280:                 * string. If so, we succeeded.  If we're at the end of the
1.1       cgd       281:                 * pattern but not at the end of the string, we failed.
                    282:                 */
1.71      rillig    283:                if (*pat == '\0')
                    284:                        return *str == '\0';
                    285:                if (*str == '\0' && *pat != '*')
1.51      rillig    286:                        return FALSE;
                    287:
1.1       cgd       288:                /*
1.51      rillig    289:                 * A '*' in the pattern matches any substring.  We handle this
                    290:                 * by calling ourselves for each suffix of the string.
1.1       cgd       291:                 */
1.51      rillig    292:                if (*pat == '*') {
                    293:                        pat++;
                    294:                        while (*pat == '*')
                    295:                                pat++;
1.71      rillig    296:                        if (*pat == '\0')
1.51      rillig    297:                                return TRUE;
1.71      rillig    298:                        while (*str != '\0') {
1.51      rillig    299:                                if (Str_Match(str, pat))
                    300:                                        return TRUE;
                    301:                                str++;
1.1       cgd       302:                        }
1.51      rillig    303:                        return FALSE;
1.1       cgd       304:                }
1.51      rillig    305:
                    306:                /* A '?' in the pattern matches any single character. */
                    307:                if (*pat == '?')
1.1       cgd       308:                        goto thisCharOK;
1.51      rillig    309:
1.1       cgd       310:                /*
1.51      rillig    311:                 * A '[' in the pattern matches a character from a list.
                    312:                 * The '[' is followed by the list of acceptable characters,
                    313:                 * or by ranges (two characters separated by '-'). In these
                    314:                 * character lists, the backslash is an ordinary character.
1.1       cgd       315:                 */
1.51      rillig    316:                if (*pat == '[') {
                    317:                        Boolean neg = pat[1] == '^';
1.63      rillig    318:                        pat += neg ? 2 : 1;
1.37      sjg       319:
1.1       cgd       320:                        for (;;) {
1.71      rillig    321:                                if (*pat == ']' || *pat == '\0') {
1.51      rillig    322:                                        if (neg)
1.38      sjg       323:                                                break;
1.51      rillig    324:                                        return FALSE;
1.38      sjg       325:                                }
1.73      rillig    326:                                /* XXX: This naive comparison makes the parser
                    327:                                 * for the pattern dependent on the actual of
                    328:                                 * the string.  This is unpredictable. */
1.51      rillig    329:                                if (*pat == *str)
1.1       cgd       330:                                        break;
1.51      rillig    331:                                if (pat[1] == '-') {
1.71      rillig    332:                                        if (pat[2] == '\0')
1.51      rillig    333:                                                return neg;
                    334:                                        if (*pat <= *str && pat[2] >= *str)
1.1       cgd       335:                                                break;
1.51      rillig    336:                                        if (*pat >= *str && pat[2] <= *str)
1.1       cgd       337:                                                break;
1.51      rillig    338:                                        pat += 2;
1.1       cgd       339:                                }
1.51      rillig    340:                                pat++;
1.1       cgd       341:                        }
1.71      rillig    342:                        if (neg && *pat != ']' && *pat != '\0')
1.51      rillig    343:                                return FALSE;
1.71      rillig    344:                        while (*pat != ']' && *pat != '\0')
1.51      rillig    345:                                pat++;
1.71      rillig    346:                        if (*pat == '\0')
1.51      rillig    347:                                pat--;
1.1       cgd       348:                        goto thisCharOK;
                    349:                }
1.51      rillig    350:
1.1       cgd       351:                /*
1.51      rillig    352:                 * A backslash in the pattern matches the character following
                    353:                 * it exactly.
1.1       cgd       354:                 */
1.51      rillig    355:                if (*pat == '\\') {
                    356:                        pat++;
1.71      rillig    357:                        if (*pat == '\0')
1.51      rillig    358:                                return FALSE;
1.1       cgd       359:                }
1.51      rillig    360:
                    361:                if (*pat != *str)
                    362:                        return FALSE;
                    363:
                    364:        thisCharOK:
                    365:                pat++;
                    366:                str++;
1.1       cgd       367:        }
1.4       cgd       368: }
CVSweb <webmaster@jp.NetBSD.org>