[BACK]Return to str.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / usr.bin / make

Annotation of src/usr.bin/make/str.c, Revision 1.81

1.81    ! rillig      1: /*     $NetBSD: str.c,v 1.80 2021/02/01 19:46:58 rillig Exp $  */
1.10      christos    2:
1.79      rillig      3: /*
1.13      christos    4:  * Copyright (c) 1988, 1989, 1990, 1993
                      5:  *     The Regents of the University of California.  All rights reserved.
1.20      agc         6:  *
                      7:  * This code is derived from software contributed to Berkeley by
                      8:  * Adam de Boor.
                      9:  *
                     10:  * Redistribution and use in source and binary forms, with or without
                     11:  * modification, are permitted provided that the following conditions
                     12:  * are met:
                     13:  * 1. Redistributions of source code must retain the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer.
                     15:  * 2. Redistributions in binary form must reproduce the above copyright
                     16:  *    notice, this list of conditions and the following disclaimer in the
                     17:  *    documentation and/or other materials provided with the distribution.
                     18:  * 3. Neither the name of the University nor the names of its contributors
                     19:  *    may be used to endorse or promote products derived from this software
                     20:  *    without specific prior written permission.
                     21:  *
                     22:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     23:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     24:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     25:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     26:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     27:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     28:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     29:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     30:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     31:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     32:  * SUCH DAMAGE.
                     33:  */
                     34:
1.79      rillig     35: /*
1.1       cgd        36:  * Copyright (c) 1989 by Berkeley Softworks
                     37:  * All rights reserved.
                     38:  *
                     39:  * This code is derived from software contributed to Berkeley by
                     40:  * Adam de Boor.
                     41:  *
                     42:  * Redistribution and use in source and binary forms, with or without
                     43:  * modification, are permitted provided that the following conditions
                     44:  * are met:
                     45:  * 1. Redistributions of source code must retain the above copyright
                     46:  *    notice, this list of conditions and the following disclaimer.
                     47:  * 2. Redistributions in binary form must reproduce the above copyright
                     48:  *    notice, this list of conditions and the following disclaimer in the
                     49:  *    documentation and/or other materials provided with the distribution.
                     50:  * 3. All advertising materials mentioning features or use of this software
                     51:  *    must display the following acknowledgement:
                     52:  *     This product includes software developed by the University of
                     53:  *     California, Berkeley and its contributors.
                     54:  * 4. Neither the name of the University nor the names of its contributors
                     55:  *    may be used to endorse or promote products derived from this software
                     56:  *    without specific prior written permission.
                     57:  *
                     58:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     59:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     60:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     61:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     62:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     63:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     64:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     65:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     66:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     67:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     68:  * SUCH DAMAGE.
                     69:  */
                     70:
1.65      rillig     71: #include "make.h"
1.1       cgd        72:
1.65      rillig     73: /*     "@(#)str.c      5.8 (Berkeley) 6/1/90"  */
1.81    ! rillig     74: MAKE_RCSID("$NetBSD: str.c,v 1.80 2021/02/01 19:46:58 rillig Exp $");
1.1       cgd        75:
/*
 * Join s1 and s2 into a single string.
 *
 * The result lives in a buffer obtained from bmake_malloc that is exactly
 * large enough for both strings plus the terminating '\0'.
 */
char *
str_concat2(const char *s1, const char *s2)
{
	const size_t n1 = strlen(s1);
	const size_t n2 = strlen(s2);
	char *buf = bmake_malloc(n1 + n2 + 1);
	char *dst = buf;

	memcpy(dst, s1, n1);
	dst += n1;
	/* n2 + 1 also copies the terminating '\0' of s2. */
	memcpy(dst, s2, n2 + 1);
	return buf;
}
1.1       cgd        87:
/*
 * Join s1, s2 and s3 into a single string.
 *
 * The result lives in a buffer obtained from bmake_malloc that is exactly
 * large enough for the three strings plus the terminating '\0'.
 */
char *
str_concat3(const char *s1, const char *s2, const char *s3)
{
	const size_t n1 = strlen(s1);
	const size_t n2 = strlen(s2);
	const size_t n3 = strlen(s3);
	char *buf = bmake_malloc(n1 + n2 + n3 + 1);
	char *dst = buf;

	memcpy(dst, s1, n1);
	dst += n1;
	memcpy(dst, s2, n2);
	dst += n2;
	/* n3 + 1 also copies the terminating '\0' of s3. */
	memcpy(dst, s3, n3 + 1);
	return buf;
}
                    101:
/*
 * Join s1, s2, s3 and s4 into a single string.
 *
 * The result lives in a buffer obtained from bmake_malloc that is exactly
 * large enough for the four strings plus the terminating '\0'.
 */
char *
str_concat4(const char *s1, const char *s2, const char *s3, const char *s4)
{
	const size_t n1 = strlen(s1);
	const size_t n2 = strlen(s2);
	const size_t n3 = strlen(s3);
	const size_t n4 = strlen(s4);
	char *buf = bmake_malloc(n1 + n2 + n3 + n4 + 1);
	char *dst = buf;

	memcpy(dst, s1, n1);
	dst += n1;
	memcpy(dst, s2, n2);
	dst += n2;
	memcpy(dst, s3, n3);
	dst += n3;
	/* n4 + 1 also copies the terminating '\0' of s4. */
	memcpy(dst, s4, n4 + 1);
	return buf;
}
                    117:
1.76      rillig    118: /*
                    119:  * Fracture a string into an array of words (as delineated by tabs or spaces)
1.73      rillig    120:  * taking quotation marks into account.
1.64      rillig    121:  *
                    122:  * If expand is TRUE, quotes are removed and escape sequences such as \r, \t,
1.73      rillig    123:  * etc... are expanded. In this case, return NULL on parse errors.
1.64      rillig    124:  *
1.73      rillig    125:  * Returns the fractured words, which must be freed later using Words_Free,
                    126:  * unless the returned Words.words was NULL.
1.1       cgd       127:  */
1.64      rillig    128: Words
                    129: Str_Words(const char *str, Boolean expand)
1.1       cgd       130: {
1.56      rillig    131:        size_t str_len;
                    132:        char *words_buf;
1.61      rillig    133:        size_t words_cap;
1.56      rillig    134:        char **words;
1.61      rillig    135:        size_t words_len;
1.56      rillig    136:        char inquote;
                    137:        char *word_start;
                    138:        char *word_end;
1.55      rillig    139:        const char *str_p;
                    140:
1.72      rillig    141:        /* XXX: why only hspace, not whitespace? */
                    142:        cpp_skip_hspace(&str);  /* skip leading space chars. */
1.1       cgd       143:
1.41      rillig    144:        /* words_buf holds the words, separated by '\0'. */
1.55      rillig    145:        str_len = strlen(str);
1.75      rillig    146:        words_buf = bmake_malloc(str_len + 1);
1.1       cgd       147:
1.70      rillig    148:        words_cap = str_len / 5 > 50 ? str_len / 5 : 50;
1.55      rillig    149:        words = bmake_malloc((words_cap + 1) * sizeof(char *));
1.35      sjg       150:
                    151:        /*
1.1       cgd       152:         * copy the string; at the same time, parse backslashes,
1.41      rillig    153:         * quotes and build the word list.
1.1       cgd       154:         */
1.55      rillig    155:        words_len = 0;
                    156:        inquote = '\0';
                    157:        word_start = words_buf;
                    158:        word_end = words_buf;
1.80      rillig    159:        for (str_p = str;; str_p++) {
1.41      rillig    160:                char ch = *str_p;
1.56      rillig    161:                switch (ch) {
1.1       cgd       162:                case '"':
                    163:                case '\'':
1.78      rillig    164:                        if (inquote != '\0') {
1.1       cgd       165:                                if (inquote == ch)
1.4       cgd       166:                                        inquote = '\0';
1.1       cgd       167:                                else
                    168:                                        break;
1.56      rillig    169:                        } else {
1.69      rillig    170:                                inquote = ch;
1.6       jtc       171:                                /* Don't miss "" or '' */
1.41      rillig    172:                                if (word_start == NULL && str_p[1] == inquote) {
1.30      christos  173:                                        if (!expand) {
1.41      rillig    174:                                                word_start = word_end;
                    175:                                                *word_end++ = ch;
1.30      christos  176:                                        } else
1.41      rillig    177:                                                word_start = word_end + 1;
                    178:                                        str_p++;
1.25      christos  179:                                        inquote = '\0';
1.6       jtc       180:                                        break;
                    181:                                }
                    182:                        }
1.8       jtc       183:                        if (!expand) {
1.41      rillig    184:                                if (word_start == NULL)
                    185:                                        word_start = word_end;
                    186:                                *word_end++ = ch;
1.8       jtc       187:                        }
1.1       cgd       188:                        continue;
                    189:                case ' ':
                    190:                case '\t':
1.8       jtc       191:                case '\n':
1.78      rillig    192:                        if (inquote != '\0')
1.1       cgd       193:                                break;
1.41      rillig    194:                        if (word_start == NULL)
1.1       cgd       195:                                continue;
                    196:                        /* FALLTHROUGH */
                    197:                case '\0':
                    198:                        /*
1.41      rillig    199:                         * end of a token -- make sure there's enough words
1.1       cgd       200:                         * space and save off a pointer.
                    201:                         */
1.41      rillig    202:                        if (word_start == NULL)
1.56      rillig    203:                                goto done;
1.8       jtc       204:
1.41      rillig    205:                        *word_end++ = '\0';
                    206:                        if (words_len == words_cap) {
1.56      rillig    207:                                size_t new_size;
1.41      rillig    208:                                words_cap *= 2;         /* ramp up fast */
1.56      rillig    209:                                new_size = (words_cap + 1) * sizeof(char *);
                    210:                                words = bmake_realloc(words, new_size);
1.1       cgd       211:                        }
1.41      rillig    212:                        words[words_len++] = word_start;
                    213:                        word_start = NULL;
1.31      christos  214:                        if (ch == '\n' || ch == '\0') {
1.77      rillig    215:                                if (expand && inquote != '\0') {
1.41      rillig    216:                                        free(words);
                    217:                                        free(words_buf);
1.64      rillig    218:                                        return (Words){ NULL, 0, NULL };
1.31      christos  219:                                }
1.1       cgd       220:                                goto done;
1.31      christos  221:                        }
1.1       cgd       222:                        continue;
                    223:                case '\\':
1.8       jtc       224:                        if (!expand) {
1.41      rillig    225:                                if (word_start == NULL)
                    226:                                        word_start = word_end;
                    227:                                *word_end++ = '\\';
                    228:                                /* catch '\' at end of line */
                    229:                                if (str_p[1] == '\0')
1.26      erh       230:                                        continue;
1.41      rillig    231:                                ch = *++str_p;
1.8       jtc       232:                                break;
                    233:                        }
1.13      christos  234:
1.41      rillig    235:                        switch (ch = *++str_p) {
1.1       cgd       236:                        case '\0':
                    237:                        case '\n':
                    238:                                /* hmmm; fix it up as best we can */
                    239:                                ch = '\\';
1.74      rillig    240:                                str_p--;
1.1       cgd       241:                                break;
                    242:                        case 'b':
                    243:                                ch = '\b';
                    244:                                break;
                    245:                        case 'f':
                    246:                                ch = '\f';
                    247:                                break;
                    248:                        case 'n':
                    249:                                ch = '\n';
                    250:                                break;
                    251:                        case 'r':
                    252:                                ch = '\r';
                    253:                                break;
                    254:                        case 't':
                    255:                                ch = '\t';
                    256:                                break;
                    257:                        }
                    258:                        break;
                    259:                }
1.41      rillig    260:                if (word_start == NULL)
                    261:                        word_start = word_end;
                    262:                *word_end++ = ch;
1.1       cgd       263:        }
1.56      rillig    264: done:
1.73      rillig    265:        words[words_len] = NULL;        /* useful for argv */
1.64      rillig    266:        return (Words){ words, words_len, words_buf };
1.1       cgd       267: }
                    268:
                    269: /*
1.51      rillig    270:  * Str_Match -- Test if a string matches a pattern like "*.[ch]".
1.73      rillig    271:  * The following special characters are known *?\[] (as in fnmatch(3)).
1.13      christos  272:  *
1.73      rillig    273:  * XXX: this function does not detect or report malformed patterns.
1.1       cgd       274:  */
1.51      rillig    275: Boolean
                    276: Str_Match(const char *str, const char *pat)
1.1       cgd       277: {
                    278:        for (;;) {
                    279:                /*
                    280:                 * See if we're at the end of both the pattern and the
1.73      rillig    281:                 * string. If so, we succeeded.  If we're at the end of the
1.1       cgd       282:                 * pattern but not at the end of the string, we failed.
                    283:                 */
1.71      rillig    284:                if (*pat == '\0')
                    285:                        return *str == '\0';
                    286:                if (*str == '\0' && *pat != '*')
1.51      rillig    287:                        return FALSE;
                    288:
1.1       cgd       289:                /*
1.51      rillig    290:                 * A '*' in the pattern matches any substring.  We handle this
                    291:                 * by calling ourselves for each suffix of the string.
1.1       cgd       292:                 */
1.51      rillig    293:                if (*pat == '*') {
                    294:                        pat++;
                    295:                        while (*pat == '*')
                    296:                                pat++;
1.71      rillig    297:                        if (*pat == '\0')
1.51      rillig    298:                                return TRUE;
1.71      rillig    299:                        while (*str != '\0') {
1.51      rillig    300:                                if (Str_Match(str, pat))
                    301:                                        return TRUE;
                    302:                                str++;
1.1       cgd       303:                        }
1.51      rillig    304:                        return FALSE;
1.1       cgd       305:                }
1.51      rillig    306:
                    307:                /* A '?' in the pattern matches any single character. */
                    308:                if (*pat == '?')
1.1       cgd       309:                        goto thisCharOK;
1.51      rillig    310:
1.1       cgd       311:                /*
1.51      rillig    312:                 * A '[' in the pattern matches a character from a list.
                    313:                 * The '[' is followed by the list of acceptable characters,
                    314:                 * or by ranges (two characters separated by '-'). In these
                    315:                 * character lists, the backslash is an ordinary character.
1.1       cgd       316:                 */
1.51      rillig    317:                if (*pat == '[') {
                    318:                        Boolean neg = pat[1] == '^';
1.63      rillig    319:                        pat += neg ? 2 : 1;
1.37      sjg       320:
1.1       cgd       321:                        for (;;) {
1.71      rillig    322:                                if (*pat == ']' || *pat == '\0') {
1.51      rillig    323:                                        if (neg)
1.38      sjg       324:                                                break;
1.51      rillig    325:                                        return FALSE;
1.38      sjg       326:                                }
1.81    ! rillig    327:                                /*
        !           328:                                 * XXX: This naive comparison makes the
        !           329:                                 * control flow of the pattern parser
        !           330:                                 * dependent on the actual value of the
        !           331:                                 * string.  This is unpredictable.  It may be
        !           332:                                 * though that the code only looks wrong but
        !           333:                                 * actually all code paths result in the same
        !           334:                                 * behavior.  This needs further tests.
        !           335:                                 */
1.51      rillig    336:                                if (*pat == *str)
1.1       cgd       337:                                        break;
1.51      rillig    338:                                if (pat[1] == '-') {
1.71      rillig    339:                                        if (pat[2] == '\0')
1.51      rillig    340:                                                return neg;
                    341:                                        if (*pat <= *str && pat[2] >= *str)
1.1       cgd       342:                                                break;
1.51      rillig    343:                                        if (*pat >= *str && pat[2] <= *str)
1.1       cgd       344:                                                break;
1.51      rillig    345:                                        pat += 2;
1.1       cgd       346:                                }
1.51      rillig    347:                                pat++;
1.1       cgd       348:                        }
1.71      rillig    349:                        if (neg && *pat != ']' && *pat != '\0')
1.51      rillig    350:                                return FALSE;
1.71      rillig    351:                        while (*pat != ']' && *pat != '\0')
1.51      rillig    352:                                pat++;
1.71      rillig    353:                        if (*pat == '\0')
1.51      rillig    354:                                pat--;
1.1       cgd       355:                        goto thisCharOK;
                    356:                }
1.51      rillig    357:
1.1       cgd       358:                /*
1.51      rillig    359:                 * A backslash in the pattern matches the character following
                    360:                 * it exactly.
1.1       cgd       361:                 */
1.51      rillig    362:                if (*pat == '\\') {
                    363:                        pat++;
1.71      rillig    364:                        if (*pat == '\0')
1.51      rillig    365:                                return FALSE;
1.1       cgd       366:                }
1.51      rillig    367:
                    368:                if (*pat != *str)
                    369:                        return FALSE;
                    370:
                    371:        thisCharOK:
                    372:                pat++;
                    373:                str++;
1.1       cgd       374:        }
1.4       cgd       375: }

CVSweb <webmaster@jp.NetBSD.org>