Annotation of src/usr.bin/make/str.c, Revision 1.81
1.81 ! rillig 1: /* $NetBSD: str.c,v 1.80 2021/02/01 19:46:58 rillig Exp $ */
1.10 christos 2:
1.79 rillig 3: /*
1.13 christos 4: * Copyright (c) 1988, 1989, 1990, 1993
5: * The Regents of the University of California. All rights reserved.
1.20 agc 6: *
7: * This code is derived from software contributed to Berkeley by
8: * Adam de Boor.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
18: * 3. Neither the name of the University nor the names of its contributors
19: * may be used to endorse or promote products derived from this software
20: * without specific prior written permission.
21: *
22: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32: * SUCH DAMAGE.
33: */
34:
1.79 rillig 35: /*
1.1 cgd 36: * Copyright (c) 1989 by Berkeley Softworks
37: * All rights reserved.
38: *
39: * This code is derived from software contributed to Berkeley by
40: * Adam de Boor.
41: *
42: * Redistribution and use in source and binary forms, with or without
43: * modification, are permitted provided that the following conditions
44: * are met:
45: * 1. Redistributions of source code must retain the above copyright
46: * notice, this list of conditions and the following disclaimer.
47: * 2. Redistributions in binary form must reproduce the above copyright
48: * notice, this list of conditions and the following disclaimer in the
49: * documentation and/or other materials provided with the distribution.
50: * 3. All advertising materials mentioning features or use of this software
51: * must display the following acknowledgement:
52: * This product includes software developed by the University of
53: * California, Berkeley and its contributors.
54: * 4. Neither the name of the University nor the names of its contributors
55: * may be used to endorse or promote products derived from this software
56: * without specific prior written permission.
57: *
58: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68: * SUCH DAMAGE.
69: */
70:
1.65 rillig 71: #include "make.h"
1.1 cgd 72:
1.65 rillig 73: /* "@(#)str.c 5.8 (Berkeley) 6/1/90" */
1.81 ! rillig 74: MAKE_RCSID("$NetBSD: str.c,v 1.80 2021/02/01 19:46:58 rillig Exp $");
1.1 cgd 75:
/*
 * Join s1 and s2 into a single newly allocated string and return it.
 * The buffer is obtained from bmake_malloc.
 */
char *
str_concat2(const char *s1, const char *s2)
{
	size_t n1 = strlen(s1);
	size_t n2 = strlen(s2);
	char *joined = bmake_malloc(n1 + n2 + 1);
	char *dst = joined;

	memcpy(dst, s1, n1);
	dst += n1;
	/* The "+ 1" copies the terminating '\0' of s2 as well. */
	memcpy(dst, s2, n2 + 1);
	return joined;
}
1.1 cgd 87:
1.59 rillig 88: /* Return the concatenation of s1, s2 and s3, freshly allocated. */
89: char *
90: str_concat3(const char *s1, const char *s2, const char *s3)
91: {
92: size_t len1 = strlen(s1);
93: size_t len2 = strlen(s2);
94: size_t len3 = strlen(s3);
95: char *result = bmake_malloc(len1 + len2 + len3 + 1);
96: memcpy(result, s1, len1);
97: memcpy(result + len1, s2, len2);
98: memcpy(result + len1 + len2, s3, len3 + 1);
1.45 rillig 99: return result;
1.1 cgd 100: }
101:
/*
 * Join s1, s2, s3 and s4 into a single newly allocated string and return
 * it.  The buffer is obtained from bmake_malloc.
 */
char *
str_concat4(const char *s1, const char *s2, const char *s3, const char *s4)
{
	size_t n1 = strlen(s1);
	size_t n2 = strlen(s2);
	size_t n3 = strlen(s3);
	size_t n4 = strlen(s4);
	char *joined = bmake_malloc(n1 + n2 + n3 + n4 + 1);
	char *dst = joined;

	memcpy(dst, s1, n1);
	dst += n1;
	memcpy(dst, s2, n2);
	dst += n2;
	memcpy(dst, s3, n3);
	dst += n3;
	/* The "+ 1" copies the terminating '\0' of s4 as well. */
	memcpy(dst, s4, n4 + 1);
	return joined;
}
117:
1.76 rillig 118: /*
119: * Fracture a string into an array of words (as delineated by tabs or spaces)
1.73 rillig 120: * taking quotation marks into account.
1.64 rillig 121: *
122: * If expand is TRUE, quotes are removed and escape sequences such as \r, \t,
1.73 rillig 123: * etc... are expanded. In this case, return NULL on parse errors.
1.64 rillig 124: *
1.73 rillig 125: * Returns the fractured words, which must be freed later using Words_Free,
126: * unless the returned Words.words was NULL.
1.1 cgd 127: */
1.64 rillig 128: Words
129: Str_Words(const char *str, Boolean expand)
1.1 cgd 130: {
1.56 rillig 131: size_t str_len;
132: char *words_buf;
1.61 rillig 133: size_t words_cap;
1.56 rillig 134: char **words;
1.61 rillig 135: size_t words_len;
1.56 rillig 136: char inquote;
137: char *word_start;
138: char *word_end;
1.55 rillig 139: const char *str_p;
140:
1.72 rillig 141: /* XXX: why only hspace, not whitespace? */
142: cpp_skip_hspace(&str); /* skip leading space chars. */
1.1 cgd 143:
1.41 rillig 144: /* words_buf holds the words, separated by '\0'. */
1.55 rillig 145: str_len = strlen(str);
1.75 rillig 146: words_buf = bmake_malloc(str_len + 1);
1.1 cgd 147:
1.70 rillig 148: words_cap = str_len / 5 > 50 ? str_len / 5 : 50;
1.55 rillig 149: words = bmake_malloc((words_cap + 1) * sizeof(char *));
1.35 sjg 150:
151: /*
1.1 cgd 152: * copy the string; at the same time, parse backslashes,
1.41 rillig 153: * quotes and build the word list.
1.1 cgd 154: */
1.55 rillig 155: words_len = 0;
156: inquote = '\0';
157: word_start = words_buf;
158: word_end = words_buf;
1.80 rillig 159: for (str_p = str;; str_p++) {
1.41 rillig 160: char ch = *str_p;
1.56 rillig 161: switch (ch) {
1.1 cgd 162: case '"':
163: case '\'':
1.78 rillig 164: if (inquote != '\0') {
1.1 cgd 165: if (inquote == ch)
1.4 cgd 166: inquote = '\0';
1.1 cgd 167: else
168: break;
1.56 rillig 169: } else {
1.69 rillig 170: inquote = ch;
1.6 jtc 171: /* Don't miss "" or '' */
1.41 rillig 172: if (word_start == NULL && str_p[1] == inquote) {
1.30 christos 173: if (!expand) {
1.41 rillig 174: word_start = word_end;
175: *word_end++ = ch;
1.30 christos 176: } else
1.41 rillig 177: word_start = word_end + 1;
178: str_p++;
1.25 christos 179: inquote = '\0';
1.6 jtc 180: break;
181: }
182: }
1.8 jtc 183: if (!expand) {
1.41 rillig 184: if (word_start == NULL)
185: word_start = word_end;
186: *word_end++ = ch;
1.8 jtc 187: }
1.1 cgd 188: continue;
189: case ' ':
190: case '\t':
1.8 jtc 191: case '\n':
1.78 rillig 192: if (inquote != '\0')
1.1 cgd 193: break;
1.41 rillig 194: if (word_start == NULL)
1.1 cgd 195: continue;
196: /* FALLTHROUGH */
197: case '\0':
198: /*
1.41 rillig 199: * end of a token -- make sure there's enough words
1.1 cgd 200: * space and save off a pointer.
201: */
1.41 rillig 202: if (word_start == NULL)
1.56 rillig 203: goto done;
1.8 jtc 204:
1.41 rillig 205: *word_end++ = '\0';
206: if (words_len == words_cap) {
1.56 rillig 207: size_t new_size;
1.41 rillig 208: words_cap *= 2; /* ramp up fast */
1.56 rillig 209: new_size = (words_cap + 1) * sizeof(char *);
210: words = bmake_realloc(words, new_size);
1.1 cgd 211: }
1.41 rillig 212: words[words_len++] = word_start;
213: word_start = NULL;
1.31 christos 214: if (ch == '\n' || ch == '\0') {
1.77 rillig 215: if (expand && inquote != '\0') {
1.41 rillig 216: free(words);
217: free(words_buf);
1.64 rillig 218: return (Words){ NULL, 0, NULL };
1.31 christos 219: }
1.1 cgd 220: goto done;
1.31 christos 221: }
1.1 cgd 222: continue;
223: case '\\':
1.8 jtc 224: if (!expand) {
1.41 rillig 225: if (word_start == NULL)
226: word_start = word_end;
227: *word_end++ = '\\';
228: /* catch '\' at end of line */
229: if (str_p[1] == '\0')
1.26 erh 230: continue;
1.41 rillig 231: ch = *++str_p;
1.8 jtc 232: break;
233: }
1.13 christos 234:
1.41 rillig 235: switch (ch = *++str_p) {
1.1 cgd 236: case '\0':
237: case '\n':
238: /* hmmm; fix it up as best we can */
239: ch = '\\';
1.74 rillig 240: str_p--;
1.1 cgd 241: break;
242: case 'b':
243: ch = '\b';
244: break;
245: case 'f':
246: ch = '\f';
247: break;
248: case 'n':
249: ch = '\n';
250: break;
251: case 'r':
252: ch = '\r';
253: break;
254: case 't':
255: ch = '\t';
256: break;
257: }
258: break;
259: }
1.41 rillig 260: if (word_start == NULL)
261: word_start = word_end;
262: *word_end++ = ch;
1.1 cgd 263: }
1.56 rillig 264: done:
1.73 rillig 265: words[words_len] = NULL; /* useful for argv */
1.64 rillig 266: return (Words){ words, words_len, words_buf };
1.1 cgd 267: }
268:
269: /*
1.51 rillig 270: * Str_Match -- Test if a string matches a pattern like "*.[ch]".
1.73 rillig 271: * The following special characters are known *?\[] (as in fnmatch(3)).
1.13 christos 272: *
1.73 rillig 273: * XXX: this function does not detect or report malformed patterns.
1.1 cgd 274: */
1.51 rillig 275: Boolean
276: Str_Match(const char *str, const char *pat)
1.1 cgd 277: {
278: for (;;) {
279: /*
280: * See if we're at the end of both the pattern and the
1.73 rillig 281: * string. If so, we succeeded. If we're at the end of the
1.1 cgd 282: * pattern but not at the end of the string, we failed.
283: */
1.71 rillig 284: if (*pat == '\0')
285: return *str == '\0';
286: if (*str == '\0' && *pat != '*')
1.51 rillig 287: return FALSE;
288:
1.1 cgd 289: /*
1.51 rillig 290: * A '*' in the pattern matches any substring. We handle this
291: * by calling ourselves for each suffix of the string.
1.1 cgd 292: */
1.51 rillig 293: if (*pat == '*') {
294: pat++;
295: while (*pat == '*')
296: pat++;
1.71 rillig 297: if (*pat == '\0')
1.51 rillig 298: return TRUE;
1.71 rillig 299: while (*str != '\0') {
1.51 rillig 300: if (Str_Match(str, pat))
301: return TRUE;
302: str++;
1.1 cgd 303: }
1.51 rillig 304: return FALSE;
1.1 cgd 305: }
1.51 rillig 306:
307: /* A '?' in the pattern matches any single character. */
308: if (*pat == '?')
1.1 cgd 309: goto thisCharOK;
1.51 rillig 310:
1.1 cgd 311: /*
1.51 rillig 312: * A '[' in the pattern matches a character from a list.
313: * The '[' is followed by the list of acceptable characters,
314: * or by ranges (two characters separated by '-'). In these
315: * character lists, the backslash is an ordinary character.
1.1 cgd 316: */
1.51 rillig 317: if (*pat == '[') {
318: Boolean neg = pat[1] == '^';
1.63 rillig 319: pat += neg ? 2 : 1;
1.37 sjg 320:
1.1 cgd 321: for (;;) {
1.71 rillig 322: if (*pat == ']' || *pat == '\0') {
1.51 rillig 323: if (neg)
1.38 sjg 324: break;
1.51 rillig 325: return FALSE;
1.38 sjg 326: }
1.81 ! rillig 327: /*
! 328: * XXX: This naive comparison makes the
! 329: * control flow of the pattern parser
! 330: * dependent on the actual value of the
! 331: * string. This is unpredictable. It may be
! 332: * though that the code only looks wrong but
! 333: * actually all code paths result in the same
! 334: * behavior. This needs further tests.
! 335: */
1.51 rillig 336: if (*pat == *str)
1.1 cgd 337: break;
1.51 rillig 338: if (pat[1] == '-') {
1.71 rillig 339: if (pat[2] == '\0')
1.51 rillig 340: return neg;
341: if (*pat <= *str && pat[2] >= *str)
1.1 cgd 342: break;
1.51 rillig 343: if (*pat >= *str && pat[2] <= *str)
1.1 cgd 344: break;
1.51 rillig 345: pat += 2;
1.1 cgd 346: }
1.51 rillig 347: pat++;
1.1 cgd 348: }
1.71 rillig 349: if (neg && *pat != ']' && *pat != '\0')
1.51 rillig 350: return FALSE;
1.71 rillig 351: while (*pat != ']' && *pat != '\0')
1.51 rillig 352: pat++;
1.71 rillig 353: if (*pat == '\0')
1.51 rillig 354: pat--;
1.1 cgd 355: goto thisCharOK;
356: }
1.51 rillig 357:
1.1 cgd 358: /*
1.51 rillig 359: * A backslash in the pattern matches the character following
360: * it exactly.
1.1 cgd 361: */
1.51 rillig 362: if (*pat == '\\') {
363: pat++;
1.71 rillig 364: if (*pat == '\0')
1.51 rillig 365: return FALSE;
1.1 cgd 366: }
1.51 rillig 367:
368: if (*pat != *str)
369: return FALSE;
370:
371: thisCharOK:
372: pat++;
373: str++;
1.1 cgd 374: }
1.4 cgd 375: }
CVSweb <webmaster@jp.NetBSD.org>