Annotation of src/usr.bin/make/str.c, Revision 1.75
1.75 ! rillig 1: /* $NetBSD: str.c,v 1.74 2020/11/16 18:28:27 rillig Exp $ */
1.10 christos 2:
1.1 cgd 3: /*-
1.13 christos 4: * Copyright (c) 1988, 1989, 1990, 1993
5: * The Regents of the University of California. All rights reserved.
1.20 agc 6: *
7: * This code is derived from software contributed to Berkeley by
8: * Adam de Boor.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
18: * 3. Neither the name of the University nor the names of its contributors
19: * may be used to endorse or promote products derived from this software
20: * without specific prior written permission.
21: *
22: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32: * SUCH DAMAGE.
33: */
34:
35: /*-
1.1 cgd 36: * Copyright (c) 1989 by Berkeley Softworks
37: * All rights reserved.
38: *
39: * This code is derived from software contributed to Berkeley by
40: * Adam de Boor.
41: *
42: * Redistribution and use in source and binary forms, with or without
43: * modification, are permitted provided that the following conditions
44: * are met:
45: * 1. Redistributions of source code must retain the above copyright
46: * notice, this list of conditions and the following disclaimer.
47: * 2. Redistributions in binary form must reproduce the above copyright
48: * notice, this list of conditions and the following disclaimer in the
49: * documentation and/or other materials provided with the distribution.
50: * 3. All advertising materials mentioning features or use of this software
51: * must display the following acknowledgement:
52: * This product includes software developed by the University of
53: * California, Berkeley and its contributors.
54: * 4. Neither the name of the University nor the names of its contributors
55: * may be used to endorse or promote products derived from this software
56: * without specific prior written permission.
57: *
58: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68: * SUCH DAMAGE.
69: */
70:
1.65 rillig 71: #include "make.h"
1.1 cgd 72:
1.65 rillig 73: /* "@(#)str.c 5.8 (Berkeley) 6/1/90" */
1.75 ! rillig 74: MAKE_RCSID("$NetBSD: str.c,v 1.74 2020/11/16 18:28:27 rillig Exp $");
1.1 cgd 75:
/*
 * Join s1 and s2 into a single string.
 * The result lives in a buffer freshly obtained from bmake_malloc.
 */
char *
str_concat2(const char *s1, const char *s2)
{
	const size_t n1 = strlen(s1);
	const size_t n2 = strlen(s2);
	char *buf = bmake_malloc(n1 + n2 + 1);
	char *dst = buf;

	memcpy(dst, s1, n1);
	dst += n1;
	memcpy(dst, s2, n2);
	dst[n2] = '\0';
	return buf;
}
1.1 cgd 87:
/*
 * Join s1, s2 and s3 into a single string.
 * The result lives in a buffer freshly obtained from bmake_malloc.
 */
char *
str_concat3(const char *s1, const char *s2, const char *s3)
{
	const size_t n1 = strlen(s1);
	const size_t n2 = strlen(s2);
	const size_t n3 = strlen(s3);
	char *buf = bmake_malloc(n1 + n2 + n3 + 1);
	char *dst = buf;

	memcpy(dst, s1, n1);
	dst += n1;
	memcpy(dst, s2, n2);
	dst += n2;
	/* The last copy also transfers the terminating '\0'. */
	memcpy(dst, s3, n3 + 1);
	return buf;
}
101:
/*
 * Join s1, s2, s3 and s4 into a single string.
 * The result lives in a buffer freshly obtained from bmake_malloc.
 */
char *
str_concat4(const char *s1, const char *s2, const char *s3, const char *s4)
{
	const char *parts[4];
	size_t lens[4];
	size_t total = 0;
	size_t i;
	char *buf;
	char *dst;

	parts[0] = s1;
	parts[1] = s2;
	parts[2] = s3;
	parts[3] = s4;

	/* First pass: measure all parts so a single allocation suffices. */
	for (i = 0; i < 4; i++) {
		lens[i] = strlen(parts[i]);
		total += lens[i];
	}

	buf = bmake_malloc(total + 1);

	/* Second pass: copy the parts back to back. */
	dst = buf;
	for (i = 0; i < 4; i++) {
		memcpy(dst, parts[i], lens[i]);
		dst += lens[i];
	}
	*dst = '\0';
	return buf;
}
117:
1.64 rillig 118: /* Fracture a string into an array of words (as delineated by tabs or spaces)
1.73 rillig 119: * taking quotation marks into account.
1.64 rillig 120: *
121: * If expand is TRUE, quotes are removed and escape sequences such as \r, \t,
1.73 rillig 122: * etc... are expanded. In this case, return NULL on parse errors.
1.64 rillig 123: *
1.73 rillig 124: * Returns the fractured words, which must be freed later using Words_Free,
125: * unless the returned Words.words was NULL.
1.1 cgd 126: */
1.64 rillig 127: Words
128: Str_Words(const char *str, Boolean expand)
1.1 cgd 129: {
1.56 rillig 130: size_t str_len;
131: char *words_buf;
1.61 rillig 132: size_t words_cap;
1.56 rillig 133: char **words;
1.61 rillig 134: size_t words_len;
1.56 rillig 135: char inquote;
136: char *word_start;
137: char *word_end;
1.55 rillig 138: const char *str_p;
139:
1.72 rillig 140: /* XXX: why only hspace, not whitespace? */
141: cpp_skip_hspace(&str); /* skip leading space chars. */
1.1 cgd 142:
1.41 rillig 143: /* words_buf holds the words, separated by '\0'. */
1.55 rillig 144: str_len = strlen(str);
1.75 ! rillig 145: words_buf = bmake_malloc(str_len + 1);
1.1 cgd 146:
1.70 rillig 147: words_cap = str_len / 5 > 50 ? str_len / 5 : 50;
1.55 rillig 148: words = bmake_malloc((words_cap + 1) * sizeof(char *));
1.35 sjg 149:
150: /*
1.1 cgd 151: * copy the string; at the same time, parse backslashes,
1.41 rillig 152: * quotes and build the word list.
1.1 cgd 153: */
1.55 rillig 154: words_len = 0;
155: inquote = '\0';
156: word_start = words_buf;
157: word_end = words_buf;
1.41 rillig 158: for (str_p = str;; ++str_p) {
159: char ch = *str_p;
1.56 rillig 160: switch (ch) {
1.1 cgd 161: case '"':
162: case '\'':
1.17 christos 163: if (inquote) {
1.1 cgd 164: if (inquote == ch)
1.4 cgd 165: inquote = '\0';
1.1 cgd 166: else
167: break;
1.56 rillig 168: } else {
1.69 rillig 169: inquote = ch;
1.6 jtc 170: /* Don't miss "" or '' */
1.41 rillig 171: if (word_start == NULL && str_p[1] == inquote) {
1.30 christos 172: if (!expand) {
1.41 rillig 173: word_start = word_end;
174: *word_end++ = ch;
1.30 christos 175: } else
1.41 rillig 176: word_start = word_end + 1;
177: str_p++;
1.25 christos 178: inquote = '\0';
1.6 jtc 179: break;
180: }
181: }
1.8 jtc 182: if (!expand) {
1.41 rillig 183: if (word_start == NULL)
184: word_start = word_end;
185: *word_end++ = ch;
1.8 jtc 186: }
1.1 cgd 187: continue;
188: case ' ':
189: case '\t':
1.8 jtc 190: case '\n':
1.1 cgd 191: if (inquote)
192: break;
1.41 rillig 193: if (word_start == NULL)
1.1 cgd 194: continue;
195: /* FALLTHROUGH */
196: case '\0':
197: /*
1.41 rillig 198: * end of a token -- make sure there's enough words
1.1 cgd 199: * space and save off a pointer.
200: */
1.41 rillig 201: if (word_start == NULL)
1.56 rillig 202: goto done;
1.8 jtc 203:
1.41 rillig 204: *word_end++ = '\0';
205: if (words_len == words_cap) {
1.56 rillig 206: size_t new_size;
1.41 rillig 207: words_cap *= 2; /* ramp up fast */
1.56 rillig 208: new_size = (words_cap + 1) * sizeof(char *);
209: words = bmake_realloc(words, new_size);
1.1 cgd 210: }
1.41 rillig 211: words[words_len++] = word_start;
212: word_start = NULL;
1.31 christos 213: if (ch == '\n' || ch == '\0') {
214: if (expand && inquote) {
1.41 rillig 215: free(words);
216: free(words_buf);
1.64 rillig 217: return (Words){ NULL, 0, NULL };
1.31 christos 218: }
1.1 cgd 219: goto done;
1.31 christos 220: }
1.1 cgd 221: continue;
222: case '\\':
1.8 jtc 223: if (!expand) {
1.41 rillig 224: if (word_start == NULL)
225: word_start = word_end;
226: *word_end++ = '\\';
227: /* catch '\' at end of line */
228: if (str_p[1] == '\0')
1.26 erh 229: continue;
1.41 rillig 230: ch = *++str_p;
1.8 jtc 231: break;
232: }
1.13 christos 233:
1.41 rillig 234: switch (ch = *++str_p) {
1.1 cgd 235: case '\0':
236: case '\n':
237: /* hmmm; fix it up as best we can */
238: ch = '\\';
1.74 rillig 239: str_p--;
1.1 cgd 240: break;
241: case 'b':
242: ch = '\b';
243: break;
244: case 'f':
245: ch = '\f';
246: break;
247: case 'n':
248: ch = '\n';
249: break;
250: case 'r':
251: ch = '\r';
252: break;
253: case 't':
254: ch = '\t';
255: break;
256: }
257: break;
258: }
1.41 rillig 259: if (word_start == NULL)
260: word_start = word_end;
261: *word_end++ = ch;
1.1 cgd 262: }
1.56 rillig 263: done:
1.73 rillig 264: words[words_len] = NULL; /* useful for argv */
1.64 rillig 265: return (Words){ words, words_len, words_buf };
1.1 cgd 266: }
267:
268: /*
1.51 rillig 269: * Str_Match -- Test if a string matches a pattern like "*.[ch]".
1.73 rillig 270: * The following special characters are known *?\[] (as in fnmatch(3)).
1.13 christos 271: *
1.73 rillig 272: * XXX: this function does not detect or report malformed patterns.
1.1 cgd 273: */
1.51 rillig 274: Boolean
275: Str_Match(const char *str, const char *pat)
1.1 cgd 276: {
277: for (;;) {
278: /*
279: * See if we're at the end of both the pattern and the
1.73 rillig 280: * string. If so, we succeeded. If we're at the end of the
1.1 cgd 281: * pattern but not at the end of the string, we failed.
282: */
1.71 rillig 283: if (*pat == '\0')
284: return *str == '\0';
285: if (*str == '\0' && *pat != '*')
1.51 rillig 286: return FALSE;
287:
1.1 cgd 288: /*
1.51 rillig 289: * A '*' in the pattern matches any substring. We handle this
290: * by calling ourselves for each suffix of the string.
1.1 cgd 291: */
1.51 rillig 292: if (*pat == '*') {
293: pat++;
294: while (*pat == '*')
295: pat++;
1.71 rillig 296: if (*pat == '\0')
1.51 rillig 297: return TRUE;
1.71 rillig 298: while (*str != '\0') {
1.51 rillig 299: if (Str_Match(str, pat))
300: return TRUE;
301: str++;
1.1 cgd 302: }
1.51 rillig 303: return FALSE;
1.1 cgd 304: }
1.51 rillig 305:
306: /* A '?' in the pattern matches any single character. */
307: if (*pat == '?')
1.1 cgd 308: goto thisCharOK;
1.51 rillig 309:
1.1 cgd 310: /*
1.51 rillig 311: * A '[' in the pattern matches a character from a list.
312: * The '[' is followed by the list of acceptable characters,
313: * or by ranges (two characters separated by '-'). In these
314: * character lists, the backslash is an ordinary character.
1.1 cgd 315: */
1.51 rillig 316: if (*pat == '[') {
317: Boolean neg = pat[1] == '^';
1.63 rillig 318: pat += neg ? 2 : 1;
1.37 sjg 319:
1.1 cgd 320: for (;;) {
1.71 rillig 321: if (*pat == ']' || *pat == '\0') {
1.51 rillig 322: if (neg)
1.38 sjg 323: break;
1.51 rillig 324: return FALSE;
1.38 sjg 325: }
1.73 rillig 326: /* XXX: This naive comparison makes the parser
327: * for the pattern dependent on the actual of
328: * the string. This is unpredictable. */
1.51 rillig 329: if (*pat == *str)
1.1 cgd 330: break;
1.51 rillig 331: if (pat[1] == '-') {
1.71 rillig 332: if (pat[2] == '\0')
1.51 rillig 333: return neg;
334: if (*pat <= *str && pat[2] >= *str)
1.1 cgd 335: break;
1.51 rillig 336: if (*pat >= *str && pat[2] <= *str)
1.1 cgd 337: break;
1.51 rillig 338: pat += 2;
1.1 cgd 339: }
1.51 rillig 340: pat++;
1.1 cgd 341: }
1.71 rillig 342: if (neg && *pat != ']' && *pat != '\0')
1.51 rillig 343: return FALSE;
1.71 rillig 344: while (*pat != ']' && *pat != '\0')
1.51 rillig 345: pat++;
1.71 rillig 346: if (*pat == '\0')
1.51 rillig 347: pat--;
1.1 cgd 348: goto thisCharOK;
349: }
1.51 rillig 350:
1.1 cgd 351: /*
1.51 rillig 352: * A backslash in the pattern matches the character following
353: * it exactly.
1.1 cgd 354: */
1.51 rillig 355: if (*pat == '\\') {
356: pat++;
1.71 rillig 357: if (*pat == '\0')
1.51 rillig 358: return FALSE;
1.1 cgd 359: }
1.51 rillig 360:
361: if (*pat != *str)
362: return FALSE;
363:
364: thisCharOK:
365: pat++;
366: str++;
1.1 cgd 367: }
1.4 cgd 368: }
CVSweb <webmaster@jp.NetBSD.org>