version 1.13, 2003/10/18 23:48:42 |
version 1.26, 2016/04/11 00:50:13 |
Line 41 __RCSID("$NetBSD$"); |
|
Line 41 __RCSID("$NetBSD$"); |
|
#endif |
#endif |
#endif /* not lint && not SCCSID */ |
#endif /* not lint && not SCCSID */ |
|
|
|
/* We build this file twice, once as NARROW, once as WIDE. */ |
/* |
/* |
* tokenize.c: Bourne shell like tokenizer |
* tokenize.c: Bourne shell like tokenizer |
*/ |
*/ |
#include <string.h> |
|
#include <stdlib.h> |
#include <stdlib.h> |
#include "tokenizer.h" |
#include <string.h> |
|
|
|
#include "histedit.h" |
|
#include "chartype.h" |
|
|
typedef enum { |
typedef enum { |
Q_none, Q_single, Q_double, Q_one, Q_doubleone |
Q_none, Q_single, Q_double, Q_one, Q_doubleone |
} quote_t; |
} quote_t; |
|
|
#define IFS "\t \n" |
|
|
|
#define TOK_KEEP 1 |
#define TOK_KEEP 1 |
#define TOK_EAT 2 |
#define TOK_EAT 2 |
|
|
#define WINCR 20 |
#define WINCR 20 |
#define AINCR 10 |
#define AINCR 10 |
|
|
#define tok_strdup(a) strdup(a) |
#define IFS STR("\t \n") |
|
|
#define tok_malloc(a) malloc(a) |
#define tok_malloc(a) malloc(a) |
#define tok_free(a) free(a) |
#define tok_free(a) free(a) |
#define tok_realloc(a, b) realloc(a, b) |
#define tok_realloc(a, b) realloc(a, b) |
|
|
|
#ifdef NARROWCHAR |
|
#define Char char |
|
#define FUN(prefix, rest) prefix ## _ ## rest |
|
#define TYPE(type) type |
|
#define STR(x) x |
|
#define Strchr(s, c) strchr(s, c) |
|
#define tok_strdup(s) strdup(s) |
|
#else |
|
#define Char wchar_t |
|
#define FUN(prefix, rest) prefix ## _w ## rest |
|
#define TYPE(type) type ## W |
|
#define STR(x) L ## x |
|
#define Strchr(s, c) wcschr(s, c) |
|
#define tok_strdup(s) wcsdup(s) |
|
#endif |
|
|
struct tokenizer { |
struct TYPE(tokenizer) { |
char *ifs; /* In field separator */ |
Char *ifs; /* In field separator */ |
int argc, amax; /* Current and maximum number of args */ |
size_t argc, amax; /* Current and maximum number of args */ |
char **argv; /* Argument list */ |
Char **argv; /* Argument list */ |
char *wptr, *wmax; /* Space and limit on the word buffer */ |
Char *wptr, *wmax; /* Space and limit on the word buffer */ |
char *wstart; /* Beginning of next word */ |
Char *wstart; /* Beginning of next word */ |
char *wspace; /* Space of word buffer */ |
Char *wspace; /* Space of word buffer */ |
quote_t quote; /* Quoting state */ |
quote_t quote; /* Quoting state */ |
int flags; /* flags; */ |
int flags; /* flags; */ |
}; |
}; |
|
|
|
|
private void tok_finish(Tokenizer *); |
private void FUN(tok,finish)(TYPE(Tokenizer) *); |
|
|
|
|
/* tok_finish(): |
/* FUN(tok,finish)(): |
* Finish a word in the tokenizer. |
* Finish a word in the tokenizer. |
*/ |
*/ |
private void |
private void |
tok_finish(Tokenizer *tok) |
FUN(tok,finish)(TYPE(Tokenizer) *tok) |
{ |
{ |
|
|
*tok->wptr = '\0'; |
*tok->wptr = '\0'; |
Line 98 tok_finish(Tokenizer *tok) |
|
Line 115 tok_finish(Tokenizer *tok) |
|
} |
} |
|
|
|
|
/* tok_init(): |
/* FUN(tok,init)(): |
* Initialize the tokenizer |
* Initialize the tokenizer |
*/ |
*/ |
public Tokenizer * |
public TYPE(Tokenizer) * |
tok_init(const char *ifs) |
FUN(tok,init)(const Char *ifs) |
{ |
{ |
Tokenizer *tok = (Tokenizer *) tok_malloc(sizeof(Tokenizer)); |
TYPE(Tokenizer) *tok = tok_malloc(sizeof(*tok)); |
|
|
if (tok == NULL) |
if (tok == NULL) |
return NULL; |
return NULL; |
tok->ifs = tok_strdup(ifs ? ifs : IFS); |
tok->ifs = tok_strdup(ifs ? ifs : IFS); |
if (tok->ifs == NULL) { |
if (tok->ifs == NULL) { |
tok_free((ptr_t)tok); |
tok_free(tok); |
return NULL; |
return NULL; |
} |
} |
tok->argc = 0; |
tok->argc = 0; |
tok->amax = AINCR; |
tok->amax = AINCR; |
tok->argv = (char **) tok_malloc(sizeof(char *) * tok->amax); |
tok->argv = tok_malloc(sizeof(*tok->argv) * tok->amax); |
if (tok->argv == NULL) { |
if (tok->argv == NULL) { |
tok_free((ptr_t)tok->ifs); |
tok_free(tok->ifs); |
tok_free((ptr_t)tok); |
tok_free(tok); |
return NULL; |
return NULL; |
} |
} |
tok->argv[0] = NULL; |
tok->argv[0] = NULL; |
tok->wspace = (char *) tok_malloc(WINCR); |
tok->wspace = tok_malloc(WINCR * sizeof(*tok->wspace)); |
if (tok->wspace == NULL) { |
if (tok->wspace == NULL) { |
tok_free((ptr_t)tok->argv); |
tok_free(tok->argv); |
tok_free((ptr_t)tok->ifs); |
tok_free(tok->ifs); |
tok_free((ptr_t)tok); |
tok_free(tok); |
return NULL; |
return NULL; |
} |
} |
tok->wmax = tok->wspace + WINCR; |
tok->wmax = tok->wspace + WINCR; |
Line 135 tok_init(const char *ifs) |
|
Line 152 tok_init(const char *ifs) |
|
tok->flags = 0; |
tok->flags = 0; |
tok->quote = Q_none; |
tok->quote = Q_none; |
|
|
return (tok); |
return tok; |
} |
} |
|
|
|
|
/* tok_reset(): |
/* FUN(tok,reset)(): |
* Reset the tokenizer |
* Reset the tokenizer |
*/ |
*/ |
public void |
public void |
tok_reset(Tokenizer *tok) |
FUN(tok,reset)(TYPE(Tokenizer) *tok) |
{ |
{ |
|
|
tok->argc = 0; |
tok->argc = 0; |
Line 154 tok_reset(Tokenizer *tok) |
|
Line 171 tok_reset(Tokenizer *tok) |
|
} |
} |
|
|
|
|
/* tok_end(): |
/* FUN(tok,end)(): |
* Clean up |
* Clean up |
*/ |
*/ |
public void |
public void |
tok_end(Tokenizer *tok) |
FUN(tok,end)(TYPE(Tokenizer) *tok) |
{ |
{ |
|
|
tok_free((ptr_t) tok->ifs); |
tok_free(tok->ifs); |
tok_free((ptr_t) tok->wspace); |
tok_free(tok->wspace); |
tok_free((ptr_t) tok->argv); |
tok_free(tok->argv); |
tok_free((ptr_t) tok); |
tok_free(tok); |
} |
} |
|
|
|
|
|
|
/* tok_line(): |
/* FUN(tok,line)(): |
* Bourne shell like tokenizing |
* Bourne shell (sh(1)) like tokenizing |
* Return: |
* Arguments: |
* -1: Internal error |
* tok current tokenizer state (set up with FUN(tok,init)()) |
* 3: Quoted return |
* line line to parse |
* 2: Unmatched double quote |
* Returns: |
* 1: Unmatched single quote |
* -1 Internal error |
* 0: Ok |
* 3 Quoted return |
|
* 2 Unmatched double quote |
|
* 1 Unmatched single quote |
|
* 0 Ok |
|
* Modifies (if return value is 0): |
|
* argc number of arguments |
|
* argv argument array |
|
* cursorc if !NULL, argv element containing cursor |
|
* cursoro if !NULL, offset in argv[cursorc] of cursor |
*/ |
*/ |
public int |
public int |
tok_line(Tokenizer *tok, const char *line, int *argc, const char ***argv) |
FUN(tok,line)(TYPE(Tokenizer) *tok, const TYPE(LineInfo) *line, |
|
int *argc, const Char ***argv, int *cursorc, int *cursoro) |
{ |
{ |
const char *ptr; |
const Char *ptr; |
|
int cc, co; |
|
|
for (;;) { |
cc = co = -1; |
switch (*(ptr = line++)) { |
ptr = line->buffer; |
|
for (ptr = line->buffer; ;ptr++) { |
|
if (ptr >= line->lastchar) |
|
ptr = STR(""); |
|
if (ptr == line->cursor) { |
|
cc = (int)tok->argc; |
|
co = (int)(tok->wptr - tok->wstart); |
|
} |
|
switch (*ptr) { |
case '\'': |
case '\'': |
tok->flags |= TOK_KEEP; |
tok->flags |= TOK_KEEP; |
tok->flags &= ~TOK_EAT; |
tok->flags &= ~TOK_EAT; |
Line 213 tok_line(Tokenizer *tok, const char *lin |
|
Line 248 tok_line(Tokenizer *tok, const char *lin |
|
break; |
break; |
|
|
default: |
default: |
return (-1); |
return -1; |
} |
} |
break; |
break; |
|
|
Line 244 tok_line(Tokenizer *tok, const char *lin |
|
Line 279 tok_line(Tokenizer *tok, const char *lin |
|
break; |
break; |
|
|
default: |
default: |
return (-1); |
return -1; |
} |
} |
break; |
break; |
|
|
Line 275 tok_line(Tokenizer *tok, const char *lin |
|
Line 310 tok_line(Tokenizer *tok, const char *lin |
|
break; |
break; |
|
|
default: |
default: |
return (-1); |
return -1; |
} |
} |
break; |
break; |
|
|
Line 283 tok_line(Tokenizer *tok, const char *lin |
|
Line 318 tok_line(Tokenizer *tok, const char *lin |
|
tok->flags &= ~TOK_EAT; |
tok->flags &= ~TOK_EAT; |
switch (tok->quote) { |
switch (tok->quote) { |
case Q_none: |
case Q_none: |
tok_finish(tok); |
goto tok_line_outok; |
*argv = (const char **)tok->argv; |
|
*argc = tok->argc; |
|
return (0); |
|
|
|
case Q_single: |
case Q_single: |
case Q_double: |
case Q_double: |
Line 304 tok_line(Tokenizer *tok, const char *lin |
|
Line 336 tok_line(Tokenizer *tok, const char *lin |
|
break; |
break; |
|
|
default: |
default: |
return (0); |
return 0; |
} |
} |
break; |
break; |
|
|
Line 314 tok_line(Tokenizer *tok, const char *lin |
|
Line 346 tok_line(Tokenizer *tok, const char *lin |
|
/* Finish word and return */ |
/* Finish word and return */ |
if (tok->flags & TOK_EAT) { |
if (tok->flags & TOK_EAT) { |
tok->flags &= ~TOK_EAT; |
tok->flags &= ~TOK_EAT; |
return (3); |
return 3; |
} |
} |
tok_finish(tok); |
goto tok_line_outok; |
*argv = (const char **)tok->argv; |
|
*argc = tok->argc; |
|
return (0); |
|
|
|
case Q_single: |
case Q_single: |
return (1); |
return 1; |
|
|
case Q_double: |
case Q_double: |
return (2); |
return 2; |
|
|
case Q_doubleone: |
case Q_doubleone: |
tok->quote = Q_double; |
tok->quote = Q_double; |
Line 338 tok_line(Tokenizer *tok, const char *lin |
|
Line 367 tok_line(Tokenizer *tok, const char *lin |
|
break; |
break; |
|
|
default: |
default: |
return (-1); |
return -1; |
} |
} |
break; |
break; |
|
|
Line 346 tok_line(Tokenizer *tok, const char *lin |
|
Line 375 tok_line(Tokenizer *tok, const char *lin |
|
tok->flags &= ~TOK_EAT; |
tok->flags &= ~TOK_EAT; |
switch (tok->quote) { |
switch (tok->quote) { |
case Q_none: |
case Q_none: |
if (strchr(tok->ifs, *ptr) != NULL) |
if (Strchr(tok->ifs, *ptr) != NULL) |
tok_finish(tok); |
FUN(tok,finish)(tok); |
else |
else |
*tok->wptr++ = *ptr; |
*tok->wptr++ = *ptr; |
break; |
break; |
Line 370 tok_line(Tokenizer *tok, const char *lin |
|
Line 399 tok_line(Tokenizer *tok, const char *lin |
|
break; |
break; |
|
|
default: |
default: |
return (-1); |
return -1; |
|
|
} |
} |
break; |
break; |
} |
} |
|
|
if (tok->wptr >= tok->wmax - 4) { |
if (tok->wptr >= tok->wmax - 4) { |
size_t size = tok->wmax - tok->wspace + WINCR; |
size_t size = (size_t)(tok->wmax - tok->wspace + WINCR); |
char *s = (char *) tok_realloc(tok->wspace, size); |
Char *s = tok_realloc(tok->wspace, |
|
size * sizeof(*s)); |
if (s == NULL) |
if (s == NULL) |
return (-1); |
return -1; |
|
|
if (s != tok->wspace) { |
if (s != tok->wspace) { |
int i; |
size_t i; |
for (i = 0; i < tok->argc; i++) { |
for (i = 0; i < tok->argc; i++) { |
tok->argv[i] = |
tok->argv[i] = |
(tok->argv[i] - tok->wspace) + s; |
(tok->argv[i] - tok->wspace) + s; |
Line 395 tok_line(Tokenizer *tok, const char *lin |
|
Line 425 tok_line(Tokenizer *tok, const char *lin |
|
tok->wmax = s + size; |
tok->wmax = s + size; |
} |
} |
if (tok->argc >= tok->amax - 4) { |
if (tok->argc >= tok->amax - 4) { |
char **p; |
Char **p; |
tok->amax += AINCR; |
tok->amax += AINCR; |
p = (char **) tok_realloc(tok->argv, |
p = tok_realloc(tok->argv, tok->amax * sizeof(*p)); |
tok->amax * sizeof(char *)); |
if (p == NULL) { |
if (p == NULL) |
tok->amax -= AINCR; |
return (-1); |
return -1; |
|
} |
tok->argv = p; |
tok->argv = p; |
} |
} |
} |
} |
|
tok_line_outok: |
|
if (cc == -1 && co == -1) { |
|
cc = (int)tok->argc; |
|
co = (int)(tok->wptr - tok->wstart); |
|
} |
|
if (cursorc != NULL) |
|
*cursorc = cc; |
|
if (cursoro != NULL) |
|
*cursoro = co; |
|
FUN(tok,finish)(tok); |
|
*argv = (const Char **)tok->argv; |
|
*argc = (int)tok->argc; |
|
return 0; |
|
} |
|
|
|
/* FUN(tok,str)(): |
|
* Simpler version of tok_line, taking a NUL terminated line |
|
* and splitting into words, ignoring cursor state. |
|
*/ |
|
public int |
|
FUN(tok,str)(TYPE(Tokenizer) *tok, const Char *line, int *argc, |
|
const Char ***argv) |
|
{ |
|
TYPE(LineInfo) li; |
|
|
|
memset(&li, 0, sizeof(li)); |
|
li.buffer = line; |
|
li.cursor = li.lastchar = Strchr(line, '\0'); |
|
return FUN(tok,line)(tok, &li, argc, argv, NULL, NULL); |
} |
} |