version 1.29, 2009/02/12 05:06:54 |
version 1.30, 2011/10/09 18:23:00 |
|
|
#endif |
#endif |
|
|
/* === regcomp.c === */ |
/* === regcomp.c === */ |
static void p_ere(struct parse *p, int stop); |
static void p_ere(struct parse *p, int stop, size_t reclimit); |
static void p_ere_exp(struct parse *p); |
static void p_ere_exp(struct parse *p, size_t reclimit); |
static void p_str(struct parse *p); |
static void p_str(struct parse *p); |
static void p_bre(struct parse *p, int end1, int end2); |
static void p_bre(struct parse *p, int end1, int end2, size_t reclimit); |
static int p_simp_re(struct parse *p, int starordinary); |
static int p_simp_re(struct parse *p, int starordinary, size_t reclimit); |
static int p_count(struct parse *p); |
static int p_count(struct parse *p); |
static void p_bracket(struct parse *p); |
static void p_bracket(struct parse *p); |
static void p_b_term(struct parse *p, cset *cs); |
static void p_b_term(struct parse *p, cset *cs); |
Line 141 static int othercase(int ch); |
|
Line 141 static int othercase(int ch); |
|
static void bothcases(struct parse *p, int ch); |
static void bothcases(struct parse *p, int ch); |
static void ordinary(struct parse *p, int ch); |
static void ordinary(struct parse *p, int ch); |
static void nonnewline(struct parse *p); |
static void nonnewline(struct parse *p); |
static void repeat(struct parse *p, sopno start, int from, int to); |
static void repeat(struct parse *p, sopno start, int from, int to, size_t reclimit); |
static int seterr(struct parse *p, int e); |
static int seterr(struct parse *p, int e); |
static cset *allocset(struct parse *p); |
static cset *allocset(struct parse *p); |
static void freeset(struct parse *p, cset *cs); |
static void freeset(struct parse *p, cset *cs); |
Line 163 static sopno dupl(struct parse *p, sopno |
|
Line 163 static sopno dupl(struct parse *p, sopno |
|
static void doemit(struct parse *p, sop op, sopno opnd); |
static void doemit(struct parse *p, sop op, sopno opnd); |
static void doinsert(struct parse *p, sop op, sopno opnd, sopno pos); |
static void doinsert(struct parse *p, sop op, sopno opnd, sopno pos); |
static void dofwd(struct parse *p, sopno pos, sopno value); |
static void dofwd(struct parse *p, sopno pos, sopno value); |
static void enlarge(struct parse *p, sopno size); |
static int enlarge(struct parse *p, sopno size); |
static void stripsnug(struct parse *p, struct re_guts *g); |
static void stripsnug(struct parse *p, struct re_guts *g); |
static void findmust(struct parse *p, struct re_guts *g); |
static void findmust(struct parse *p, struct re_guts *g); |
static sopno pluscount(struct parse *p, struct re_guts *g); |
static sopno pluscount(struct parse *p, struct re_guts *g); |
Line 211 static int never = 0; /* for use in ass |
|
Line 211 static int never = 0; /* for use in ass |
|
#define never 0 /* some <assert.h>s have bugs too */ |
#define never 0 /* some <assert.h>s have bugs too */ |
#endif |
#endif |
|
|
|
#define MEMLIMIT 0x8000000 |
|
#define MEMSIZE(p) \ |
|
((p)->ncsalloc / CHAR_BIT * (p)->g->csetsize + \ |
|
(p)->ncsalloc * sizeof(cset) + \ |
|
(p)->ssize * sizeof(sop)) |
|
#define RECLIMIT 256 |
|
|
/* |
/* |
- regcomp - interface for parser and compilation |
- regcomp - interface for parser and compilation |
= extern int regcomp(regex_t *, const char *, int); |
= extern int regcomp(regex_t *, const char *, int); |
|
|
if (g == NULL) |
if (g == NULL) |
return(REG_ESPACE); |
return(REG_ESPACE); |
p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ |
p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ |
p->strip = (sop *)malloc(p->ssize * sizeof(sop)); |
p->strip = malloc(p->ssize * sizeof(sop)); |
p->slen = 0; |
p->slen = 0; |
if (p->strip == NULL) { |
if (p->strip == NULL) { |
free(g); |
free(g); |
|
|
EMIT(OEND, 0); |
EMIT(OEND, 0); |
g->firststate = THERE(); |
g->firststate = THERE(); |
if (cflags&REG_EXTENDED) |
if (cflags&REG_EXTENDED) |
p_ere(p, OUT); |
p_ere(p, OUT, 0); |
else if (cflags&REG_NOSPEC) |
else if (cflags&REG_NOSPEC) |
p_str(p); |
p_str(p); |
else |
else |
p_bre(p, OUT, OUT); |
p_bre(p, OUT, OUT, 0); |
EMIT(OEND, 0); |
EMIT(OEND, 0); |
g->laststate = THERE(); |
g->laststate = THERE(); |
|
|
|
|
|
|
/* |
/* |
- p_ere - ERE parser top level, concatenation and alternation |
- p_ere - ERE parser top level, concatenation and alternation |
== static void p_ere(struct parse *p, int stop); |
== static void p_ere(struct parse *p, int stop, size_t reclimit); |
*/ |
*/ |
static void |
static void |
p_ere( |
p_ere( |
struct parse *p, |
struct parse *p, |
int stop) /* character this ERE should end at */ |
int stop, /* character this ERE should end at */ |
|
size_t reclimit) |
{ |
{ |
char c; |
char c; |
sopno prevback = 0; /* pacify gcc */ |
sopno prevback = 0; /* pacify gcc */ |
|
|
|
|
_DIAGASSERT(p != NULL); |
_DIAGASSERT(p != NULL); |
|
|
|
if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) { |
|
p->error = REG_ESPACE; |
|
return; |
|
} |
|
|
for (;;) { |
for (;;) { |
/* do a bunch of concatenated expressions */ |
/* do a bunch of concatenated expressions */ |
conc = HERE(); |
conc = HERE(); |
while (MORE() && (c = PEEK()) != '|' && c != stop) |
while (MORE() && (c = PEEK()) != '|' && c != stop) |
p_ere_exp(p); |
p_ere_exp(p, reclimit); |
REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ |
REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ |
|
|
if (!EAT('|')) |
if (!EAT('|')) |
|
|
|
|
/* |
/* |
- p_ere_exp - parse one subERE, an atom possibly followed by a repetition op |
- p_ere_exp - parse one subERE, an atom possibly followed by a repetition op |
== static void p_ere_exp(struct parse *p); |
== static void p_ere_exp(struct parse *p, size_t reclimit); |
*/ |
*/ |
static void |
static void |
p_ere_exp( |
p_ere_exp( |
struct parse *p) |
struct parse *p, |
|
size_t reclimit) |
{ |
{ |
char c; |
char c; |
sopno pos; |
sopno pos; |
|
|
p->pbegin[subno] = HERE(); |
p->pbegin[subno] = HERE(); |
EMIT(OLPAREN, subno); |
EMIT(OLPAREN, subno); |
if (!SEE(')')) |
if (!SEE(')')) |
p_ere(p, ')'); |
p_ere(p, ')', reclimit); |
if (subno < NPAREN) { |
if (subno < NPAREN) { |
p->pend[subno] = HERE(); |
p->pend[subno] = HERE(); |
assert(p->pend[subno] != 0); |
assert(p->pend[subno] != 0); |
|
|
count2 = INFINITY; |
count2 = INFINITY; |
} else /* just a single number */ |
} else /* just a single number */ |
count2 = count; |
count2 = count; |
repeat(p, pos, count, count2); |
repeat(p, pos, count, count2, 0); |
if (!EAT('}')) { /* error heuristics */ |
if (!EAT('}')) { /* error heuristics */ |
while (MORE() && PEEK() != '}') |
while (MORE() && PEEK() != '}') |
NEXT(); |
NEXT(); |
|
|
/* |
/* |
- p_bre - BRE parser top level, anchoring and concatenation |
- p_bre - BRE parser top level, anchoring and concatenation |
== static void p_bre(struct parse *p, int end1, \ |
== static void p_bre(struct parse *p, int end1, \ |
== int end2); |
== int end2, size_t reclimit); |
* Giving end1 as OUT essentially eliminates the end1/end2 check. |
* Giving end1 as OUT essentially eliminates the end1/end2 check. |
* |
* |
* This implementation is a bit of a kludge, in that a trailing $ is first |
* This implementation is a bit of a kludge, in that a trailing $ is first |
|
|
p_bre( |
p_bre( |
struct parse *p, |
struct parse *p, |
int end1, /* first terminating character */ |
int end1, /* first terminating character */ |
int end2) /* second terminating character */ |
int end2, /* second terminating character */ |
|
size_t reclimit) |
{ |
{ |
sopno start; |
sopno start; |
int first = 1; /* first subexpression? */ |
int first = 1; /* first subexpression? */ |
|
|
|
|
_DIAGASSERT(p != NULL); |
_DIAGASSERT(p != NULL); |
|
|
|
if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) { |
|
p->error = REG_ESPACE; |
|
return; |
|
} |
|
|
start = HERE(); |
start = HERE(); |
|
|
if (EAT('^')) { |
if (EAT('^')) { |
|
|
p->g->nbol++; |
p->g->nbol++; |
} |
} |
while (MORE() && !SEETWO(end1, end2)) { |
while (MORE() && !SEETWO(end1, end2)) { |
wasdollar = p_simp_re(p, first); |
wasdollar = p_simp_re(p, first, reclimit); |
first = 0; |
first = 0; |
} |
} |
if (wasdollar) { /* oops, that was a trailing anchor */ |
if (wasdollar) { /* oops, that was a trailing anchor */ |
|
|
|
|
/* |
/* |
- p_simp_re - parse a simple RE, an atom possibly followed by a repetition |
- p_simp_re - parse a simple RE, an atom possibly followed by a repetition |
== static int p_simp_re(struct parse *p, int starordinary); |
== static int p_simp_re(struct parse *p, int starordinary, size_t reclimit); |
*/ |
*/ |
static int /* was the simple RE an unbackslashed $? */ |
static int /* was the simple RE an unbackslashed $? */ |
p_simp_re( |
p_simp_re( |
struct parse *p, |
struct parse *p, |
int starordinary) /* is a leading * an ordinary character? */ |
int starordinary, /* is a leading * an ordinary character? */ |
|
size_t reclimit) |
{ |
{ |
int c; |
int c; |
int count; |
int count; |
|
|
EMIT(OLPAREN, subno); |
EMIT(OLPAREN, subno); |
/* the MORE here is an error heuristic */ |
/* the MORE here is an error heuristic */ |
if (MORE() && !SEETWO('\\', ')')) |
if (MORE() && !SEETWO('\\', ')')) |
p_bre(p, '\\', ')'); |
p_bre(p, '\\', ')', reclimit); |
if (subno < NPAREN) { |
if (subno < NPAREN) { |
p->pend[subno] = HERE(); |
p->pend[subno] = HERE(); |
assert(p->pend[subno] != 0); |
assert(p->pend[subno] != 0); |
|
|
count2 = INFINITY; |
count2 = INFINITY; |
} else /* just a single number */ |
} else /* just a single number */ |
count2 = count; |
count2 = count; |
repeat(p, pos, count, count2); |
repeat(p, pos, count, count2, 0); |
if (!EATTWO('\\', '}')) { /* error heuristics */ |
if (!EATTWO('\\', '}')) { /* error heuristics */ |
while (MORE() && !SEETWO('\\', '}')) |
while (MORE() && !SEETWO('\\', '}')) |
NEXT(); |
NEXT(); |
|
|
_DIAGASSERT(p != NULL); |
_DIAGASSERT(p != NULL); |
|
|
cs = allocset(p); |
cs = allocset(p); |
|
if (cs == NULL) |
|
return; |
|
|
/* Dept of Truly Sickening Special-Case Kludges */ |
/* Dept of Truly Sickening Special-Case Kludges */ |
if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", |
if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", |
|
|
|
|
/* |
/* |
- repeat - generate code for a bounded repetition, recursively if needed |
- repeat - generate code for a bounded repetition, recursively if needed |
== static void repeat(struct parse *p, sopno start, int from, int to); |
== static void repeat(struct parse *p, sopno start, int from, int to, |
|
== size_t reclimit); |
*/ |
*/ |
static void |
static void |
repeat( |
repeat( |
struct parse *p, |
struct parse *p, |
sopno start, /* operand from here to end of strip */ |
sopno start, /* operand from here to end of strip */ |
int from, /* repeated from this number */ |
int from, /* repeated from this number */ |
int to) /* to this number of times (maybe INFINITY) */ |
int to, /* to this number of times (maybe INFINITY) */ |
|
size_t reclimit) |
{ |
{ |
sopno finish; |
sopno finish; |
# define N 2 |
# define N 2 |
|
|
|
|
_DIAGASSERT(p != NULL); |
_DIAGASSERT(p != NULL); |
|
|
finish = HERE(); |
if (reclimit++ > RECLIMIT) |
|
p->error = REG_ESPACE; |
if (p->error != 0) /* head off possible runaway recursion */ |
if (p->error) |
return; |
return; |
|
|
|
finish = HERE(); |
|
|
assert(from <= to); |
assert(from <= to); |
|
|
switch (REP(MAP(from), MAP(to))) { |
switch (REP(MAP(from), MAP(to))) { |
|
|
case REP(0, INF): /* as x{1,}? */ |
case REP(0, INF): /* as x{1,}? */ |
/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ |
/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ |
INSERT(OCH_, start); /* offset is wrong... */ |
INSERT(OCH_, start); /* offset is wrong... */ |
repeat(p, start+1, 1, to); |
repeat(p, start+1, 1, to, reclimit); |
ASTERN(OOR1, start); |
ASTERN(OOR1, start); |
AHEAD(start); /* ... fix it */ |
AHEAD(start); /* ... fix it */ |
EMIT(OOR2, 0); |
EMIT(OOR2, 0); |
|
|
ASTERN(O_CH, THERETHERE()); |
ASTERN(O_CH, THERETHERE()); |
copy = dupl(p, start+1, finish+1); |
copy = dupl(p, start+1, finish+1); |
assert(copy == finish+4); |
assert(copy == finish+4); |
repeat(p, copy, 1, to-1); |
repeat(p, copy, 1, to-1, reclimit); |
break; |
break; |
case REP(1, INF): /* as x+ */ |
case REP(1, INF): /* as x+ */ |
INSERT(OPLUS_, start); |
INSERT(OPLUS_, start); |
|
|
break; |
break; |
case REP(N, N): /* as xx{m-1,n-1} */ |
case REP(N, N): /* as xx{m-1,n-1} */ |
copy = dupl(p, start, finish); |
copy = dupl(p, start, finish); |
repeat(p, copy, from-1, to-1); |
repeat(p, copy, from-1, to-1, reclimit); |
break; |
break; |
case REP(N, INF): /* as xx{n-1,INF} */ |
case REP(N, INF): /* as xx{n-1,INF} */ |
copy = dupl(p, start, finish); |
copy = dupl(p, start, finish); |
repeat(p, copy, from-1, to); |
repeat(p, copy, from-1, to, reclimit); |
break; |
break; |
default: /* "can't happen" */ |
default: /* "can't happen" */ |
SETERROR(REG_ASSERT); /* just in case */ |
SETERROR(REG_ASSERT); /* just in case */ |
|
|
nc = p->ncsalloc; |
nc = p->ncsalloc; |
assert(nc % CHAR_BIT == 0); |
assert(nc % CHAR_BIT == 0); |
nbytes = nc / CHAR_BIT * css; |
nbytes = nc / CHAR_BIT * css; |
|
if (MEMSIZE(p) > MEMLIMIT) |
|
goto oomem; |
if (p->g->sets == NULL) |
if (p->g->sets == NULL) |
p->g->sets = malloc(nc * sizeof(cset)); |
p->g->sets = malloc(nc * sizeof(cset)); |
else |
else |
|
|
(void) memset((char *)p->g->setbits + (nbytes - css), |
(void) memset((char *)p->g->setbits + (nbytes - css), |
0, css); |
0, css); |
else { |
else { |
|
oomem: |
no = 0; |
no = 0; |
SETERROR(REG_ESPACE); |
SETERROR(REG_ESPACE); |
/* caller's responsibility not to do set ops */ |
/* caller's responsibility not to do set ops */ |
|
return NULL; |
} |
} |
} |
} |
|
|
assert(p->g->sets != NULL); /* xxx */ |
|
cs = &p->g->sets[no]; |
cs = &p->g->sets[no]; |
cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); |
cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); |
cs->mask = 1 << ((no) % CHAR_BIT); |
cs->mask = 1 << ((no) % CHAR_BIT); |
|
|
assert(finish >= start); |
assert(finish >= start); |
if (len == 0) |
if (len == 0) |
return(ret); |
return(ret); |
enlarge(p, p->ssize + len); /* this many unexpected additions */ |
if (!enlarge(p, p->ssize + len))/* this many unexpected additions */ |
assert(p->ssize >= p->slen + len); |
return ret; |
(void)memcpy(p->strip + p->slen, p->strip + start, |
(void)memcpy(p->strip + p->slen, p->strip + start, |
(size_t)len * sizeof(sop)); |
(size_t)len * sizeof(sop)); |
p->slen += len; |
p->slen += len; |
|
|
|
|
/* deal with undersized strip */ |
/* deal with undersized strip */ |
if (p->slen >= p->ssize) |
if (p->slen >= p->ssize) |
enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ |
if (!enlarge(p, (p->ssize+1) / 2 * 3)) /* +50% */ |
assert(p->slen < p->ssize); |
return; |
|
|
/* finally, it's all reduced to the easy case */ |
/* finally, it's all reduced to the easy case */ |
p->strip[p->slen++] = SOP(op, opnd); |
p->strip[p->slen++] = SOP(op, opnd); |
|
|
- enlarge - enlarge the strip |
- enlarge - enlarge the strip |
== static void enlarge(struct parse *p, sopno size); |
== static int enlarge(struct parse *p, sopno size); |
*/ |
*/ |
static void |
static int |
enlarge( |
enlarge( |
struct parse *p, |
struct parse *p, |
sopno size) |
sopno size) |
{ |
{ |
sop *sp; |
sop *sp; |
|
sopno osize; |
|
|
_DIAGASSERT(p != NULL); |
_DIAGASSERT(p != NULL); |
|
|
if (p->ssize >= size) |
if (p->ssize >= size) |
return; |
return 1; |
|
|
sp = (sop *)realloc(p->strip, size*sizeof(sop)); |
osize = p->ssize; |
|
p->ssize = size; |
|
if (MEMSIZE(p) > MEMLIMIT) |
|
goto oomem; |
|
sp = realloc(p->strip, p->ssize * sizeof(sop)); |
if (sp == NULL) { |
if (sp == NULL) { |
|
oomem: |
|
p->ssize = osize; |
SETERROR(REG_ESPACE); |
SETERROR(REG_ESPACE); |
return; |
return 0; |
} |
} |
p->strip = sp; |
p->strip = sp; |
p->ssize = size; |
return 1; |
} |
} |
|
|
/* |
/* |