[BACK]Return to regexec.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / external / bsd / tre / dist / lib

File: [cvs.NetBSD.org] / src / external / bsd / tre / dist / lib / regexec.c (download)

Revision 1.10, Sun Nov 19 13:00:31 2017 UTC (2 years, 8 months ago) by rin
Branch: MAIN
CVS Tags: phil-wifi-base, phil-wifi-20200421, phil-wifi-20200411, phil-wifi-20200406, phil-wifi-20191119, phil-wifi-20190609, phil-wifi, pgoyette-compat-merge-20190127, pgoyette-compat-base, pgoyette-compat-20190127, pgoyette-compat-20190118, pgoyette-compat-1226, pgoyette-compat-1126, pgoyette-compat-1020, pgoyette-compat-0930, pgoyette-compat-0906, pgoyette-compat-0728, pgoyette-compat-0625, pgoyette-compat-0521, pgoyette-compat-0502, pgoyette-compat-0422, pgoyette-compat-0415, pgoyette-compat-0407, pgoyette-compat-0330, pgoyette-compat-0322, pgoyette-compat-0315, pgoyette-compat, netbsd-9-base, netbsd-9-0-RELEASE, netbsd-9-0-RC2, netbsd-9-0-RC1, netbsd-9, is-mlppp-base, is-mlppp, HEAD
Changes since 1.9: +14 -4 lines

nmatch and pmatch should be ignored when regex was compiled with REG_NOSUB,
partially taken from musl libc:
https://git.musl-libc.org/cgit/musl/commit/src/regex/regexec.c?id=72ed3d47e567b1635a35d3c1d174c8a8b2787e30

/*
  tre_regexec.c - TRE POSIX compatible matching functions (and more).

  This software is released under a BSD-style license.
  See the file LICENSE for details and copyright.

*/

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif /* HAVE_CONFIG_H */

#include "tre-alloca.h"

#include <assert.h>
#include <stdlib.h>
#include <string.h>
#ifdef HAVE_WCHAR_H
#include <wchar.h>
#endif /* HAVE_WCHAR_H */
#ifdef HAVE_WCTYPE_H
#include <wctype.h>
#endif /* HAVE_WCTYPE_H */
#ifndef TRE_WCHAR
#include <ctype.h>
#endif /* !TRE_WCHAR */
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif /* HAVE_MALLOC_H */
#include <limits.h>

#include "tre-internal.h"
#include "tre.h"
#include "xmalloc.h"

/* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match
   endpoint values. */
void
tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
		const tre_tnfa_t *tnfa, int *tags, int match_eo)
{
  tre_submatch_data_t *submatch_data;
  unsigned int i, j;
  int *parents;

  if (cflags & REG_NOSUB)
    return;

  i = 0;
  if (match_eo >= 0)
    {
      /* Construct submatch offsets from the tags. */
      DPRINT(("end tag = t%d = %d\n", tnfa->end_tag, match_eo));
      submatch_data = tnfa->submatch_data;
      while (i < tnfa->num_submatches && i < nmatch)
	{
	  if (submatch_data[i].so_tag == tnfa->end_tag)
	    pmatch[i].rm_so = match_eo;
	  else
	    pmatch[i].rm_so = tags[submatch_data[i].so_tag];

	  if (submatch_data[i].eo_tag == tnfa->end_tag)
	    pmatch[i].rm_eo = match_eo;
	  else
	    pmatch[i].rm_eo = tags[submatch_data[i].eo_tag];

	  /* If either of the endpoints were not used, this submatch
	     was not part of the match. */
	  if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1)
	    pmatch[i].rm_so = pmatch[i].rm_eo = -1;

	  DPRINT(("pmatch[%d] = {t%d = %d, t%d = %d}\n", i,
		  submatch_data[i].so_tag, pmatch[i].rm_so,
		  submatch_data[i].eo_tag, pmatch[i].rm_eo));
	  i++;
	}
      /* Reset all submatches that are not within all of their parent
	 submatches. */
      i = 0;
      while (i < tnfa->num_submatches && i < nmatch)
	{
	  if (pmatch[i].rm_eo == -1)
	    assert(pmatch[i].rm_so == -1);
	  assert(pmatch[i].rm_so <= pmatch[i].rm_eo);

	  parents = submatch_data[i].parents;
	  if (parents != NULL)
	    for (j = 0; parents[j] >= 0; j++)
	      {
		DPRINT(("pmatch[%d] parent %d\n", i, parents[j]));
		if (pmatch[i].rm_so < pmatch[parents[j]].rm_so
		    || pmatch[i].rm_eo > pmatch[parents[j]].rm_eo)
		  pmatch[i].rm_so = pmatch[i].rm_eo = -1;
	      }
	  i++;
	}
    }

  while (i < nmatch)
    {
      pmatch[i].rm_so = -1;
      pmatch[i].rm_eo = -1;
      i++;
    }
}


/*
  Wrapper functions for POSIX compatible regexp matching.
*/

int
tre_have_backrefs(const regex_t *preg)
{
  tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
  return tnfa->have_backrefs;
}

int
tre_have_approx(const regex_t *preg)
{
  tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
  return tnfa->have_approx;
}

static int
tre_match(const tre_tnfa_t *tnfa, const void *string, size_t len,
	  tre_str_type_t type, size_t nmatch, regmatch_t pmatch[],
	  int eflags)
{
  reg_errcode_t status;
  int *tags = NULL, eo;
  if (tnfa->cflags & REG_NOSUB) nmatch = 0;
  if (tnfa->num_tags > 0 && nmatch > 0)
    {
#ifdef TRE_USE_ALLOCA
      tags = alloca(sizeof(*tags) * tnfa->num_tags);
#else /* !TRE_USE_ALLOCA */
      tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
#endif /* !TRE_USE_ALLOCA */
      if (tags == NULL)
	return REG_ESPACE;
    }

  /* Dispatch to the appropriate matcher. */
  if (tnfa->have_backrefs || eflags & REG_BACKTRACKING_MATCHER)
    {
      /* The regex has back references, use the backtracking matcher. */
      if (type == STR_USER)
	{
	  const tre_str_source *source = string;
	  if (source->rewind == NULL || source->compare == NULL)
	    /* The backtracking matcher requires rewind and compare
	       capabilities from the input stream. */
	    return REG_BADPAT;
	}
      status = tre_tnfa_run_backtrack(tnfa, string, (int)len, type,
				      tags, eflags, &eo);
    }
#ifdef TRE_APPROX
  else if (tnfa->have_approx || eflags & REG_APPROX_MATCHER)
    {
      /* The regex uses approximate matching, use the approximate matcher. */
      regamatch_t match;
      regaparams_t params;
      tre_regaparams_default(&params);
      params.max_err = 0;
      params.max_cost = 0;
      status = tre_tnfa_run_approx(tnfa, string, (int)len, type, tags,
				   &match, params, eflags, &eo);
    }
#endif /* TRE_APPROX */
  else
    {
      /* Exact matching, no back references, use the parallel matcher. */
      status = tre_tnfa_run_parallel(tnfa, string, (int)len, type,
				     tags, eflags, &eo);
    }

  if (status == REG_OK)
    /* A match was found, so fill the submatch registers. */
    tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo);
#ifndef TRE_USE_ALLOCA
  if (tags)
    xfree(tags);
#endif /* !TRE_USE_ALLOCA */
  return status;
}

int
tre_regnexec(const regex_t *preg, const char *str, size_t len,
	 size_t nmatch, regmatch_t pmatch[], int eflags)
{
  tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
  /* CONSTCOND */
  tre_str_type_t type = (TRE_MB_CUR_MAX == 1) ? STR_BYTE : STR_MBS;

  return tre_match(tnfa, str, len, type, nmatch, pmatch, eflags);
}

int
tre_regexec(const regex_t *preg, const char *str,
	size_t nmatch, regmatch_t pmatch[], int eflags)
{
	size_t shift, len, i;
	int ret;
	tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
	regmatch_t *p;

	if (eflags & REG_STARTEND) {
		if (pmatch == NULL || pmatch->rm_so < 0 || pmatch->rm_eo < 0
		    || pmatch->rm_so > pmatch->rm_eo)
			return REG_INVARG;
		str += shift = pmatch->rm_so;
		len = pmatch->rm_eo - pmatch->rm_so;
		eflags &= ~REG_STARTEND;
	} else {
		shift = 0;
		len = (size_t)-1;
	}

	ret = tre_regnexec(preg, str, len, nmatch, pmatch, eflags);

	if (!ret && !(tnfa->cflags & REG_NOSUB) && len != (size_t)-1) {
		for (i = nmatch, p = pmatch; i > 0; p++, i--) {
			if (p->rm_so >= 0) p->rm_so += shift;
			if (p->rm_eo >= 0) p->rm_eo += shift;
		}
	}

	return ret;
}

#ifdef REG_USEBYTES
int
tre_regexecb(const regex_t *preg, const char *str,
        size_t nmatch, regmatch_t pmatch[], int eflags)
{
  tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;

  return tre_match(tnfa, str, (unsigned)-1, STR_BYTE, nmatch, pmatch, eflags);
}

int
tre_regnexecb(const regex_t *preg, const char *str, size_t len,
        size_t nmatch, regmatch_t pmatch[], int eflags)
{
  tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;

  return tre_match(tnfa, str, len, STR_BYTE, nmatch, pmatch, eflags);
}
#endif /* REG_USEBYTES */


#ifdef TRE_WCHAR

int
tre_regwnexec(const regex_t *preg, const wchar_t *str, size_t len,
	  size_t nmatch, regmatch_t pmatch[], int eflags)
{
  tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
  return tre_match(tnfa, str, len, STR_WIDE, nmatch, pmatch, eflags);
}

int
tre_regwexec(const regex_t *preg, const wchar_t *str,
	 size_t nmatch, regmatch_t pmatch[], int eflags)
{
	size_t shift, len, i;
	int ret;
	tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
	regmatch_t *p;

	if (eflags & REG_STARTEND) {
		if (pmatch == NULL || pmatch->rm_so < 0 || pmatch->rm_eo < 0
		    || pmatch->rm_so > pmatch->rm_eo)
			return REG_INVARG;
		str += shift = pmatch->rm_so;
		len = pmatch->rm_eo - pmatch->rm_so;
		eflags &= ~REG_STARTEND;
	} else {
		shift = 0;
		len = (size_t)-1;
	}

	ret = tre_regwnexec(preg, str, len, nmatch, pmatch, eflags);

	if (!ret && !(tnfa->cflags & REG_NOSUB) && len != (size_t)-1) {
		for (i = nmatch, p = pmatch; i > 0; p++, i--) {
			if (p->rm_so >= 0) p->rm_so += shift;
			if (p->rm_eo >= 0) p->rm_eo += shift;
		}
	}

	return ret;
}

#endif /* TRE_WCHAR */

int
tre_reguexec(const regex_t *preg, const tre_str_source *str,
	 size_t nmatch, regmatch_t pmatch[], int eflags)
{
  tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
  return tre_match(tnfa, str, (unsigned)-1, STR_USER, nmatch, pmatch, eflags);
}


#ifdef TRE_APPROX

/*
  Wrapper functions for approximate regexp matching.
*/

static int
tre_match_approx(const tre_tnfa_t *tnfa, const void *string, size_t len,
		 tre_str_type_t type, regamatch_t *match, regaparams_t params,
		 int eflags)
{
  reg_errcode_t status;
  int *tags = NULL, eo;
  size_t nmatch;

  if (tnfa->cflags & REG_NOSUB)
    nmatch = 0;
  else
    nmatch = match->nmatch;

  /* If the regexp does not use approximate matching features, the
     maximum cost is zero, and the approximate matcher isn't forced,
     use the exact matcher instead. */
  if (params.max_cost == 0 && !tnfa->have_approx
      && !(eflags & REG_APPROX_MATCHER))
    return tre_match(tnfa, string, len, type, nmatch, match->pmatch,
		     eflags);

  /* Back references are not supported by the approximate matcher. */
  if (tnfa->have_backrefs)
    return REG_BADPAT;

  if (tnfa->num_tags > 0 && nmatch > 0)
    {
#if TRE_USE_ALLOCA
      tags = alloca(sizeof(*tags) * tnfa->num_tags);
#else /* !TRE_USE_ALLOCA */
      tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
#endif /* !TRE_USE_ALLOCA */
      if (tags == NULL)
	return REG_ESPACE;
    }
  status = tre_tnfa_run_approx(tnfa, string, (int)len, type, tags,
			       match, params, eflags, &eo);
  if (status == REG_OK)
    tre_fill_pmatch(nmatch, match->pmatch, tnfa->cflags, tnfa, tags, eo);
#ifndef TRE_USE_ALLOCA
  if (tags)
    xfree(tags);
#endif /* !TRE_USE_ALLOCA */
  return status;
}

int
tre_reganexec(const regex_t *preg, const char *str, size_t len,
	  regamatch_t *match, regaparams_t params, int eflags)
{
  tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
  tre_str_type_t type = (TRE_MB_CUR_MAX == 1) ? STR_BYTE : STR_MBS;

  return tre_match_approx(tnfa, str, len, type, match, params, eflags);
}

int
tre_regaexec(const regex_t *preg, const char *str,
	 regamatch_t *match, regaparams_t params, int eflags)
{
  return tre_reganexec(preg, str, (unsigned)-1, match, params, eflags);
}

#ifdef REG_USEBYTES
int
tre_regaexecb(const regex_t *preg, const char *str,
          regamatch_t *match, regaparams_t params, int eflags)
{
  tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;

  return tre_match_approx(tnfa, str, (unsigned)-1, STR_BYTE,
                          match, params, eflags);
}
#endif /* REG_USEBYTES */

#ifdef TRE_WCHAR

int
tre_regawnexec(const regex_t *preg, const wchar_t *str, size_t len,
	   regamatch_t *match, regaparams_t params, int eflags)
{
  tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
  return tre_match_approx(tnfa, str, len, STR_WIDE,
			  match, params, eflags);
}

int
tre_regawexec(const regex_t *preg, const wchar_t *str,
	  regamatch_t *match, regaparams_t params, int eflags)
{
  return tre_regawnexec(preg, str, (unsigned)-1, match, params, eflags);
}

#endif /* TRE_WCHAR */

void
tre_regaparams_default(regaparams_t *params)
{
  memset(params, 0, sizeof(*params));
  params->cost_ins = 1;
  params->cost_del = 1;
  params->cost_subst = 1;
  params->max_cost = INT_MAX;
  params->max_ins = INT_MAX;
  params->max_del = INT_MAX;
  params->max_subst = INT_MAX;
  params->max_err = INT_MAX;
}

#endif /* TRE_APPROX */

/* EOF */