[BACK]Return to tre-filter.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / external / bsd / tre / dist / lib

File: [cvs.NetBSD.org] / src / external / bsd / tre / dist / lib / tre-filter.c (download)

Revision 1.1.1.1 (vendor branch), Fri Nov 17 16:11:12 2017 UTC (2 years, 8 months ago) by rin
Branch: MAIN, LAURIKARI
CVS Tags: tre-20171117, phil-wifi-base, phil-wifi-20200421, phil-wifi-20200411, phil-wifi-20200406, phil-wifi-20191119, phil-wifi-20190609, phil-wifi, pgoyette-compat-merge-20190127, pgoyette-compat-base, pgoyette-compat-20190127, pgoyette-compat-20190118, pgoyette-compat-1226, pgoyette-compat-1126, pgoyette-compat-1020, pgoyette-compat-0930, pgoyette-compat-0906, pgoyette-compat-0728, pgoyette-compat-0625, pgoyette-compat-0521, pgoyette-compat-0502, pgoyette-compat-0422, pgoyette-compat-0415, pgoyette-compat-0407, pgoyette-compat-0330, pgoyette-compat-0322, pgoyette-compat-0315, pgoyette-compat, netbsd-9-base, netbsd-9-0-RELEASE, netbsd-9-0-RC2, netbsd-9-0-RC1, netbsd-9, is-mlppp-base, is-mlppp, HEAD
Changes since 1.1: +0 -0 lines

Import tre from https://github.com/laurikari/tre as of 20171117:

- tre_reg*b() functions are added, that take bytes literally.
- minor bug fixes

/*
  tre-filter.c: Histogram filter to quickly find regexp match candidates

  This software is released under a BSD-style license.
  See the file LICENSE for details and copyright.

*/

/* The idea of this filter is quite simple.  First, let's assume the
   search pattern is a simple string.  In order for a substring of a
   longer string to match the search pattern, it must contain the same
   number of each distinct character as the pattern, and those
   characters must occur in the same order as they occur in the
   pattern. */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <stdio.h>
#include "tre-internal.h"
#include "tre-filter.h"

/* Scans the first `len' bytes of `str' with a sliding window of at most
   `filter->window_len' bytes, keeping a histogram of the byte values
   currently inside the window.

   `filter->profile' is an array terminated by an entry whose `ch' is
   zero.  A window is a match candidate when, for every profile entry,
   the byte `ch' occurs at least `count' times in the window.

   Returns the offset from `str' of the last byte of the first candidate
   window, or -1 if no window qualifies.

   NOTE(review): as in the original code, the initial window (the first
   `window_len' bytes) is only used to prime the histogram and is never
   itself tested against the profile, so inputs with
   len <= window_len always yield -1.  Confirm this is intended. */
int
tre_filter_find(const unsigned char *str, size_t len, tre_filter_t *filter)
{
  unsigned short counts[256] = { 0 };
  unsigned int window_len = filter->window_len;
  tre_filter_profile_t *profile = filter->profile;
  size_t pos = 0;

  /* The precision argument of "%.*s" must be an int. */
  DPRINT(("tre_filter_find: %.*s\n", (int)len, str));

  /* Prime the histogram with the first window.  The length is checked
     before the byte is read so that an empty input is never
     dereferenced.  Priming also stops at a NUL byte. */
  while (pos < len && pos < window_len && str[pos] != 0)
    {
      counts[str[pos]]++;
      pos++;
    }

  /* Slide the window one byte at a time over the rest of the input. */
  for (; pos < len; pos++)
    {
      tre_filter_profile_t *p;

      counts[str[pos]]++;
      /* Only evict a byte once a full window lies behind `pos'.  If
	 priming stopped early (at a NUL byte), `pos - window_len' would
	 otherwise address memory before the start of `str'. */
      if (pos >= window_len)
	counts[str[pos - window_len]]--;

      /* Find the first profile entry the window fails to satisfy. */
      for (p = profile; p->ch; p++)
	{
	  if (counts[p->ch] < p->count)
	    break;
	}

      if (!p->ch)
	{
	  /* Reached the terminator: every entry was satisfied. */
	  DPRINT(("Found possible match at %d\n", (int)pos));
	  return (int)pos;
	}
      else
	{
	  DPRINT(("No match so far...\n"));
	}
    }

  DPRINT(("This string cannot match.\n"));
  return -1;
}