[BACK]Return to bench.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / external / bsd / tre / dist / tests

File: [cvs.NetBSD.org] / src / external / bsd / tre / dist / tests / bench.c (download)

Revision 1.1.1.1 (vendor branch), Thu Feb 25 07:33:19 2010 UTC (10 years, 5 months ago) by agc
Branch: MAIN, LAURIKARI
CVS Tags: yamt-pagecache-tag8, yamt-pagecache-base9, yamt-pagecache-base8, yamt-pagecache-base7, yamt-pagecache-base6, yamt-pagecache-base5, yamt-pagecache-base4, yamt-pagecache-base3, yamt-pagecache-base2, yamt-pagecache-base, yamt-pagecache, tre-20171117, tre-0-8-0-base, tls-maxphys-base, tls-maxphys, tls-earlyentropy-base, tls-earlyentropy, riastradh-xf86-video-intel-2-7-1-pre-2-21-15, riastradh-drm2-base3, riastradh-drm2-base2, riastradh-drm2-base1, riastradh-drm2-base, riastradh-drm2, prg-localcount2-base3, prg-localcount2-base2, prg-localcount2-base1, prg-localcount2-base, prg-localcount2, phil-wifi-base, phil-wifi-20200421, phil-wifi-20200411, phil-wifi-20200406, phil-wifi-20191119, phil-wifi-20190609, phil-wifi, pgoyette-localcount-base, pgoyette-localcount-20170426, pgoyette-localcount-20170320, pgoyette-localcount-20170107, pgoyette-localcount-20161104, pgoyette-localcount-20160806, pgoyette-localcount-20160726, pgoyette-localcount, pgoyette-compat-merge-20190127, pgoyette-compat-base, pgoyette-compat-20190127, pgoyette-compat-20190118, pgoyette-compat-1226, pgoyette-compat-1126, pgoyette-compat-1020, pgoyette-compat-0930, pgoyette-compat-0906, pgoyette-compat-0728, pgoyette-compat-0625, pgoyette-compat-0521, pgoyette-compat-0502, pgoyette-compat-0422, pgoyette-compat-0415, pgoyette-compat-0407, pgoyette-compat-0330, pgoyette-compat-0322, pgoyette-compat-0315, pgoyette-compat, perseant-stdc-iso10646-base, perseant-stdc-iso10646, netbsd-9-base, netbsd-9-0-RELEASE, netbsd-9-0-RC2, netbsd-9-0-RC1, netbsd-9, netbsd-8-base, netbsd-8-2-RELEASE, netbsd-8-1-RELEASE, netbsd-8-1-RC1, netbsd-8-0-RELEASE, netbsd-8-0-RC2, netbsd-8-0-RC1, netbsd-8, netbsd-7-nhusb-base-20170116, netbsd-7-nhusb-base, netbsd-7-nhusb, netbsd-7-base, netbsd-7-2-RELEASE, netbsd-7-1-RELEASE, netbsd-7-1-RC2, netbsd-7-1-RC1, netbsd-7-1-2-RELEASE, netbsd-7-1-1-RELEASE, netbsd-7-1, netbsd-7-0-RELEASE, netbsd-7-0-RC3, netbsd-7-0-RC2, netbsd-7-0-RC1, netbsd-7-0-2-RELEASE, netbsd-7-0-1-RELEASE, 
netbsd-7-0, netbsd-7, netbsd-6-base, netbsd-6-1-RELEASE, netbsd-6-1-RC4, netbsd-6-1-RC3, netbsd-6-1-RC2, netbsd-6-1-RC1, netbsd-6-1-5-RELEASE, netbsd-6-1-4-RELEASE, netbsd-6-1-3-RELEASE, netbsd-6-1-2-RELEASE, netbsd-6-1-1-RELEASE, netbsd-6-1, netbsd-6-0-RELEASE, netbsd-6-0-RC2, netbsd-6-0-RC1, netbsd-6-0-6-RELEASE, netbsd-6-0-5-RELEASE, netbsd-6-0-4-RELEASE, netbsd-6-0-3-RELEASE, netbsd-6-0-2-RELEASE, netbsd-6-0-1-RELEASE, netbsd-6-0, netbsd-6, matt-nb8-mediatek-base, matt-nb8-mediatek, matt-nb6-plus-nbase, matt-nb6-plus-base, matt-nb6-plus, matt-mips64-premerge-20101231, localcount-20160914, is-mlppp-base, is-mlppp, cherry-xenmp-base, cherry-xenmp, bouyer-socketcan-base1, bouyer-socketcan-base, bouyer-socketcan, bouyer-quota2-nbase, bouyer-quota2-base, bouyer-quota2, agc-symver-base, agc-symver, HEAD
Changes since 1.1: +0 -0 lines

Initial import of tre-0.8.0 into the external sources framework.

With many thanks to Ville Laurikari for writing TRE in the first place
and for changing the licensing to a 2-clause BSD license; thanks also to
Matthias-Christian Ott for his work on the Google Summer of Code 2009
project.

This import brings the distribution to src/external; the reachover
build files will follow.

/*
  bench.c - simple regex benchmark program

  This software is released under a BSD-style license.
  See the file LICENSE for details and copyright.

*/

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif /* HAVE_CONFIG_H */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif /* HAVE_GETOPT_H */
#include <time.h>
#include <unistd.h>
#include <math.h>
#include <sys/types.h>

#if 0
#include <hackerlab/rx-posix/regex.h>
#else
#include <regex.h>
#endif

/* Critical values of Student's t distribution for alpha = 0.025 (upper
   tail), i.e. for a two-sided 95% confidence interval.  Entry k holds
   the value for k+1 degrees of freedom, covering 1 through 30.
   XXX - is this correct?  (Question carried over from the original
   author.) */
double t_distribution[] = {
  12.71, 4.303, 3.182, 2.776, 2.571,	/* df  1 ..  5 */
  2.447, 2.365, 2.306, 2.262, 2.228,	/* df  6 .. 10 */
  2.201, 2.179, 2.160, 2.145, 2.131,	/* df 11 .. 15 */
  2.120, 2.110, 2.101, 2.093, 2.086,	/* df 16 .. 20 */
  2.080, 2.074, 2.069, 2.064, 2.060,	/* df 21 .. 25 */
  2.056, 2.052, 2.048, 2.045, 2.042	/* df 26 .. 30 */
};

/*
  Prints summary statistics for the timing samples taken at one length.

  sample_data  array of `samples` measurements (seconds per match)
  samples      number of valid entries in sample_data
  len          the length the samples were measured at; only echoed

  Output goes to stdout: '#'-prefixed lines with the mean, the sample
  variance, the standard deviation and the 95% confidence error, followed
  by one tab-separated data line "len<TAB>mean<TAB>error".  With fewer
  than one sample nothing can be computed; a single comment line is
  printed instead.
*/
void
stats(double *sample_data, int samples, int len)
{
  const int table_len
    = (int)(sizeof t_distribution / sizeof t_distribution[0]);
  double mean, delta, sum_sq, variance, stddev, error, percent;
  int i, df;

  if (samples < 1) {
    /* Avoid dividing by zero and indexing the table at -1. */
    printf("# no samples\n");
    fflush(stdout);
    return;
  }

  mean = 0;
  for (i = 0; i < samples; i++)
    mean += sample_data[i];
  mean = mean/samples;
  printf("# mean: %.5f\n", mean);

  sum_sq = 0;
  for (i = 0; i < samples; i++) {
    delta = sample_data[i] - mean;
    sum_sq += delta*delta;
  }
  /* Unbiased sample variance; undefined for one sample, reported as 0. */
  if (samples > 1)
    variance = sum_sq / (samples-1);
  else
    variance = 0;
  stddev = sqrt(variance);
  printf("# variance: %.16f\n", variance);
  printf("# standard deviation: %.16f\n", stddev);

  /* The confidence interval of a mean of n samples uses the t value for
     n-1 degrees of freedom; t_distribution[k] belongs to k+1 degrees of
     freedom.  Beyond the end of the table the last entry is used, which
     slightly overestimates the error instead of reading out of bounds. */
  df = samples - 1;
  if (df < 1) {
    error = 0;
  } else {
    if (df > table_len)
      df = table_len;
    error = t_distribution[df-1] * stddev / sqrt(samples);
  }
  if (mean != 0)
    percent = 100*error/mean;
  else
    percent = 0;
  printf("# error: %.16f (%.4f%%)\n", error, percent);

  printf("%d\t%.5f\t%.5f\n", len, mean, error);

  fflush(stdout);
}

/*
  Times a compiled regex against one subject string.

  len          length label for this run; only printed
  samples      number of timing samples to take
  sample_data  receives `samples` results, in seconds per match
  repeats      number of tre_regexec() calls timed per sample
  reobj        compiled regex
  str          NUL-terminated subject string
  tmpbuf       scratch buffer of at least 255 bytes for error messages

  Each sample is the CPU time (clock()) of `repeats` consecutive matches
  divided by `repeats`.  A failing match is reported on stdout and the
  timing continues.  After the last sample the first five submatch
  offsets of the most recent match are printed; they read -1 when no
  match call ran or the matcher did not fill them in.
*/
void
run_tests(int len, int samples, double *sample_data, int repeats,
	  regex_t *reobj, char *str, char *tmpbuf)
{
  enum { NMATCH = 10, NPRINTED = 5 };
  int i, j, errcode;
  clock_t c1, c2;
  double secs;
  regmatch_t pmatch[NMATCH];

  /* Give the offsets a defined value so the report below never reads
     uninitialized memory. */
  for (i = 0; i < NMATCH; i++) {
    pmatch[i].rm_so = -1;
    pmatch[i].rm_eo = -1;
  }

  printf("# len = %d\n", len);
  fflush(stdout);
  for (i = 0; i < samples; i++) {
    c1 = clock();
    for (j = 0; j < repeats; j++) {
      errcode = tre_regexec(reobj, str, NMATCH, pmatch, 0);
      if (errcode) {
	tre_regerror(errcode, reobj, tmpbuf, 255);
	printf("error: %s\n", tmpbuf);
      }
    }
    c2 = clock();

    /* Divide in floating point: CLOCKS_PER_SEC*repeats could overflow
       as an integer product where clock_t is narrow. */
    secs = (double)(c2-c1)/((double)CLOCKS_PER_SEC*repeats);
    sample_data[i] = secs;

    printf("# sample: %.5f sec, clocks: %ld\n", secs, (long)(c2-c1));
    fflush(stdout);
  }
  fflush(stdout);

  for (i = 0; i < NPRINTED; i++) {
    printf("# pmatch[%d].rm_so = %d\n", i, (int)pmatch[i].rm_so);
    printf("# pmatch[%d].rm_eo = %d\n", i, (int)pmatch[i].rm_eo);
  }
}


/* Settings shared by every benchmark sweep. */
struct bench_params {
  int max_len;		/* largest length to measure (inclusive) */
  int step;		/* distance between measured lengths, >= 1 */
  int samples;		/* timing samples per length */
  int repeats;		/* matches executed per sample */
  double *sample_data;	/* room for `samples` results */
  char *errbuf;		/* scratch buffer for regex error messages */
  size_t errbuf_size;	/* size of errbuf in bytes */
};

/* Returns malloc(size); prints a message and exits on failure so that
   callers never see NULL. */
static void *
xmalloc(size_t size)
{
  void *p = malloc(size);

  if (p == NULL) {
    fprintf(stderr, "bench: out of memory (%lu bytes)\n",
	    (unsigned long)size);
    exit(1);
  }
  return p;
}

/* Builds a NUL-terminated string made of `len` copies of `fill` followed
   by `suffix`.  `len` must not be negative.  The caller frees it. */
static char *
filled_string(int len, char fill, const char *suffix)
{
  size_t suffix_len = strlen(suffix);
  char *str = xmalloc((size_t)len + suffix_len + 1);

  memset(str, fill, (size_t)len);
  memcpy(str + len, suffix, suffix_len + 1);
  return str;
}

/* Subject "aaa...a" of exactly `len` characters. */
static char *
subject_all_a(int len)
{
  return filled_string(len, 'a', "");
}

/* Subject "aaa...ab" of exactly `len` characters (empty for len 0). */
static char *
subject_a_then_b(int len)
{
  char *str = filled_string(len, 'a', "");

  if (len > 0)
    str[len-1] = 'b';
  return str;
}

/* Subject of `len` 'a' characters followed by "foobar". */
static char *
subject_a_foobar(int len)
{
  return filled_string(len, 'a', "foobar");
}

/* Subject of `len` mixed 'a'/'b' characters followed by "foobar". */
static char *
subject_ab_foobar(int len)
{
  char *str = filled_string(len, 'a', "foobar");
  int i;

  /* Without the 'b's GNU regex won't find a match!  The product is
     computed as long long because i*(i-1) overflows int for large i. */
  for (i = 0; i < len; i++)
    if ((long long)i*(i-1) % 3)
      str[i] = 'b';
  return str;
}

/* Like subject_ab_foobar(), then overwritten with 'f' as described
   below. */
static char *
subject_abf_foobar(int len)
{
  char *str = subject_ab_foobar(len);
  int i;

  /* Without this GNU regex won't find a match!
     NOTE(review): as written this replaces every character whose index
     is NOT a multiple of 1024*1024*10 - 100, i.e. almost all of them;
     possibly "== 0" was intended.  Behavior kept as in the original. */
  for (i = 0; i < len; i++)
    if (i % (1024*1024*10 - 100))
      str[i] = 'f';
  return str;
}

/* Builds the pattern "(a|a|...|a)*" with `count` alternatives ("()*"
   when count is 0) and stores the offset of the closing parenthesis in
   *close_pos.  The caller frees the result. */
static char *
alternation_pattern(int count, size_t *close_pos)
{
  char *pat = xmalloc(2 * (size_t)count + 4);
  size_t pos = 0;
  int k;

  pat[pos++] = '(';
  for (k = 0; k < count; k++) {
    if (k > 0)
      pat[pos++] = '|';
    pat[pos++] = 'a';
  }
  *close_pos = pos;
  pat[pos++] = ')';
  pat[pos++] = '*';
  pat[pos] = '\0';
  return pat;
}

/* Compiles `pattern` as an extended regex into *reobj.  Returns 0 on
   success; on failure reports the error on stderr and returns -1. */
static int
compile_pattern(regex_t *reobj, const char *pattern,
		const struct bench_params *bp)
{
  int errcode = tre_regcomp(reobj, pattern, REG_EXTENDED);

  if (errcode != 0) {
    tre_regerror(errcode, reobj, bp->errbuf, bp->errbuf_size);
    fprintf(stderr, "bench: cannot compile pattern: %s\n", bp->errbuf);
    return -1;
  }
  return 0;
}

/* Moves *len to the next measured length.  Returns 0 once the sweep is
   complete.  Written so that *len + step is never formed when it would
   exceed max_len, which also rules out integer overflow. */
static int
advance_len(int *len, const struct bench_params *bp)
{
  if (*len > bp->max_len - bp->step)
    return 0;
  *len += bp->step;
  return 1;
}

/* Benchmarks one fixed pattern against subjects of growing length
   produced by `build`.  Returns 0 on success, -1 if the pattern does not
   compile. */
static int
sweep_subject_length(const char *pattern, char *(*build)(int),
		     const struct bench_params *bp)
{
  regex_t reobj;
  int len;

  if (compile_pattern(&reobj, pattern, bp) != 0)
    return -1;

  for (len = 0; len <= bp->max_len; ) {
    char *str = build(len);

    run_tests(len, bp->samples, bp->sample_data, bp->repeats, &reobj, str,
	      bp->errbuf);
    stats(bp->sample_data, bp->samples, len);
    free(str);
    if (!advance_len(&len, bp))
      break;
  }

  tre_regfree(&reobj);
  return 0;
}

/* Benchmarks patterns with a growing number of alternatives against a
   fixed subject of 1024*1024 'a' characters.  Returns 0 on success, -1
   if a pattern does not compile. */
static int
sweep_alternation_count(const struct bench_params *bp)
{
  char *str = filled_string(1024*1024, 'a', "");
  int status = 0;
  int len;

  for (len = 0; len <= bp->max_len; ) {
    regex_t reobj;
    size_t close_pos;
    /* The pattern grows with len, so it lives on the heap rather than in
       a fixed-size buffer. */
    char *pat = alternation_pattern(len, &close_pos);

    printf("# i = %lu\n", (unsigned long)close_pos);
    printf("# pat = %s\n", pat);
    status = compile_pattern(&reobj, pat, bp);
    free(pat);
    if (status != 0)
      break;

    run_tests(len, bp->samples, bp->sample_data, bp->repeats, &reobj, str,
	      bp->errbuf);
    stats(bp->sample_data, bp->samples, len);
    tre_regfree(&reobj);
    if (!advance_len(&len, bp))
      break;
  }

  free(str);
  return status;
}

/* Benchmarks a hand-written scanner equivalent to "a*" against subjects
   of growing length, as a baseline that uses no regex at all. */
static void
sweep_hand_coded(const struct bench_params *bp)
{
  int len;

  for (len = 0; len <= bp->max_len; ) {
    char *str;
    int i, j;

    printf("# len = %d\n", len);
    str = subject_all_a(len);

    for (i = 0; i < bp->samples; i++) {
      clock_t c1, c2;
      double secs;

      c1 = clock();
      for (j = 0; j < bp->repeats; j++) {
	char *s = str;
	int l = 0;

	/* Scan up to the terminating NUL, stopping early at the first
	   character that is not 'a'. */
	while (*s != '\0') {
	  if (*s != 'a')
	    break;
	  s++;
	  l++;
	}
	(void)l;	/* the count itself is not used afterwards */
      }
      c2 = clock();

      secs = (double)(c2-c1)/((double)CLOCKS_PER_SEC*bp->repeats);
      bp->sample_data[i] = secs;

      printf("# sample: %.5f sec, clocks: %ld\n", secs, (long)(c2-c1));
      fflush(stdout);
    }
    fflush(stdout);

    stats(bp->sample_data, bp->samples, len);
    free(str);
    if (!advance_len(&len, bp))
      break;
  }
}

/*
  Benchmark driver.

  Options:
    -t id   test to run, 0..9 (required)
    -l n    maximum length (default 1024*1024*10)
    -j n    number of steps the length range is divided into (default 20)
    -s n    samples per length, 1..30 (default 20)
    -r n    matches per sample (default 10)

  Returns 0 on success and 1 on a usage error, an out-of-range option
  value, or a pattern that fails to compile.
*/
int
main(int argc, char **argv)
{
  char tmpbuf[256];
  double sample_data[30];
  const int max_samples
    = (int)(sizeof sample_data / sizeof sample_data[0]);
  int max_len = 1024*1024*10;
  int steps = 20;
  int repeats = 10;
  int samples = 20;
  int test_id = -1;
  int status = 0;
  int opt;
  struct bench_params bp;

  while ((opt = getopt(argc, argv, "r:l:s:j:t:")) != -1) {
    switch (opt) {
    case 't':
      test_id = atoi(optarg);
      break;
    case 'l':
      max_len = atoi(optarg);
      break;
    case 'j':
      steps = atoi(optarg);
      break;
    case 's':
      samples = atoi(optarg);
      break;
    case 'r':
      repeats = atoi(optarg);
      break;
    default:
      printf("Pli.\n");
      return 1;
    }
  }

  /* Reject values that would overflow sample_data, divide by zero, or
     produce meaningless timings. */
  if (samples < 1 || samples > max_samples) {
    fprintf(stderr, "bench: -s must be between 1 and %d\n", max_samples);
    return 1;
  }
  if (repeats < 1) {
    fprintf(stderr, "bench: -r must be at least 1\n");
    return 1;
  }
  if (steps < 1) {
    fprintf(stderr, "bench: -j must be at least 1\n");
    return 1;
  }

  bp.max_len = max_len;
  bp.step = max_len/steps;
  if (bp.step < 1)
    bp.step = 1;	/* a zero step would never reach max_len */
  bp.samples = samples;
  bp.repeats = repeats;
  bp.sample_data = sample_data;
  bp.errbuf = tmpbuf;
  bp.errbuf_size = sizeof tmpbuf;

  /* XXX - Check that the correct results are returned.  For example, GNU
           regex-0.12 returns incorrect results for very long strings in
	   test number 1. */

  switch (test_id) {
  case 0:
    printf("# pattern: \"a*\"\n");
    printf("# string:  \"aaaaaa...\"\n");
    status = sweep_subject_length("a*", subject_all_a, &bp);
    break;

  case 1:
    printf("# pattern: \"(a)*\"\n");
    printf("# string:  \"aaaaaa...\"\n");
    status = sweep_subject_length("(a)*", subject_all_a, &bp);
    break;

  case 2:
    printf("# pattern: \"(a*)\"\n");
    printf("# string:  \"aaaaaa...\"\n");
    status = sweep_subject_length("(a*)", subject_all_a, &bp);
    break;

  case 3:
    printf("# pattern: \"(a*)*|b*\"\n");
    printf("# string:  \"aaaaaa...b\"\n");
    status = sweep_subject_length("(a*)*|b*", subject_a_then_b, &bp);
    break;

  case 4:
    printf("# pattern: \"(a|a|a|...|a)\"\n");
    printf("# string:  \"aaaaaa...\"\n");
    status = sweep_alternation_count(&bp);
    break;

  case 5:
    printf("# pattern: \"foobar\"\n");
    printf("# string:  \"aaaaaa...foobar\"\n");
    status = sweep_subject_length("foobar", subject_a_foobar, &bp);
    break;

  case 6:
    printf("# pattern: \"a*foobar\"\n");
    printf("# string:  \"aaaaaa...foobar\"\n");
    status = sweep_subject_length("a*foobar", subject_a_foobar, &bp);
    break;

  case 7:
    printf("# pattern: \"(a)*foobar\"\n");
    printf("# string:  \"aaaaabbaaab...foobar\"\n");
    status = sweep_subject_length("(a)*foobar", subject_ab_foobar, &bp);
    break;

  case 8:
    printf("# pattern: \"(a|b)*foobar\"\n");
    printf("# string:  \"aaaaabbaaab...foobar\"\n");
    status = sweep_subject_length("(a|b)*foobar", subject_abf_foobar, &bp);
    break;

  case 9:
    printf("# pattern: hand-coded a*\n");
    printf("# string:  \"aaaaaa...\"\n");
    sweep_hand_coded(&bp);
    break;

  default:
    printf("Pelle.\n");
    return 1;
  }

  return status != 0 ? 1 : 0;
}