*** src.rgx/config.c	Thu Jan  2 23:34:31 1997
--- config.c	Thu Jan  2 23:51:21 1997
***************
*** 19,24 ****
--- 19,25 ----
  # include "compile.h"
  # include "csupport.h"
  # include "table.h"
+ # include "rgx.h"
  
  typedef struct {
      char *name;		/* name of the option */
***************
*** 810,815 ****
--- 811,819 ----
  
      /* initialize interpreter */
      i_init(conf[CREATE].u.str);
+ 
+     /* initialize regular expressions */
+     rgx_init();
  
      /* initialize compiler */
      c_init(conf[AUTO_OBJECT].u.str,
*** src.rgx/kfun/extra.c	Tue Sep 27 09:28:26 1994
--- kfun/extra.c	Thu Feb  2 22:25:18 1995
***************
*** 560,562 ****
--- 560,640 ----
      error("Not yet implemented");
  }
  # endif
+ 
+ 
+ # ifdef FUNCDEF
+ FUNCDEF("regexp_compile", kf_regexp_compile, p_regexp_compile)
+ # else
+ char p_regexp_compile[] = { C_TYPECHECKED | C_STATIC | C_VARARGS,
+ 			    T_STRING | (1 << REFSHIFT), 2, T_STRING, T_INT };
+ 
+ /*
+  * NAME:	kfun->regexp_compile()
+  * DESCRIPTION:	compile a regexp pattern, optionally ignoring case
+  */
+ int kf_regexp_compile(nargs)
+ int nargs;
+ {
+   int sensitive;
+   array *buf;
+ 
+   if (nargs < 1)
+     return -1;
+   sensitive = 1;
+   if (nargs == 2)
+     sensitive = !(sp++)->u.number;	/* pop the flag; nonzero: ignore case */
+ 
+   /* the pattern string on top of the stack is replaced by the result */
+   buf = rgx_new(sp->u.string, sensitive);
+   str_del(sp->u.string);
+   sp->type = T_ARRAY;
+   arr_ref(sp->u.array = buf);
+   return 0;
+ }
+ # endif
+ 
+ 
+ # ifdef FUNCDEF
+ FUNCDEF("regexp_match", kf_regexp_match, p_regexp_match)
+ # else
+ char p_regexp_match[] = { C_TYPECHECKED | C_STATIC | C_VARARGS,
+ 			  T_INT | (1 << REFSHIFT), 3,
+ 			  T_STRING | (1 << REFSHIFT), T_STRING, T_INT };
+ 
+ /*
+  * NAME:	kfun->regexp_match()
+  * DESCRIPTION:	match a subject string against a compiled pattern; the
+  *		result left on the stack is an array, or 0 for no match
+  */
+ int kf_regexp_match(nargs)
+ int nargs;
+ {
+   int backwards;
+   string *text;
+   array *pat, *found;
+ 
+   if (nargs < 2)
+     return -1;
+   backwards = 0;
+   if (nargs == 3)
+     backwards = (sp++)->u.number;	/* pop the optional reverse flag */
+   text = sp[0].u.string;
+   pat = sp[1].u.array;
+   if (pat->size != 3)
+     return 1;
+ 
+   found = rgx_match(d_get_elts(pat), text, backwards);
+   /* drop both arguments; the slot of the pattern receives the result */
+   str_del(text);
+   sp++;
+   arr_del(pat);
+   if (found != (array *) 0)
+     arr_ref(sp->u.array = found);
+   else
+     {
+       sp->type = T_INT;
+       sp->u.number = 0;
+     }
+   return 0;
+ }
+ # endif
*** src.rgx/kfun/kfun.h	Sun May  8 08:15:01 1994
--- kfun/kfun.h	Thu Feb  2 22:25:18 1995
***************
*** 5,7 ****
--- 5,8 ----
  # include "xfloat.h"
  # include "interpret.h"
  # include "data.h"
+ # include "rgx.h"
*** src.rgx/rgx.c	Thu Jan  2 21:41:55 1997
--- rgx.c	Thu Jan  2 21:17:46 1997
***************
*** 0 ****
--- 1,213 ----
+ # include "dgd.h"
+ # include "str.h"
+ # include "array.h"
+ # include "interpret.h"
+ # include <gnuregex.h>
+ # include "rgx.h"
+ # include <memory.h>
+ 
+ static char trans_table[256];
+ 
+ /*
+  * NAME:	regexp->init()
+  * DESCRIPTION:	set up the case-folding table: `a'..`z' are mapped to
+  *		`A'..`Z', all other character codes to themselves
+  */
+ void rgx_init()
+ {
+   register int c;
+ 
+   /* a single pass over all 256 character codes */
+   for (c = 0; c < 256; ++c)
+     trans_table[c] = (c >= 'a' && c <= 'z') ? c - 'a' + 'A' : c;
+ }
+ 
+ /*
+  * NAME:	regexp->convert()
+  * DESCRIPTION:	translate a pattern from the `%'-escaped syntax accepted by
+  *		regexp_compile() into the `\'-escaped syntax of GNU regex:
+  *		    %c    becomes  \c   (c is copied without inspection)
+  *		    \     becomes  \\   (a literal backslash)
+  *		    [..]  is copied verbatim
+  *		The source text is never modified.  Nothing is stored when
+  *		dst is 0; the converted length is returned either way.
+  */
+ static long rgx_convert(src, len, dst)
+ register char *src, *dst;
+ long len;
+ {
+   register char *end;
+   register long n;
+   register int c, state;
+ 
+   /*
+    * states: 0 plain text, 1 character after `%', 2 directly after `[',
+    *         3 first charset member after `[^', 4 rest of the charset
+    */
+   state = 0;
+   n = 0;
+   for (end = src + len; src < end; src++)
+     {
+       c = *src;
+       if (state == 0)
+         {
+           if (c == '[')
+             state = 2;
+           else if (c == '%')
+             {
+               c = '\\';		/* `%' is the escape character here */
+               state = 1;
+             }
+           else if (c == '\\')
+             {
+               /* emit an extra `\' so that GNU regex sees a literal one */
+               if (dst != (char *) 0)
+                 dst[n] = '\\';
+               n++;
+             }
+         }
+       else if (state == 1)
+         state = 0;			/* escaped character: taken as is */
+       else if (state == 2)
+         state = (c == '^') ? 3 : 4;	/* `^' precedes the first member */
+       else if (state == 3)
+         state = 4;			/* first member, even if it is `]' */
+       else if (c == ']')
+         state = 0;			/* end of the charset */
+       if (dst != (char *) 0)
+         dst[n] = c;
+       n++;
+     }
+   return n;
+ }
+ 
+ /*
+  * NAME:	regexp->new()
+  * DESCRIPTION:	compile a pattern into an array of three strings: the
+  *		pattern buffer struct, the compiled code and the fastmap.
+  *		Matching will ignore case if case_matters is zero.  An
+  *		error is raised if the pattern cannot be compiled.
+  */
+ array *rgx_new(pattern, case_matters)
+ string *pattern;
+ int case_matters;
+ {
+   struct re_pattern_buffer patbuf;
+   char fastmap[256];
+   const char *compile_error;
+   array *result;
+   register value *v;
+   string *s;
+ 
+   /* start from a clean struct; it is copied out byte for byte below */
+   memset((char *) &patbuf, '\0', sizeof(patbuf));
+   patbuf.buffer    = 0;
+   patbuf.fastmap   = fastmap;
+   patbuf.translate = (case_matters ? (char *) 0 : trans_table);
+ 
+   /* convert into a fresh string: the caller's string must stay intact */
+   s = str_new((char *) 0,
+               rgx_convert(pattern->text, (long) pattern->len, (char *) 0));
+   str_ref(s);		/* the reference that str_del() drops below */
+   rgx_convert(pattern->text, (long) pattern->len, s->text);
+   compile_error = re_compile_pattern(s->text, s->len, &patbuf);
+   str_del(s);
+   if (compile_error != (char *) 0)
+     {
+       /* fastmap and translate are not heap blocks: hide them from regfree() */
+       patbuf.fastmap   = 0;
+       patbuf.translate = 0;
+       regfree(&patbuf);
+       error("%s", compile_error);
+     }
+ 
+   re_compile_fastmap(&patbuf);
+ 
+   result = arr_new(3L);
+   v = result->elts;
+   v->type = T_STRING;
+   str_ref(v->u.string = str_new((char *) &patbuf, (long) sizeof(patbuf)));
+   ++v;
+   v->type = T_STRING;
+   str_ref(v->u.string = str_new((char *) patbuf.buffer,
+ 				(long) patbuf.allocated));
+   ++v;
+   v->type = T_STRING;
+   str_ref(v->u.string = str_new(fastmap, 256L));
+   /* don't let regfree() try to free these */
+   patbuf.fastmap   = 0;
+   patbuf.translate = 0;
+   regfree(&patbuf);
+   return result;
+ }
+ 
+ /*
+  * NAME:	regexp->match()
+  * DESCRIPTION:	match subject against the 3 elements of an array made by
+  *		rgx_new(); return 0 if there is no match, or else an array
+  *		of RGX_NREGS (start, end) pairs, (0, -1) for unused registers
+  */
+ array *rgx_match(pattern, subject, reverse)
+ value *pattern;
+ string *subject;
+ int reverse;
+ {
+   long sub_len;
+   struct re_pattern_buffer patbuf;
+   struct re_registers regs;
+   regoff_t starts[RGX_NREGS + 1], ends[RGX_NREGS + 1];
+   array *result;
+   register value *v;
+   register int i;
+ 
+   /* the caller only checked the array size: verify the contents here */
+   for (i = 0; i < 3; ++i)
+     if (pattern[i].type != T_STRING)
+       error("Invalid compiled pattern");
+   if (pattern[0].u.string->len != sizeof(struct re_pattern_buffer))
+     error("Invalid compiled pattern");
+ 
+   memcpy((char *) &patbuf, pattern[0].u.string->text,
+ 	 sizeof(struct re_pattern_buffer));
+   if (patbuf.allocated != (unsigned long) pattern[1].u.string->len ||
+       patbuf.used > patbuf.allocated || pattern[2].u.string->len != 256)
+     error("Invalid compiled pattern");
+ 
+   /* pointers saved inside the string may be stale: set all of them anew */
+   patbuf.buffer  = (unsigned char *) pattern[1].u.string->text;
+   patbuf.fastmap = pattern[2].u.string->text;
+   if (patbuf.translate != 0)
+     patbuf.translate = trans_table;
+ 
+   regs.num_regs = RGX_NREGS;
+   regs.start = starts;
+   regs.end   = ends;
+   patbuf.regs_allocated = REGS_FIXED;
+ 
+   sub_len = subject->len;
+   i = re_search(&patbuf, subject->text, sub_len, reverse ? sub_len : 0,
+ 		reverse ? -(sub_len + 1) : sub_len + 1, &regs);
+   if (i == -1)
+     return (array *) 0;
+   if (i < 0)
+     error("Regular expression matching failed");	/* registers not set */
+ 
+   result = arr_new((long) RGX_NREGS * 2);
+   v = result->elts;
+ 
+   /* register 0 is the match as a whole; end offsets are made inclusive */
+   v->type = T_INT;
+   v->u.number = starts[0];
+   ++v;
+   v->type = T_INT;
+   v->u.number = ends[0] - 1;
+   ++v;
+   for (i = 1; i < RGX_NREGS; ++i, v += 2)
+     {
+       v[0].type = T_INT;
+       v[1].type = T_INT;
+       v[0].u.number = (starts[i] == -1) ? 0 : starts[i];
+       v[1].u.number = (starts[i] == -1) ? -1 : ends[i] - 1;
+     }
+   return result;
+ }
*** src.rgx/rgx.h	Thu Jan  2 21:42:05 1997
--- rgx.h	Fri Feb  3 03:09:54 1995
***************
*** 0 ****
--- 1,5 ----
+ # define RGX_NREGS  10	/* (start, end) register pairs reported per match */
+ 
+ extern void		rgx_init	P((void));	/* fill the case-folding table */
+ extern array	       *rgx_new		P((string*, int));	/* compile a pattern */
+ extern array	       *rgx_match	P((value*, string*, int));	/* match; 0 if none */
*** doc.rgx/rgx_example.c	Thu Jan  1 00:00:00 1970
--- ../doc/rgx_example.c	Fri Feb  3 03:30:01 1995
***************
*** 0 ****
--- 1,49 ----
+ /*
+  * This file shows how an interface can be built to cache regexp patterns
+  * and ultimately provide a more streamlined interface to the regexp kfuns.
+  *
+  * Note that since regexp_match() depends critically on the result of
+  * regexp_compile() being passed to it unaltered, it is a good idea to
+  * provide an interface like this, and also to mask the regexp_match() kfun
+  * from the auto object.
+  */
+ 
+ # define CACHE_SIZE  10		/* maximum number of compiled patterns kept */
+ 
+ private	mapping	cache;		/* pattern string -> compiled pattern */
+ private	string *list;		/* cached pattern strings; list[0] is evicted first */
+ private	string	last_pattern;	/* pattern most recently moved to the end of list */
+ 
+ static
+ void create(void)
+ {
+   list  = ({ });	/* no patterns are cached yet */
+   cache = ([ ]);	/* so there are no compiled buffers either */
+ }
+ 
+ /* match subject against pattern, compiling and caching pattern as needed */
+ int *match(string subject, string pattern)
+ {
+   string *buffer;
+ 
+   buffer = cache[pattern];
+   if (buffer == 0)
+     {
+       /* miss: compile, first dropping the oldest entry if the cache is full */
+       buffer = regexp_compile(pattern);
+       if (sizeof(list) >= CACHE_SIZE)
+ 	{
+ 	  cache[list[0]] = 0;
+ 	  list = list[1 ..];
+ 	}
+       list += ({ pattern });
+       cache[pattern] = buffer;
+     }
+   else if (pattern != last_pattern)
+     /* hit: move the pattern to the most recently used end of the list */
+     list = list - ({ pattern }) + ({ pattern });
+ 
+   /* on either path pattern is now last in list; a stale value here would
+      make a later hit skip the move above and get evicted too early */
+   last_pattern = pattern;
+   return regexp_match(buffer, subject);
+ }
*** doc.rgx/kfun/regexp_compile	Thu Jan  1 00:00:00 1970
--- ../doc/kfun/regexp_compile	Tue Jul 26 00:02:34 1994
***************
*** 0 ****
--- 1,27 ----
+ NAME
+ 	regexp_compile - compile a regular expression
+ 
+ SYNOPSIS
+ 	varargs string *regexp_compile(string pattern, int case_insensitive)
+ 
+ DESCRIPTION
+ 	The argument pattern is compiled as a regular expression. If the
+ 	argument case_insensitive is nonzero, the pattern is compiled in
+ 	such a way that subsequent matching will be done without case
+ 	sensitivity. The default is to be case-sensitive.
+ 
+ 	An array of strings is returned; these strings contain binary
+ 	data and must not be altered in any way before being passed to
+ 	regexp_match().
+ 
+ 	The compiled regexp can be saved and used any number of times with
+ 	regexp_match().
+ 
+ ERRORS
+ 	If the argument pattern contains a syntactically malformed regular
+ 	expression, an error will result. An error can also occur if the
+ 	pattern is too complicated, or if there is not enough memory to
+ 	compile the pattern.
+ 
+ SEE ALSO
+ 	kfun/regexp_match
*** doc.rgx/kfun/regexp_match	Thu Jan  1 00:00:00 1970
--- ../doc/kfun/regexp_match	Mon Jul 25 22:19:42 1994
***************
*** 0 ****
--- 1,34 ----
+ NAME
+ 	regexp_match - perform regular expression matching
+ 
+ SYNOPSIS
+ 	varargs int *regexp_match(string *pattern, string subject, int reverse)
+ 
+ DESCRIPTION
+ 	The argument subject is matched against the compiled regular
+ 	expression pattern. If the argument reverse is nonzero, matching
+ 	is performed from right-to-left; otherwise, matching is performed
+ 	left-to-right.
+ 
+ 	The pattern argument must be an array of strings exactly as it
+ 	was received from regexp_compile(); otherwise, the result of
+ 	calling this function is undefined.
+ 
+ 	If the argument subject could not be matched with the regular
+ 	expression, 0 is returned. Otherwise, an array of 20 integers
+ 	is returned with this format:
+ 
+ 	    ({ start0, end0, start1, end1, ..., start9, end9 })
+ 
+ 	Each element is a character index into the subject string. The
+ 	first two elements, start0 and end0, indicate the part of the subject
+ 	that was matched by the regular expression as a whole. The following
+ 	elements indicate the starting and ending indices of each
+ 	subexpression (denoted by "%(" and "%)" pairs in the original
+ 	pattern) that was matched.
+ 
+ 	If any subexpression was not matched, the corresponding start and
+ 	end elements will be 0 and -1, respectively.
+ 
+ SEE ALSO
+ 	kfun/regexp_compile
*** doc.rgx/regexps	Thu Jan  1 00:00:00 1970
--- ../doc/regexps	Mon Jul 25 22:58:57 1994
***************
*** 0 ****
--- 1,32 ----
+ 
+ Regular expressions are composed of the following operators:
+ 
+ .		Match any single character
+ XY		Match X immediately followed by Y
+ X*		Match zero-or-more of X
+ X+		Match one-or-more of X
+ X?		Match zero-or-one of X
+ X%|Y		Match either X or Y
+ [charset]	Match any single character in `charset'
+ [^charset]	Match any single character not in `charset'
+ %(X%)		Match X, but also remember the match as a subexpression
+ %digit		Match the numbered previous subexpression
+ ^X		Match X anchored at the beginning of a line
+ X$		Match X anchored at the end of a line
+ %b		Match the empty string at the beginning or end of a word
+ %B		Match the empty string only within the middle of a word
+ %<		Match the beginning of a word
+ %>		Match the end of a word
+ %w		Match any word-constituent character
+ %W		Match any character that is not word-constituent
+ 
+ Any other character in a regular expression is matched literally with itself.
+ To match any of the special operator characters .*+?%[^$ literally, precede
+ the character with `%'.
+ 
+ A `charset' is formed by listing all desired characters within brackets. To
+ include a literal `^' in a charset, do not list it in the first position. To
+ include a literal `]', list it immediately after the opening `[' or `[^'. All
+ characters are non-special (and should not be escaped) within a charset,
+ except `-', which denotes a character range. To include a literal `-', list it
+ either first or last.
*** README.rgx.old	Fri Jan  3 03:17:21 1997
--- ../README.rgx	Fri Jan  3 03:14:29 1997
***************
*** 0 ****
--- 1,18 ----
+ dgd-rgx was written by Robert Leslie <rob@ccs.neu.edu> as an LPC interface to
+ GNU regex, adding two kfuns to DGD for regular expression matching:
+ 
+ 	regexp_compile()
+ 	regexp_match()
+ 
+ For a description of the regular expression language accepted by these kfuns,
+ please read doc/regexps.
+ 
+ Complete details for the two kfuns can be found in the doc/kfun directory.
+ 
+ Adapted by Adam David <adam@veda.is> for DGD 1.0.97 and to use the unmodified
+ GNU regexp library.
+ 
+ This software is a modification of DGD, and is therefore protected by the
+ DGD Copyright.
+ 
+ There is no warranty for this software.
