summaryrefslogtreecommitdiffstats
path: root/ldap/servers/slapd/regex.c
diff options
context:
space:
mode:
authorcvsadm <cvsadm>2005-01-21 00:44:34 +0000
committercvsadm <cvsadm>2005-01-21 00:44:34 +0000
commitb2093e3016027d6b5cf06b3f91f30769bfc099e2 (patch)
treecf58939393a9032182c4fbc4441164a9456e82f8 /ldap/servers/slapd/regex.c
downloadds-b2093e3016027d6b5cf06b3f91f30769bfc099e2.tar.gz
ds-b2093e3016027d6b5cf06b3f91f30769bfc099e2.tar.xz
ds-b2093e3016027d6b5cf06b3f91f30769bfc099e2.zip
Moving NSCP Directory Server from DirectoryBranch to TRUNK, initial drop. (foxworth)ldapserver7x
Diffstat (limited to 'ldap/servers/slapd/regex.c')
-rw-r--r--ldap/servers/slapd/regex.c1045
1 files changed, 1045 insertions, 0 deletions
diff --git a/ldap/servers/slapd/regex.c b/ldap/servers/slapd/regex.c
new file mode 100644
index 00000000..d9382b24
--- /dev/null
+++ b/ldap/servers/slapd/regex.c
@@ -0,0 +1,1045 @@
+/** BEGIN COPYRIGHT BLOCK
+ * Copyright 2001 Sun Microsystems, Inc.
+ * Portions copyright 1999, 2001-2003 Netscape Communications Corporation.
+ * All rights reserved.
+ * END COPYRIGHT BLOCK **/
+#include "slap.h" /* must come before regex.h */
+#include "portable.h"
+
+static PRLock *regex_mutex = NULL;
+
+int
+slapd_re_init( void )
+{
+ if ( NULL == regex_mutex ) {
+ regex_mutex = PR_NewLock();
+ }
+ return( NULL == regex_mutex ? -1 : 0 );
+}
+
+void
+slapd_re_lock( void )
+{
+ PR_ASSERT( NULL != regex_mutex );
+ PR_Lock( regex_mutex );
+}
+
+int
+slapd_re_unlock( void )
+{
+ PR_ASSERT( NULL != regex_mutex );
+ return( PR_Unlock( regex_mutex ) );
+}
+
+#if defined( MACOS ) || defined( DOS ) || defined( _WIN32 ) || defined( NEED_BSDREGEX )
+#include "regex.h"
+
+/*
+ * regex - Regular expression pattern matching and replacement
+ *
+ * By: Ozan S. Yigit (oz)
+ * Dept. of Computer Science
+ * York University
+ *
+ * These routines are the PUBLIC DOMAIN equivalents of regex
+ * routines as found in 4.nBSD UN*X, with minor extensions.
+ *
+ * These routines are derived from various implementations found
+ * in software tools books, and Conroy's grep. They are NOT derived
+ * from licensed/restricted software.
+ * For more interesting/academic/complicated implementations,
+ * see Henry Spencer's regexp routines, or GNU Emacs pattern
+ * matching module.
+ *
+ * Modification history:
+ *
+ * $Log: regex.c,v $
+ * Revision 1.1 2005/01/21 00:40:51 cvsadm
+ * Initial revision
+ *
+ * Revision 1.3.20.1.2.11 2005/01/14 01:22:12 nhosoi
+ * For the open-source project.
+ * 1) eliminated 'netsite' level
+ * 2) moved ns/config one level lower
+ * 3) moved fasttime to lib/base
+ *
+ * Revision 1.3.20.1.2.10 2004/10/01 18:46:09 dboreham
+ * Rename the built in regex functions to avoid colliding with the native OS versions, where presnet
+ *
+ * Revision 1.2 2004/10/01 06:29:11 david
+ * rename regex functions to avoid collision with native OS functions on Solaris
+ *
+ * Revision 1.1.1.1 2004/06/03 22:32:48 telackey
+ * Initial import Thu Jun 3 15:32:43 PDT 2004
+ *
+ * Revision 1.3.20.1.2.9 2003/09/22 19:42:12 ulfw
+ * Update copyright years from 2001 to 2001-2003
+ *
+ * Revision 1.3.20.1.2.8 2001/11/03 00:13:55 richm
+ * XXX use new copyright XXX
+ *
+ * Revision 1.3.20.1.2.7 2001/10/07 00:59:03 richm
+ * ldapserver/ldap/servers/slapd/regex.c
+ * 1.3.20.1.2.7
+ * 20010918
+ *
+ * Remove copyright caracter form copyright
+ *
+ *
+ * ====================================================
+ *
+ * Revision 1.3.20.1.2.7 2001/09/18 11:43:06 rmarco
+ * Remove copyright caracter form copyright
+ *
+ * Revision 1.3.20.1.2.6 2001/02/13 09:45:16 rmarco
+ * copyrights
+ *
+ * Revision 1.3.20.1.2.5 1999/12/06 10:49:33 robey
+ * fix gcc warning
+ *
+ * Revision 1.3.20.1.2.4 1999/11/18 01:26:26 robey
+ * fix gcc warnings
+ *
+ * Revision 1.3.20.1.2.3 1999/08/20 23:13:33 merrells
+ * tidy up extern things
+ *
+ * Revision 1.3.20.1.2.2 1999/05/19 23:41:58 merrells
+ * Keep the Solaris compiler quiet
+ *
+ * Revision 1.3.20.1.2.1 1999/05/14 18:39:50 merrells
+ * value manipulation code extraction and reworking.
+ *
+ * Revision 1.3.20.1 1998/10/10 02:28:25 ggood
+ * Copy from Directory40RtmBranchpoint to DirectoryBranch
+ *
+ * Revision 1.3.10.5 1997/12/11 23:12:01 kristian
+ * fix bugs 97502, 97504 & 96569: handle 8-bit char's (especially UTF-8) correctly.
+ *
+ * Revision 1.3.10.4 1997/12/07 22:13:08 howes
+ * Always compile re_init(), re_lock(), and re_unlock(), even
+ * on platforms where we use the native regex stuff.
+ *
+ * Revision 1.3.10.3 1997/12/07 00:01:16 howes
+ * Add re_init(), re_lock(), and re_unlock() routines, to avoid race
+ * between acl and syntax code.
+ *
+ * Revision 1.3.10.2 1997/07/17 07:49:13 mcs
+ * merge changes made on ldapsdk_10_branch into server3_directory_branch
+ *
+ * Revision 1.3.10.1 1997/03/27 06:39:33 ggood
+ * Fix up more compiler warnings.
+ *
+ * Revision 1.3 1996/11/07 00:44:44 mcs
+ * eliminate a few compiler warnings
+ *
+ * Revision 1.2 1996/11/05 21:38:05 kristian
+ * copied from Directory_1996_11_04
+ *
+ * Revision 1.1.1.1.2.1 1996/05/07 19:54:53 kristian
+ * Merged UMich ldap-3_3 into Dogbert
+ *
+ * Revision 1.1.1.2 1996/05/04 19:11:02 kristian
+ * UMich version 3.3
+ *
+ * Revision 1.2 1996/04/25 16:24:11 mcs
+ * make re_exec() match "" with ".*" and similar patterns
+ * hopefully this change doesn't break anything else!
+ *
+ * Revision 1.1 1995/02/03 15:56:52 tim
+ * Initial revision
+ *
+ * Revision 1.11 1994/12/14 21:33:45 mcs
+ * use new NEED_BSDREGEX
+ * fix pmatch() prototype
+ *
+ * Revision 1.10 1994/12/12 18:16:39 mcs
+ * use on NetBSD
+ *
+ * Revision 1.9 1994/11/15 19:16:35 mcs
+ * add (CHAR) cast to make VisualC++ happy
+ *
+ * Revision 1.8 1994/11/08 21:14:32 mcs
+ * WIN32 changes
+ *
+ * Revision 1.7 1994/07/23 19:51:24 mcs
+ * use ANSI-style inline function parameters
+ *
+ * Revision 1.6 1993/10/18 01:52:32 tim
+ * include for VMS
+ *
+ * Revision 1.5 1993/09/28 21:37:54 mcs
+ * HP/UX needs the regex we include (not in its libc)
+ *
+ * Revision 1.4 1993/08/27 15:59:52 mcs
+ * use CHAR for deftab
+ *
+ * Revision 1.3 1993/08/27 15:49:47 mcs
+ * added missing 0 to octal constants
+ * use unsigned char for CHAR under DOS
+ *
+ * Revision 1.2 1993/08/27 14:57:48 mcs
+ * add proto. for pmatch
+ *
+ * Revision 1.1 1993/08/18 21:20:02 mcs
+ * Initial revision
+ *
+ * Revision 1.4 1991/10/17 03:56:42 oz
+ * miscellaneous changes, small cleanups etc.
+ *
+ * Revision 1.3 1989/04/01 14:18:09 oz
+ * Change all references to a dfa: this is actually an nfa.
+ *
+ * Revision 1.2 88/08/28 15:36:04 oz
+ * Use a complement bitmap to represent NCL.
+ * This removes the need to have seperate
+ * code in the pmatch case block - it is
+ * just CCL code now.
+ *
+ * Use the actual CCL code in the CLO
+ * section of pmatch. No need for a recursive
+ * pmatch call.
+ *
+ * Use a bitmap table to set char bits in an
+ * 8-bit chunk.
+ *
+ * Interfaces:
+ * The following three interfaces were added to avoid a race
+ * condition in slapd. The better long-term solution is to make
+ * the regex code thread-safe, by passing in the buffers needed.
+ *
+ * re_init: initializes the regex system. must be called
+ * before any other regex calls are made.
+ *
+ * re_lock: locks the regex system. must be called to avoid
+ * races between calls to re_comp and re_exec.
+ *
+ * re_unlock unlocks the regex system. must be called after
+ * a set of calls to re_comp and re_exec.
+ *
+ * re_comp: compile a regular expression into a NFA.
+ *
+ * char *re_comp(s)
+ * char *s;
+ *
+ * re_exec: execute the NFA to match a pattern.
+ *
+ * int re_exec(s)
+ * char *s;
+ *
+ * re_modw change re_exec's understanding of what a "word"
+ * looks like (for \< and \>) by adding into the
+ * hidden word-syntax table.
+ *
+ * void re_modw(s)
+ * char *s;
+ *
+ * re_subs: substitute the matched portions in a new string.
+ *
+ * int re_subs(src, dst)
+ * char *src;
+ * char *dst;
+ *
+ * re_fail: failure routine for re_exec.
+ *
+ * void re_fail(msg, op)
+ * char *msg;
+ * char op;
+ *
+ * Regular Expressions:
+ *
+ * [1] char matches itself, unless it is a special
+ * character (metachar): . \ [ ] * + ^ $
+ *
+ * [2] . matches any character.
+ *
+ * [3] \ matches the character following it, except
+ * when followed by a left or right round bracket,
+ * a digit 1 to 9 or a left or right angle bracket.
+ * (see [7], [8] and [9])
+ * It is used as an escape character for all
+ * other meta-characters, and itself. When used
+ * in a set ([4]), it is treated as an ordinary
+ * character.
+ *
+ * [4] [set] matches one of the characters in the set.
+ * If the first character in the set is "^",
+ * it matches a character NOT in the set, i.e.
+ * complements the set. A shorthand S-E is
+ * used to specify a set of characters S upto
+ * E, inclusive. The special characters "]" and
+ * "-" have no special meaning if they appear
+ * as the first chars in the set.
+ * examples: match:
+ *
+ * [a-z] any lowercase alpha
+ *
+ * [^]-] any char except ] and -
+ *
+ * [^A-Z] any char except uppercase
+ * alpha
+ *
+ * [a-zA-Z] any alpha
+ *
+ * [5] * any regular expression form [1] to [4], followed by
+ * closure char (*) matches zero or more matches of
+ * that form.
+ *
+ * [6] + same as [5], except it matches one or more.
+ *
+ * [7] a regular expression in the form [1] to [10], enclosed
+ * as \(form\) matches what form matches. The enclosure
+ * creates a set of tags, used for [8] and for
+ * pattern substution. The tagged forms are numbered
+ * starting from 1.
+ *
+ * [8] a \ followed by a digit 1 to 9 matches whatever a
+ * previously tagged regular expression ([7]) matched.
+ *
+ * [9] \< a regular expression starting with a \< construct
+ * \> and/or ending with a \> construct, restricts the
+ * pattern matching to the beginning of a word, and/or
+ * the end of a word. A word is defined to be a character
+ * string beginning and/or ending with the characters
+ * A-Z a-z 0-9 and _. It must also be preceded and/or
+ * followed by any character outside those mentioned.
+ *
+ * [10] a composite regular expression xy where x and y
+ * are in the form [1] to [10] matches the longest
+ * match of x followed by a match for y.
+ *
+ * [11] ^ a regular expression starting with a ^ character
+ * $ and/or ending with a $ character, restricts the
+ * pattern matching to the beginning of the line,
+ * or the end of line. [anchors] Elsewhere in the
+ * pattern, ^ and $ are treated as ordinary characters.
+ *
+ *
+ * Acknowledgements:
+ *
+ * HCR's Hugh Redelmeier has been most helpful in various
+ * stages of development. He convinced me to include BOW
+ * and EOW constructs, originally invented by Rob Pike at
+ * the University of Toronto.
+ *
+ * References:
+ * Software tools Kernighan & Plauger
+ * Software tools in Pascal Kernighan & Plauger
+ * Grep [rsx-11 C dist] David Conroy
+ * ed - text editor Un*x Programmer's Manual
+ * Advanced editing on Un*x B. W. Kernighan
+ * RegExp routines Henry Spencer
+ *
+ * Notes:
+ *
+ * This implementation uses a bit-set representation for character
+ * classes for speed and compactness. Each character is represented
+ * by one bit in a 128-bit block. Thus, CCL always takes a
+ * constant 16 bytes in the internal nfa, and re_exec does a single
+ * bit comparison to locate the character in the set.
+ *
+ * Examples:
+ *
+ * pattern: foo*.*
+ * compile: CHR f CHR o CLO CHR o END CLO ANY END END
+ * matches: fo foo fooo foobar fobar foxx ...
+ *
+ * pattern: fo[ob]a[rz]
+ * compile: CHR f CHR o CCL bitset CHR a CCL bitset END
+ * matches: fobar fooar fobaz fooaz
+ *
+ * pattern: foo\\+
+ * compile: CHR f CHR o CHR o CHR \ CLO CHR \ END END
+ * matches: foo\ foo\\ foo\\\ ...
+ *
+ * pattern: \(foo\)[1-3]\1 (same as foo[1-3]foo)
+ * compile: BOT 1 CHR f CHR o CHR o EOT 1 CCL bitset REF 1 END
+ * matches: foo1foo foo2foo foo3foo
+ *
+ * pattern: \(fo.*\)-\1
+ * compile: BOT 1 CHR f CHR o CLO ANY END EOT 1 CHR - REF 1 END
+ * matches: foo-foo fo-fo fob-fob foobar-foobar ...
+ */
+
+#define MAXNFA 1024
+#define MAXTAG 10
+
+#define OKP 1
+#define NOP 0
+
+#define CHR 1
+#define ANY 2
+#define CCL 3
+#define BOL 4
+#define EOL 5
+#define BOT 6
+#define EOT 7
+#define BOW 8
+#define EOW 9
+#define REF 10
+#define CLO 11
+
+#define END 0
+
+/*
+ * The following defines are not meant to be changeable.
+ * They are for readability only.
+ */
+#define MAXCHR 128
+#define CHRBIT 8
+#define BITBLK MAXCHR/CHRBIT
+#define BLKIND 0170
+#define BITIND 07
+
+#define ASCIIB 0177
+
+typedef unsigned char UCHAR;
+/* char, on the other hand, may be signed or unsigned;
+ * it's platform-dependent. A hard fact of life, in C.
+ */
+
+static int tagstk[MAXTAG]; /* subpat tag stack..*/
+static UCHAR nfa[MAXNFA]; /* automaton.. */
+static int sta = NOP; /* status of lastpat */
+
+static UCHAR bittab[BITBLK]; /* bit table for CCL */
+ /* pre-set bits... */
+static UCHAR bitarr[] = {1,2,4,8,16,32,64,128};
+
+#ifdef DEBUG
+static void nfadump( UCHAR *ap);
+#endif
+
+static void
+chset(UCHAR c)
+{
+ bittab[((c) & (unsigned)BLKIND) >> 3] |= bitarr[(c) & BITIND];
+}
+
+#define badpat(x) (*nfa = END, x)
+#define store(x) *mp++ = x
+
+char *
+slapd_re_comp( char *pat )
+{
+ register UCHAR *p; /* pattern pointer */
+ register UCHAR *mp=nfa; /* nfa pointer */
+ register UCHAR *lp; /* saved pointer.. */
+ register UCHAR *sp=nfa; /* another one.. */
+
+ register int tagi = 0; /* tag stack index */
+ register int tagc = 1; /* actual tag count */
+
+ register int n;
+ register UCHAR mask; /* xor mask -CCL/NCL */
+ int c1, c2;
+
+ if (!pat || !*pat) {
+ if (sta)
+ return 0;
+ else
+ return badpat("No previous regular expression");
+ }
+ sta = NOP;
+
+ for (p = (UCHAR*)pat; *p; p++) {
+ lp = mp;
+ switch(*p) {
+
+ case '.': /* match any char.. */
+ store(ANY);
+ break;
+
+ case '^': /* match beginning.. */
+ if (p == (UCHAR*)pat)
+ store(BOL);
+ else {
+ store(CHR);
+ store(*p);
+ }
+ break;
+
+ case '$': /* match endofline.. */
+ if (!*(p+1))
+ store(EOL);
+ else {
+ store(CHR);
+ store(*p);
+ }
+ break;
+
+ case '[': /* match char class..*/
+ store(CCL);
+
+ if (*++p == '^') {
+ mask = 0377;
+ p++;
+ }
+ else
+ mask = 0;
+
+ if (*p == '-') /* real dash */
+ chset(*p++);
+ if (*p == ']') /* real brac */
+ chset(*p++);
+ while (*p && *p != ']') {
+ if (*p == '-' && *(p+1) && *(p+1) != ']') {
+ p++;
+ c1 = *(p-2) + 1;
+ c2 = *p++;
+ while (c1 <= c2)
+ chset((UCHAR)c1++);
+ }
+#ifdef EXTEND
+ else if (*p == '\\' && *(p+1)) {
+ p++;
+ chset(*p++);
+ }
+#endif
+ else
+ chset(*p++);
+ }
+ if (!*p)
+ return badpat("Missing ]");
+
+ for (n = 0; n < BITBLK; bittab[n++] = (UCHAR) 0)
+ store(mask ^ bittab[n]);
+
+ break;
+
+ case '*': /* match 0 or more.. */
+ case '+': /* match 1 or more.. */
+ if (p == (UCHAR*)pat)
+ return badpat("Empty closure");
+ lp = sp; /* previous opcode */
+ if (*lp == CLO) /* equivalence.. */
+ break;
+ switch(*lp) {
+
+ case BOL:
+ case BOT:
+ case EOT:
+ case BOW:
+ case EOW:
+ case REF:
+ return badpat("Illegal closure");
+ default:
+ break;
+ }
+
+ if (*p == '+')
+ for (sp = mp; lp < sp; lp++)
+ store(*lp);
+
+ store(END);
+ store(END);
+ sp = mp;
+ while (--mp > lp)
+ *mp = mp[-1];
+ store(CLO);
+ mp = sp;
+ break;
+
+ case '\\': /* tags, backrefs .. */
+ switch(*++p) {
+
+ case '(':
+ if (tagc < MAXTAG) {
+ tagstk[++tagi] = tagc;
+ store(BOT);
+ store(tagc++);
+ }
+ else
+ return badpat("Too many \\(\\) pairs");
+ break;
+ case ')':
+ if (*sp == BOT)
+ return badpat("Null pattern inside \\(\\)");
+ if (tagi > 0) {
+ store(EOT);
+ store(tagstk[tagi--]);
+ }
+ else
+ return badpat("Unmatched \\)");
+ break;
+ case '<':
+ store(BOW);
+ break;
+ case '>':
+ if (*sp == BOW)
+ return badpat("Null pattern inside \\<\\>");
+ store(EOW);
+ break;
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ n = *p-'0';
+ if (tagi > 0 && tagstk[tagi] == n)
+ return badpat("Cyclical reference");
+ if (tagc > n) {
+ store(REF);
+ store(n);
+ }
+ else
+ return badpat("Undetermined reference");
+ break;
+#ifdef EXTEND
+ case 'b':
+ store(CHR);
+ store('\b');
+ break;
+ case 'n':
+ store(CHR);
+ store('\n');
+ break;
+ case 'f':
+ store(CHR);
+ store('\f');
+ break;
+ case 'r':
+ store(CHR);
+ store('\r');
+ break;
+ case 't':
+ store(CHR);
+ store('\t');
+ break;
+#endif
+ default:
+ store(CHR);
+ store(*p);
+ }
+ break;
+
+ default : /* an ordinary char */
+ store(CHR);
+ store(*p);
+ break;
+ }
+ sp = lp;
+ }
+ if (tagi > 0)
+ return badpat("Unmatched \\(");
+ store(END);
+ sta = OKP;
+ return 0;
+}
+
+
+static UCHAR *bol;
+static UCHAR *bopat[MAXTAG];
+static UCHAR *eopat[MAXTAG];
+#ifdef NEEDPROTOS
+static UCHAR *pmatch( UCHAR *lp, UCHAR *ap );
+#else /* NEEDPROTOS */
+static UCHAR *pmatch();
+#endif /* NEEDPROTOS */
+
+/*
+ * re_exec:
+ * execute nfa to find a match.
+ *
+ * special cases: (nfa[0])
+ * BOL
+ * Match only once, starting from the
+ * beginning.
+ * CHR
+ * First locate the character without
+ * calling pmatch, and if found, call
+ * pmatch for the remaining string.
+ * END
+ * re_comp failed, poor luser did not
+ * check for it. Fail fast.
+ *
+ * If a match is found, bopat[0] and eopat[0] are set
+ * to the beginning and the end of the matched fragment,
+ * respectively.
+ *
+ */
+
+int
+slapd_re_exec( char *lp )
+{
+ register UCHAR c;
+ register UCHAR *ep = 0;
+ register UCHAR *ap = nfa;
+
+ bol = (UCHAR*)lp;
+
+ bopat[0] = 0;
+ bopat[1] = 0;
+ bopat[2] = 0;
+ bopat[3] = 0;
+ bopat[4] = 0;
+ bopat[5] = 0;
+ bopat[6] = 0;
+ bopat[7] = 0;
+ bopat[8] = 0;
+ bopat[9] = 0;
+
+ switch(*ap) {
+
+ case BOL: /* anchored: match from BOL only */
+ ep = pmatch((UCHAR*)lp,ap);
+ break;
+ case CHR: /* ordinary char: locate it fast */
+ c = *(ap+1);
+ while (*lp && *(UCHAR*)lp != c)
+ lp++;
+ if (!*lp) /* if EOS, fail, else fall thru. */
+ return 0;
+ default: /* regular matching all the way. */
+ do {
+ if ((ep = pmatch((UCHAR*)lp,ap)))
+ break;
+ lp++;
+ } while (*lp);
+
+ break;
+ case END: /* munged automaton. fail always */
+ return 0;
+ }
+ if (!ep)
+ return 0;
+
+ bopat[0] = (UCHAR*)lp;
+ eopat[0] = ep;
+ return 1;
+}
+
+/*
+ * pmatch: internal routine for the hard part
+ *
+ * This code is partly snarfed from an early grep written by
+ * David Conroy. The backref and tag stuff, and various other
+ * innovations are by oz.
+ *
+ * special case optimizations: (nfa[n], nfa[n+1])
+ * CLO ANY
+ * We KNOW .* will match everything upto the
+ * end of line. Thus, directly go to the end of
+ * line, without recursive pmatch calls. As in
+ * the other closure cases, the remaining pattern
+ * must be matched by moving backwards on the
+ * string recursively, to find a match for xy
+ * (x is ".*" and y is the remaining pattern)
+ * where the match satisfies the LONGEST match for
+ * x followed by a match for y.
+ * CLO CHR
+ * We can again scan the string forward for the
+ * single char and at the point of failure, we
+ * execute the remaining nfa recursively, same as
+ * above.
+ *
+ * At the end of a successful match, bopat[n] and eopat[n]
+ * are set to the beginning and end of subpatterns matched
+ * by tagged expressions (n = 1 to 9).
+ *
+ */
+
+#ifndef re_fail
+void re_fail();
+#endif /* re_fail */
+
+/*
+ * character classification table for word boundary operators BOW
+ * and EOW. the reason for not using ctype macros is that we can
+ * let the user add into our own table. see re_modw. This table
+ * is not in the bitset form, since we may wish to extend it in the
+ * future for other character classifications.
+ *
+ * TRUE for 0-9 A-Z a-z _
+ */
+static char chrtyp[MAXCHR] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
+ 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 0, 0, 0, 0, 1, 0, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 0, 0, 0, 0, 0
+ };
+
+#define inascii(x) (0177&(x))
+#define iswordc(x) chrtyp[inascii(x)]
+#define isinset(x,y) ((x)[((y)&BLKIND)>>3] & bitarr[(y)&BITIND])
+
+/*
+ * skip values for CLO XXX to skip past the closure
+ */
+
+#define ANYSKIP 2 /* [CLO] ANY END ... */
+#define CHRSKIP 3 /* [CLO] CHR chr END ... */
+#define CCLSKIP 18 /* [CLO] CCL 16bytes END ... */
+
+static UCHAR *
+pmatch( UCHAR *lp, UCHAR *ap)
+{
+ register int op, c, n;
+ register UCHAR *e; /* extra pointer for CLO */
+ register UCHAR *bp; /* beginning of subpat.. */
+ register UCHAR *ep; /* ending of subpat.. */
+ UCHAR *are; /* to save the line ptr. */
+
+ while ((op = *ap++) != END)
+ switch(op) {
+
+ case CHR:
+ if (*lp++ != *ap++)
+ return 0;
+ break;
+ case ANY:
+ if (!*lp++)
+ return 0;
+ break;
+ case CCL:
+ c = *lp++;
+ if (!isinset(ap,c))
+ return 0;
+ ap += BITBLK;
+ break;
+ case BOL:
+ if (lp != bol)
+ return 0;
+ break;
+ case EOL:
+ if (*lp)
+ return 0;
+ break;
+ case BOT:
+ bopat[*ap++] = lp;
+ break;
+ case EOT:
+ eopat[*ap++] = lp;
+ break;
+ case BOW:
+ if ((lp!=bol && iswordc(lp[-1])) || !iswordc(*lp))
+ return 0;
+ break;
+ case EOW:
+ if (lp==bol || !iswordc(lp[-1]) || iswordc(*lp))
+ return 0;
+ break;
+ case REF:
+ n = *ap++;
+ bp = bopat[n];
+ ep = eopat[n];
+ while (bp < ep)
+ if (*bp++ != *lp++)
+ return 0;
+ break;
+ case CLO:
+ are = lp;
+ switch(*ap) {
+
+ case ANY:
+ while (*lp)
+ lp++;
+ n = ANYSKIP;
+ break;
+ case CHR:
+ c = *(ap+1);
+ while (*lp && c == *lp)
+ lp++;
+ n = CHRSKIP;
+ break;
+ case CCL:
+ while ((c = *lp) && isinset(ap+1,c))
+ lp++;
+ n = CCLSKIP;
+ break;
+ default:
+ re_fail("closure: bad nfa.", *ap);
+ return 0;
+ }
+
+ ap += n;
+
+ while (lp >= are) {
+ if ((e = pmatch(lp, ap)) != NULL)
+ return e;
+ --lp;
+ }
+ return 0;
+ default:
+ re_fail("re_exec: bad nfa.", op);
+ return 0;
+ }
+ return lp;
+}
+
+/*
+ * re_modw:
+ * add new characters into the word table to change re_exec's
+ * understanding of what a word should look like. Note that we
+ * only accept additions into the word definition.
+ *
+ * If the string parameter is 0 or null string, the table is
+ * reset back to the default containing A-Z a-z 0-9 _. [We use
+ * the compact bitset representation for the default table]
+ */
+
+static UCHAR deftab[16] = {
+ 0, 0, 0, 0, 0, 0, 0377, 003, 0376, 0377, 0377, 0207,
+ 0376, 0377, 0377, 007
+};
+
+void
+slapd_re_modw( char *s )
+{
+ register int i;
+
+ if (!s || !*s) {
+ for (i = 0; i < MAXCHR; i++)
+ if (!isinset(deftab,i))
+ iswordc(i) = 0;
+ }
+ else
+ while(*s)
+ iswordc(*s++) = 1;
+}
+
+/*
+ * re_subs:
+ * substitute the matched portions of the src in dst.
+ *
+ * & substitute the entire matched pattern.
+ *
+ * \digit substitute a subpattern, with the given tag number.
+ * Tags are numbered from 1 to 9. If the particular
+ * tagged subpattern does not exist, null is substituted.
+ */
+int
+slapd_re_subs( char *src, char *dst)
+{
+ register char c;
+ register int pin;
+ register UCHAR *bp;
+ register UCHAR *ep;
+
+ if (!*src || !bopat[0])
+ return 0;
+
+ while ((c = *src++) != 0) {
+ switch(c) {
+
+ case '&':
+ pin = 0;
+ break;
+
+ case '\\':
+ c = *src++;
+ if (c >= '0' && c <= '9') {
+ pin = c - '0';
+ break;
+ }
+
+ default:
+ *dst++ = c;
+ continue;
+ }
+
+ if ((bp = bopat[pin]) && (ep = eopat[pin])) {
+ while (*bp && bp < ep)
+ *dst++ = *(char*)bp++;
+ if (bp < ep)
+ return 0;
+ }
+ }
+ *dst = (char) 0;
+ return 1;
+}
+
+#ifdef DEBUG
+/*
+ * symbolic - produce a symbolic dump of the nfa
+ */
+void
+symbolic( char *s )
+{
+ printf("pattern: %s\n", s);
+ printf("nfacode:\n");
+ nfadump(nfa);
+}
+
+static void
+nfadump( UCHAR *ap)
+{
+ register int n;
+
+ while (*ap != END)
+ switch(*ap++) {
+ case CLO:
+ printf("CLOSURE");
+ nfadump(ap);
+ switch(*ap) {
+ case CHR:
+ n = CHRSKIP;
+ break;
+ case ANY:
+ n = ANYSKIP;
+ break;
+ case CCL:
+ n = CCLSKIP;
+ break;
+ }
+ ap += n;
+ break;
+ case CHR:
+ printf("\tCHR %c\n",*ap++);
+ break;
+ case ANY:
+ printf("\tANY .\n");
+ break;
+ case BOL:
+ printf("\tBOL -\n");
+ break;
+ case EOL:
+ printf("\tEOL -\n");
+ break;
+ case BOT:
+ printf("BOT: %d\n",*ap++);
+ break;
+ case EOT:
+ printf("EOT: %d\n",*ap++);
+ break;
+ case BOW:
+ printf("BOW\n");
+ break;
+ case EOW:
+ printf("EOW\n");
+ break;
+ case REF:
+ printf("REF: %d\n",*ap++);
+ break;
+ case CCL:
+ printf("\tCCL [");
+ for (n = 0; n < MAXCHR; n++)
+ if (isinset(ap,(UCHAR)n)) {
+ if (n < ' ')
+ printf("^%c", n ^ 0x040);
+ else
+ printf("%c", n);
+ }
+ printf("]\n");
+ ap += BITBLK;
+ break;
+ default:
+ printf("bad nfa. opcode %o\n", ap[-1]);
+ exit(1);
+ break;
+ }
+}
+#endif
+#endif /* MACOS or DOS or NEED_BSDREGEX */