summary refs log tree commit diff stats
diff options
context:
space:
mode:
author ksaito <ksaito@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2004-03-05 15:31:51 +0000
committer ksaito <ksaito@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2004-03-05 15:31:51 +0000
commit e312f37e445e74b44269c93bf927ed73b54333ed (patch)
tree d75456ec841c8a8d8eba9c3a4d25b39243c56ef6
parent 761419b3912182cbea5a6545c110df01bc012941 (diff)
download ruby-e312f37e445e74b44269c93bf927ed73b54333ed.tar.gz
ruby-e312f37e445e74b44269c93bf927ed73b54333ed.tar.xz
ruby-e312f37e445e74b44269c93bf927ed73b54333ed.zip
Initial revision
git-svn-id: http://svn.ruby-lang.org/repos/ruby/trunk@5892 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ascii.c 54
-rw-r--r-- euc_jp.c 191
-rw-r--r-- oniggnu.h 77
-rw-r--r-- oniguruma.h 715
-rw-r--r-- regcomp.c 5440
-rw-r--r-- regenc.c 586
-rw-r--r-- regenc.h 96
-rw-r--r-- regerror.c 291
-rw-r--r-- regexec.c 3299
-rw-r--r-- reggnu.c 256
-rw-r--r-- regint.h 685
-rw-r--r-- regparse.c 4815
-rw-r--r-- regparse.h 277
-rw-r--r-- sjis.c 174
-rw-r--r-- utf8.c 566
15 files changed, 17522 insertions, 0 deletions
diff --git a/ascii.c b/ascii.c
new file mode 100644
index 000000000..44cc78f77
--- /dev/null
+++ b/ascii.c
@@ -0,0 +1,54 @@
+/**********************************************************************
+
+ ascii.c - Oniguruma (regular expression library)
+
+ Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
+
+**********************************************************************/
+#include "regenc.h"
+
+static int
+ascii_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  /* Codes of 128 and above never match any ctype in this encoding. */
+  if (code >= 128) return FALSE;
+
+  return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+}
+
+OnigEncodingType OnigEncodingASCII = { /* US-ASCII descriptor; initializers follow the OnigEncodingType field order */
+ { /* len_table: character byte length keyed by head byte; 1 for all 256 values */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+ },
+ "US-ASCII", /* name */
+ 1, /* max byte length */
+ FALSE, /* is_fold_match */
+ ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
+ TRUE, /* is continuous sb mb codepoint */
+ onigenc_single_byte_mbc_to_code, /* mbc_to_code */
+ onigenc_single_byte_code_to_mbclen, /* code_to_mbclen */
+ onigenc_single_byte_code_to_mbc, /* code_to_mbc */
+ onigenc_ascii_mbc_to_lower, /* mbc_to_lower */
+ onigenc_ascii_mbc_is_case_ambig, /* mbc_is_case_ambig */
+ ascii_code_is_ctype, /* code_is_ctype */
+ onigenc_nothing_get_ctype_code_range, /* get_ctype_code_range */
+ onigenc_single_byte_left_adjust_char_head, /* left_adjust_char_head */
+ onigenc_single_byte_is_allowed_reverse_match, /* is_allowed_reverse_match */
+ onigenc_nothing_get_all_fold_match_code, /* get_all_fold_match_code */
+ onigenc_nothing_get_fold_match_info /* get_fold_match_info */
+};
diff --git a/euc_jp.c b/euc_jp.c
new file mode 100644
index 000000000..848016ba5
--- /dev/null
+++ b/euc_jp.c
@@ -0,0 +1,191 @@
+/**********************************************************************
+
+ euc_jp.c - Oniguruma (regular expression library)
+
+ Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
+
+**********************************************************************/
+#include "regenc.h"
+
+#define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1) /* nonzero when c lies outside 0xa1..0xfe (the UChar cast wraps smaller values) */
+
+static OnigCodePoint
+eucjp_mbc_to_code(UChar* p, UChar* end)
+{
+  /* Fold the bytes of one character into an integer, first byte highest.
+     The first byte selects the length; later bytes are added while p < end. */
+  OnigCodePoint code;
+  int remain;
+
+  code = *p;
+  remain = enc_len(ONIG_ENCODING_EUC_JP, *p) - 1;
+  p++;
+
+  while (remain-- > 0 && p < end) {
+    code = (code << 8) + *p++;
+  }
+
+  return code;
+}
+
+static int
+eucjp_code_to_mbclen(OnigCodePoint code)
+{
+  /* 3 if any of bits 16-23 is set, else 2 if any of bits 8-15 is set, else 1. */
+  if ((code & 0xff0000) != 0) return 3;
+  return ((code & 0xff00) != 0) ? 2 : 1;
+}
+
+static int
+eucjp_code_to_mbc_first(OnigCodePoint code)
+{
+  /*
+   * Yields the head (first) byte of the multibyte form of `code`:
+   *
+   *   bits 16-23 populated  ->  the byte held in bits 16-23
+   *   bits 8-15 populated   ->  the byte held in bits 8-15
+   *   neither               ->  code itself, converted to int
+   *
+   * Nothing is validated here.  Every branch once carried a check of
+   * enc_len(ONIG_ENCODING_EUC_JP, ...) against the expected length that
+   * returned ONIGERR_INVALID_WIDE_CHAR_VALUE, but each of those checks
+   * was commented out; they remain disabled.
+   */
+  const int has_byte3 = ((code & 0xff0000) != 0);
+  const int has_byte2 = ((code & 0xff00) != 0);
+
+  if (has_byte3) {
+    return (int )((code >> 16) & 0xff);
+  }
+
+  if (has_byte2) {
+    return (int )((code >> 8) & 0xff);
+  }
+
+  return (int )code;
+}
+
+static int
+eucjp_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+  /* Stores the populated bytes of `code` in buf, high byte first, and
+     fails if their count disagrees with the length implied by buf[0]. */
+  int n = 0;
+
+  if ((code & 0xff0000) != 0) buf[n++] = (UChar )((code >> 16) & 0xff);
+  if ((code & 0xff00) != 0) buf[n++] = (UChar )((code >> 8) & 0xff);
+  buf[n++] = (UChar )(code & 0xff);
+
+  if (enc_len(ONIG_ENCODING_EUC_JP, buf[0]) != n)
+    return ONIGERR_INVALID_WIDE_CHAR_VALUE;
+  return n;
+}
+
+static int
+eucjp_mbc_to_lower(UChar* p, UChar* lower)
+{
+  /* Bytes below 128 go through ONIGENC_ASCII_CODE_TO_LOWER_CASE; any other
+     character is copied unchanged (the copy is skipped when lower == p).
+     Returns the byte length of the character written to lower. */
+  int len, i;
+
+  if (ONIGENC_IS_MBC_ASCII(p)) {
+    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
+    return 1;
+  }
+
+  len = enc_len(ONIG_ENCODING_EUC_JP, *p);
+  if (lower == p) return len;
+
+  for (i = 0; i < len; i++) {
+    lower[i] = p[i];
+  }
+  return len;
+}
+
+static int
+eucjp_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  /* Codes below 128 are classified by ONIGENC_IS_ASCII_CODE_CTYPE for any
+     ctype.  A code >= 128 matches only when ONIGENC_CTYPE_WORD is among
+     the requested bits and its head byte starts a multibyte character
+     (enc_len > 1).  Statements that used to follow an if/else whose
+     branches both return were unreachable and have been removed; the
+     results are unchanged. */
+  int first;
+
+  if (code < 128)
+    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+
+  if ((ctype & ONIGENC_CTYPE_WORD) == 0)
+    return FALSE;
+
+  first = eucjp_code_to_mbc_first(code);
+  return (enc_len(ONIG_ENCODING_EUC_JP, first) > 1 ? TRUE : FALSE);
+}
+
+static UChar*
+eucjp_left_adjust_char_head(UChar* start, UChar* s)
+{
+  /* Relies on the assumption that, in this encoding, multibyte trail
+     bytes never mix with single bytes. */
+  UChar *head = s;
+  int len;
+
+  if (s <= start) return s;
+
+  /* walk left while eucjp_islead() is false and start is not reached */
+  for (; !eucjp_islead(*head) && head > start; head--) ;
+  len = enc_len(ONIG_ENCODING_EUC_JP, *head);
+  if (head + len > s) return head;
+
+  head += len;
+  return head + ((s - head) & ~1);
+}
+
+static int
+eucjp_is_allowed_reverse_match(UChar* s, UChar* end)
+{
+  /* TRUE when the byte at s is <= 0x7e, 0x8e or 0x8f; `end` is unused. */
+  const UChar head = *s;
+
+  if (head == 0x8e || head == 0x8f) return TRUE;
+  return (head <= 0x7e) ? TRUE : FALSE;
+}
+
+OnigEncodingType OnigEncodingEUC_JP = { /* EUC-JP descriptor; initializers follow the OnigEncodingType field order */
+ { /* len_table: character byte length keyed by head byte, 16 entries per row */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, /* 0x80-0x8f: 0x8e -> 2, 0x8f -> 3 */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xa0-0xaf: 0xa0 -> 1, 0xa1 onward -> 2 */
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 /* 0xf0-0xff: 0xff -> 1 */
+ },
+ "EUC-JP", /* name */
+ 3, /* max byte length */
+ FALSE, /* is_fold_match */
+ ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
+ FALSE, /* is continuous sb mb codepoint */
+ eucjp_mbc_to_code, /* mbc_to_code */
+ eucjp_code_to_mbclen, /* code_to_mbclen */
+ eucjp_code_to_mbc, /* code_to_mbc */
+ eucjp_mbc_to_lower, /* mbc_to_lower */
+ onigenc_mbn_mbc_is_case_ambig, /* mbc_is_case_ambig */
+ eucjp_code_is_ctype, /* code_is_ctype */
+ onigenc_nothing_get_ctype_code_range, /* get_ctype_code_range */
+ eucjp_left_adjust_char_head, /* left_adjust_char_head */
+ eucjp_is_allowed_reverse_match, /* is_allowed_reverse_match */
+ onigenc_nothing_get_all_fold_match_code, /* get_all_fold_match_code */
+ onigenc_nothing_get_fold_match_info /* get_fold_match_info */
+};
diff --git a/oniggnu.h b/oniggnu.h
new file mode 100644
index 000000000..d78dc18b1
--- /dev/null
+++ b/oniggnu.h
@@ -0,0 +1,77 @@
+/**********************************************************************
+
+ oniggnu.h - Oniguruma (regular expression library)
+
+ Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
+
+**********************************************************************/
+#ifndef ONIGGNU_H
+#define ONIGGNU_H
+
+#include "oniguruma.h"
+
+#define MBCTYPE_ASCII 0 /* NOTE(review): presumably the int codes taken by re_mbcinit(int) - confirm in reggnu.c */
+#define MBCTYPE_EUC 1
+#define MBCTYPE_SJIS 2
+#define MBCTYPE_UTF8 3
+
+/* GNU regex options: RE_* names defined as aliases of the ONIG_* constants */
+#ifndef RE_NREGS
+#define RE_NREGS ONIG_NREGION /* only when the includer has not already defined RE_NREGS */
+#endif
+#define RE_OPTION_IGNORECASE ONIG_OPTION_IGNORECASE
+#define RE_OPTION_EXTENDED ONIG_OPTION_EXTEND
+#define RE_OPTION_MULTILINE ONIG_OPTION_MULTILINE
+#define RE_OPTION_SINGLELINE ONIG_OPTION_SINGLELINE
+#define RE_OPTION_LONGEST ONIG_OPTION_FIND_LONGEST
+#define RE_OPTION_POSIXLINE (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE) /* both bits: multiline and singleline */
+
+#ifdef RUBY_PLATFORM /* remap the GNU-style re_* names to ruby_-prefixed names */
+#define re_mbcinit ruby_re_mbcinit
+#define re_compile_pattern ruby_re_compile_pattern
+#define re_recompile_pattern ruby_re_recompile_pattern
+#define re_free_pattern ruby_re_free_pattern
+#define re_adjust_startpos ruby_re_adjust_startpos
+#define re_search ruby_re_search
+#define re_match ruby_re_match
+#define re_set_casetable ruby_re_set_casetable
+#define re_copy_registers ruby_re_copy_registers
+#define re_free_registers ruby_re_free_registers
+#define register_info_type ruby_register_info_type
+#define re_error_code_to_str ruby_error_code_to_str
+
+#define ruby_error_code_to_str onig_error_code_to_str /* second hop: the ruby_ name expands to the onig_ function */
+#define ruby_re_copy_registers onig_region_copy /* second hop: the ruby_ name expands to the onig_ function */
+#else /* without RUBY_PLATFORM these two names map straight to the onig_ functions */
+#define re_error_code_to_str onig_error_code_to_str
+#define re_copy_registers onig_region_copy
+#endif
+
+#ifdef ONIG_RUBY_M17N /* re_mbcinit takes an OnigEncoding under ONIG_RUBY_M17N, a plain int otherwise */
+ONIG_EXTERN
+void re_mbcinit P_((OnigEncoding));
+#else
+ONIG_EXTERN
+void re_mbcinit P_((int));
+#endif
+
+ONIG_EXTERN /* GNU-regex-style entry points; parameter lists are wrapped in P_(), defined in oniguruma.h */
+int re_compile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf));
+ONIG_EXTERN
+int re_recompile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf));
+ONIG_EXTERN
+void re_free_pattern P_((struct re_pattern_buffer*));
+ONIG_EXTERN
+int re_adjust_startpos P_((struct re_pattern_buffer*, const char*, int, int, int));
+ONIG_EXTERN
+int re_search P_((struct re_pattern_buffer*, const char*, int, int, int, struct re_registers*));
+ONIG_EXTERN
+int re_match P_((struct re_pattern_buffer*, const char *, int, int, struct re_registers*));
+ONIG_EXTERN
+void re_set_casetable P_((const char*));
+ONIG_EXTERN
+void re_free_registers P_((struct re_registers*));
+ONIG_EXTERN
+int re_alloc_pattern P_((struct re_pattern_buffer**)); /* added */
+
+#endif /* ONIGGNU_H */
diff --git a/oniguruma.h b/oniguruma.h
new file mode 100644
index 000000000..8654bff18
--- /dev/null
+++ b/oniguruma.h
@@ -0,0 +1,715 @@
+/**********************************************************************
+
+ oniguruma.h - Oniguruma (regular expression library)
+
+ Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
+
+**********************************************************************/
+#ifndef ONIGURUMA_H
+#define ONIGURUMA_H
+
+#define ONIGURUMA
+#define ONIGURUMA_VERSION_MAJOR 2
+#define ONIGURUMA_VERSION_MINOR 2
+#define ONIGURUMA_VERSION_TEENY 4
+
+#ifndef P_
+#if defined(__STDC__) || defined(_WIN32)
+# define P_(args) args
+#else
+# define P_(args) ()
+#endif
+#endif
+
+#ifndef PV_
+#ifdef HAVE_STDARG_PROTOTYPES
+# define PV_(args) args
+#else
+# define PV_(args) ()
+#endif
+#endif
+
+#ifndef ONIG_EXTERN
+#if defined(_WIN32) && !defined(__CYGWIN__)
+#if defined(EXPORT) || defined(RUBY_EXPORT)
+#define ONIG_EXTERN extern __declspec(dllexport)
+#else
+#define ONIG_EXTERN extern __declspec(dllimport)
+#endif
+#endif
+#endif
+
+#ifndef ONIG_EXTERN
+#define ONIG_EXTERN extern
+#endif
+
+/* PART: character encoding */
+
+typedef unsigned char UChar;
+typedef unsigned long OnigCodePoint;
+typedef unsigned int OnigDistance;
+
+#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0)
+
+typedef struct {
+ OnigCodePoint from;
+ OnigCodePoint to;
+} OnigCodePointRange;
+
+#define ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE 16
+typedef struct {
+ int target_num;
+ int target_byte_len[ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE];
+ UChar* target_str[ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE];
+} OnigEncFoldMatchInfo;
+
+
+#if defined(RUBY_PLATFORM) && defined(M17N_H)
+
+#define ONIG_RUBY_M17N
+typedef m17n_encoding* OnigEncoding;
+
+#else
+
+typedef struct { /* per-encoding descriptor; the ONIGENC_* macros in this header read these fields */
+ const char len_table[256]; /* indexed by byte value; read by ONIGENC_MBC_LEN_BY_HEAD */
+ const char* name; /* read by ONIGENC_NAME */
+ int max_enc_len; /* read by ONIGENC_MBC_MAXLEN */
+ int is_fold_match; /* read by ONIGENC_IS_FOLD_MATCH */
+ int ctype_support_level; /* sb-only/full: ONIGENC_CTYPE_SUPPORT_LEVEL_SB or _FULL */
+ int is_continuous_sb_mb; /* code point is continuous from sb to mb */
+ OnigCodePoint (*mbc_to_code)(UChar* p, UChar* end); /* called by ONIGENC_MBC_TO_CODE */
+ int (*code_to_mbclen)(OnigCodePoint code); /* called by ONIGENC_CODE_TO_MBCLEN */
+ int (*code_to_mbc)(OnigCodePoint code, UChar *buf); /* called by ONIGENC_CODE_TO_MBC */
+ int (*mbc_to_lower)(UChar* p, UChar* lower); /* called by ONIGENC_MBC_TO_LOWER */
+ int (*mbc_is_case_ambig)(UChar* p); /* called by ONIGENC_IS_MBC_CASE_AMBIG */
+ int (*code_is_ctype)(OnigCodePoint code, unsigned int ctype); /* called by ONIGENC_IS_CODE_CTYPE */
+ int (*get_ctype_code_range)(int ctype, int* nsb, int* nmb, OnigCodePointRange* sbr[], OnigCodePointRange* mbr[]); /* called by ONIGENC_GET_CTYPE_CODE_RANGE */
+ UChar* (*left_adjust_char_head)(UChar* start, UChar* s); /* called by ONIGENC_LEFT_ADJUST_CHAR_HEAD */
+ int (*is_allowed_reverse_match)(UChar* p, UChar* e); /* called by ONIGENC_IS_ALLOWED_REVERSE_MATCH */
+ int (*get_all_fold_match_code)(OnigCodePoint** codes); /* called by ONIGENC_GET_ALL_FOLD_MATCH_CODE */
+ int (*get_fold_match_info)(UChar* p, UChar* end, OnigEncFoldMatchInfo** info); /* called by ONIGENC_GET_FOLD_MATCH_INFO */
+} OnigEncodingType;
+
+typedef OnigEncodingType* OnigEncoding;
+
+ONIG_EXTERN OnigEncodingType OnigEncodingASCII;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_1;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_2;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_3;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_4;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_5;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_6;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_7;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_8;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_9;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_10;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_11;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_13;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_14;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_15;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_16;
+ONIG_EXTERN OnigEncodingType OnigEncodingUTF8;
+ONIG_EXTERN OnigEncodingType OnigEncodingEUC_JP;
+ONIG_EXTERN OnigEncodingType OnigEncodingEUC_TW;
+ONIG_EXTERN OnigEncodingType OnigEncodingEUC_KR;
+ONIG_EXTERN OnigEncodingType OnigEncodingEUC_CN;
+ONIG_EXTERN OnigEncodingType OnigEncodingSJIS;
+ONIG_EXTERN OnigEncodingType OnigEncodingKOI8;
+ONIG_EXTERN OnigEncodingType OnigEncodingKOI8_R;
+ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
+
+#define ONIG_ENCODING_ASCII (&OnigEncodingASCII)
+#define ONIG_ENCODING_ISO_8859_1 (&OnigEncodingISO_8859_1)
+#define ONIG_ENCODING_ISO_8859_2 (&OnigEncodingISO_8859_2)
+#define ONIG_ENCODING_ISO_8859_3 (&OnigEncodingISO_8859_3)
+#define ONIG_ENCODING_ISO_8859_4 (&OnigEncodingISO_8859_4)
+#define ONIG_ENCODING_ISO_8859_5 (&OnigEncodingISO_8859_5)
+#define ONIG_ENCODING_ISO_8859_6 (&OnigEncodingISO_8859_6)
+#define ONIG_ENCODING_ISO_8859_7 (&OnigEncodingISO_8859_7)
+#define ONIG_ENCODING_ISO_8859_8 (&OnigEncodingISO_8859_8)
+#define ONIG_ENCODING_ISO_8859_9 (&OnigEncodingISO_8859_9)
+#define ONIG_ENCODING_ISO_8859_10 (&OnigEncodingISO_8859_10)
+#define ONIG_ENCODING_ISO_8859_11 (&OnigEncodingISO_8859_11)
+#define ONIG_ENCODING_ISO_8859_13 (&OnigEncodingISO_8859_13)
+#define ONIG_ENCODING_ISO_8859_14 (&OnigEncodingISO_8859_14)
+#define ONIG_ENCODING_ISO_8859_15 (&OnigEncodingISO_8859_15)
+#define ONIG_ENCODING_ISO_8859_16 (&OnigEncodingISO_8859_16)
+#define ONIG_ENCODING_UTF8 (&OnigEncodingUTF8)
+#define ONIG_ENCODING_EUC_JP (&OnigEncodingEUC_JP)
+#define ONIG_ENCODING_EUC_TW (&OnigEncodingEUC_TW)
+#define ONIG_ENCODING_EUC_KR (&OnigEncodingEUC_KR)
+#define ONIG_ENCODING_EUC_CN (&OnigEncodingEUC_CN)
+#define ONIG_ENCODING_SJIS (&OnigEncodingSJIS)
+#define ONIG_ENCODING_KOI8 (&OnigEncodingKOI8)
+#define ONIG_ENCODING_KOI8_R (&OnigEncodingKOI8_R)
+#define ONIG_ENCODING_BIG5 (&OnigEncodingBIG5)
+
+#endif /* else RUBY && M17N */
+
+#define ONIG_ENCODING_UNDEF ((OnigEncoding )0)
+
+
+/* work size */
+#define ONIGENC_CODE_TO_MBC_MAXLEN 7
+#define ONIGENC_MBC_TO_LOWER_MAXLEN ONIGENC_CODE_TO_MBC_MAXLEN
+
+/* character types */
+#define ONIGENC_CTYPE_ALPHA (1<< 0)
+#define ONIGENC_CTYPE_BLANK (1<< 1)
+#define ONIGENC_CTYPE_CNTRL (1<< 2)
+#define ONIGENC_CTYPE_DIGIT (1<< 3)
+#define ONIGENC_CTYPE_GRAPH (1<< 4)
+#define ONIGENC_CTYPE_LOWER (1<< 5)
+#define ONIGENC_CTYPE_PRINT (1<< 6)
+#define ONIGENC_CTYPE_PUNCT (1<< 7)
+#define ONIGENC_CTYPE_SPACE (1<< 8)
+#define ONIGENC_CTYPE_UPPER (1<< 9)
+#define ONIGENC_CTYPE_XDIGIT (1<<10)
+#define ONIGENC_CTYPE_WORD (1<<11)
+#define ONIGENC_CTYPE_ASCII (1<<12)
+#define ONIGENC_CTYPE_ALNUM (ONIGENC_CTYPE_ALPHA | ONIGENC_CTYPE_DIGIT)
+
+/* ctype support level */
+#define ONIGENC_CTYPE_SUPPORT_LEVEL_SB 0
+#define ONIGENC_CTYPE_SUPPORT_LEVEL_FULL 1
+
+
+#define enc_len(enc,byte) ONIGENC_MBC_LEN_BY_HEAD(enc,byte)
+
+#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
+#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1)
+#define ONIGENC_IS_MBC_HEAD(enc,byte) (ONIGENC_MBC_LEN_BY_HEAD(enc,byte) != 1)
+#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128)
+#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128)
+#define ONIGENC_IS_CODE_SB_WORD(enc,code) \
+ (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code))
+#define ONIGENC_IS_MBC_WORD(enc,s,end) \
+ ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end))
+
+
+#ifdef ONIG_RUBY_M17N
+
+#include <ctype.h> /* for isblank(), isgraph() */
+
+#define ONIGENC_MBC_TO_LOWER(enc,p,buf) onigenc_mbc_to_lower(enc,p,buf)
+#define ONIGENC_IS_MBC_CASE_AMBIG(enc,p) onigenc_mbc_is_case_ambig(enc,p)
+
+#define ONIGENC_IS_FOLD_MATCH(enc) FALSE
+#define ONIGENC_IS_CONTINUOUS_SB_MB(enc) FALSE
+#define ONIGENC_CTYPE_SUPPORT_LEVEL(enc) ONIGENC_CTYPE_SUPPORT_LEVEL_SB
+#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
+ onigenc_is_allowed_reverse_match(enc, s, end)
+#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
+ onigenc_get_left_adjust_char_head(enc, start, s)
+#define ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc,codes) 0
+#define ONIGENC_GET_FOLD_MATCH_INFO(enc,p,end,info) ONIG_NO_SUPPORT_CONFIG
+#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \
+ ONIG_NO_SUPPORT_CONFIG
+#define ONIGENC_MBC_LEN_BY_HEAD(enc,b) m17n_mbclen(enc,(int )b)
+#define ONIGENC_MBC_MAXLEN(enc) m17n_mbmaxlen(enc)
+#define ONIGENC_MBC_MAXLEN_DIST(enc) \
+ (ONIGENC_MBC_MAXLEN(enc) > 0 ? ONIGENC_MBC_MAXLEN(enc) \
+ : ONIG_INFINITE_DISTANCE)
+#define ONIGENC_MBC_TO_CODE(enc,p,e) m17n_codepoint((enc),(p),(e))
+#define ONIGENC_CODE_TO_MBCLEN(enc,code) m17n_codelen((enc),(code))
+#define ONIGENC_CODE_TO_MBC(enc,code,buf) onigenc_code_to_mbc(enc, code, buf)
+
+#if 0
+#define ONIGENC_STEP_BACK(enc,start,s,n) /* !! not supported !! */
+#endif
+
+#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) \
+ onigenc_is_code_ctype(enc,code,ctype)
+
+#ifdef isblank
+# define ONIGENC_IS_CODE_BLANK(enc,code) isblank((int )code)
+#else
+# define ONIGENC_IS_CODE_BLANK(enc,code) ((code) == ' ' || (code) == '\t')
+#endif
+#ifdef isgraph
+# define ONIGENC_IS_CODE_GRAPH(enc,code) isgraph((int )code)
+#else
+# define ONIGENC_IS_CODE_GRAPH(enc,code) \
+ (isprint((int )code) && !isspace((int )code))
+#endif
+
+#define ONIGENC_IS_CODE_PRINT(enc,code) m17n_isprint(enc,code)
+#define ONIGENC_IS_CODE_ALNUM(enc,code) m17n_isalnum(enc,code)
+#define ONIGENC_IS_CODE_ALPHA(enc,code) m17n_isalpha(enc,code)
+#define ONIGENC_IS_CODE_LOWER(enc,code) m17n_islower(enc,code)
+#define ONIGENC_IS_CODE_UPPER(enc,code) m17n_isupper(enc,code)
+#define ONIGENC_IS_CODE_CNTRL(enc,code) m17n_iscntrl(enc,code)
+#define ONIGENC_IS_CODE_PUNCT(enc,code) m17n_ispunct(enc,code)
+#define ONIGENC_IS_CODE_SPACE(enc,code) m17n_isspace(enc,code)
+#define ONIGENC_IS_CODE_DIGIT(enc,code) m17n_isdigit(enc,code)
+#define ONIGENC_IS_CODE_XDIGIT(enc,code) m17n_isxdigit(enc,code)
+#define ONIGENC_IS_CODE_WORD(enc,code) m17n_iswchar(enc,code)
+
+ONIG_EXTERN
+int onigenc_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, int ctype));
+ONIG_EXTERN
+int onigenc_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
+ONIG_EXTERN
+int onigenc_mbc_to_lower P_((OnigEncoding enc, UChar* p, UChar* buf));
+ONIG_EXTERN
+int onigenc_mbc_is_case_ambig P_((OnigEncoding enc, UChar* p));
+ONIG_EXTERN
+int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, UChar* s, UChar* end));
+
+#else /* ONIG_RUBY_M17N */
+
+#define ONIGENC_NAME(enc) ((enc)->name)
+
+#define ONIGENC_MBC_TO_LOWER(enc,p,buf) (enc)->mbc_to_lower(p,buf)
+#define ONIGENC_IS_MBC_CASE_AMBIG(enc,p) (enc)->mbc_is_case_ambig(p)
+
+#define ONIGENC_IS_FOLD_MATCH(enc) ((enc)->is_fold_match)
+#define ONIGENC_IS_CONTINUOUS_SB_MB(enc) ((enc)->is_continuous_sb_mb)
+#define ONIGENC_CTYPE_SUPPORT_LEVEL(enc) ((enc)->ctype_support_level)
+#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
+ (enc)->is_allowed_reverse_match(s,end)
+#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
+ (enc)->left_adjust_char_head(start, s)
+#define ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc,codes) \
+ (enc)->get_all_fold_match_code(codes)
+#define ONIGENC_GET_FOLD_MATCH_INFO(enc,p,end,info) \
+ (enc)->get_fold_match_info(p,end,info)
+#define ONIGENC_STEP_BACK(enc,start,s,n) \
+ onigenc_step_back((enc),(start),(s),(n))
+
+#define ONIGENC_MBC_LEN_BY_HEAD(enc,byte) ((enc)->len_table[(int )(byte)])
+#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
+#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
+#define ONIGENC_MBC_TO_CODE(enc,p,e) (enc)->mbc_to_code((p),(e))
+#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code)
+#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf)
+
+#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->code_is_ctype(code,ctype)
+
+#define ONIGENC_IS_CODE_GRAPH(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH)
+#define ONIGENC_IS_CODE_PRINT(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT)
+#define ONIGENC_IS_CODE_ALNUM(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM)
+#define ONIGENC_IS_CODE_ALPHA(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA)
+#define ONIGENC_IS_CODE_LOWER(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER)
+#define ONIGENC_IS_CODE_UPPER(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER)
+#define ONIGENC_IS_CODE_CNTRL(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL)
+#define ONIGENC_IS_CODE_PUNCT(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT)
+#define ONIGENC_IS_CODE_SPACE(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE)
+#define ONIGENC_IS_CODE_BLANK(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK)
+#define ONIGENC_IS_CODE_DIGIT(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT)
+#define ONIGENC_IS_CODE_XDIGIT(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT)
+#define ONIGENC_IS_CODE_WORD(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD)
+
+#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \
+ (enc)->get_ctype_code_range(ctype,nsb,nmb,sbr,mbr)
+
+ONIG_EXTERN
+UChar* onigenc_step_back P_((OnigEncoding enc, UChar* start, UChar* s, int n));
+
+#endif /* is not ONIG_RUBY_M17N */
+
+
+/* encoding API */
+ONIG_EXTERN
+int onigenc_init P_(());
+ONIG_EXTERN
+int onigenc_set_default_encoding P_((OnigEncoding enc));
+ONIG_EXTERN
+OnigEncoding onigenc_get_default_encoding P_(());
+ONIG_EXTERN
+void onigenc_set_default_caseconv_table P_((UChar* table));
+ONIG_EXTERN
+UChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, UChar* start, UChar* s, UChar** prev));
+ONIG_EXTERN
+UChar* onigenc_get_prev_char_head P_((OnigEncoding enc, UChar* start, UChar* s));
+ONIG_EXTERN
+UChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, UChar* start, UChar* s));
+ONIG_EXTERN
+UChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, UChar* start, UChar* s));
+
+
+
+/* PART: regular expression */
+
+/* config parameters */
+#define ONIG_NREGION 10
+#define ONIG_MAX_BACKREF_NUM 1000
+#define ONIG_MAX_REPEAT_NUM 100000
+#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 1000
+/* constants */
+#define ONIG_MAX_ERROR_MESSAGE_LEN 90
+
+#if defined(RUBY_PLATFORM) && !defined(ONIG_RUBY_M17N)
+ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
+#undef ismbchar
+#define ismbchar(c) (mbclen((c)) != 1)
+#define mbclen(c) (OnigEncDefaultCharEncoding->len_table[(unsigned char )(c)])
+#endif
+
+typedef unsigned int OnigOptionType;
+
+#define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE
+
+/* options */
+#define ONIG_OPTION_NONE 0
+#define ONIG_OPTION_IGNORECASE 1L
+#define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1)
+#define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1)
+#define ONIG_OPTION_SINGLELINE (ONIG_OPTION_MULTILINE << 1)
+#define ONIG_OPTION_FIND_LONGEST (ONIG_OPTION_SINGLELINE << 1)
+#define ONIG_OPTION_FIND_NOT_EMPTY (ONIG_OPTION_FIND_LONGEST << 1)
+#define ONIG_OPTION_NEGATE_SINGLELINE (ONIG_OPTION_FIND_NOT_EMPTY << 1)
+#define ONIG_OPTION_DONT_CAPTURE_GROUP (ONIG_OPTION_NEGATE_SINGLELINE << 1)
+#define ONIG_OPTION_CAPTURE_GROUP (ONIG_OPTION_DONT_CAPTURE_GROUP << 1)
+/* options (search time) */
+#define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1)
+#define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1)
+#define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1)
+
+#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt))
+#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
+#define ONIG_IS_OPTION_ON(options,option) ((options) & (option))
+
+/* syntax */
+typedef struct { /* one syntax definition; instances are declared below as OnigSyntax* */
+ unsigned int op; /* NOTE(review): presumably a mask of ONIG_SYN_OP_* bits - confirm in regparse.c */
+ unsigned int op2; /* NOTE(review): presumably a mask of ONIG_SYN_OP2_* bits - confirm in regparse.c */
+ unsigned int behavior; /* NOTE(review): presumably a mask of the "syntax (behavior)" ONIG_SYN_* bits - confirm */
+ OnigOptionType options; /* default option */
+} OnigSyntaxType;
+
+ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
+
+/* predefined syntaxes (see regparse.c) */
+#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
+#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
+#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
+#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep)
+#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex)
+#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava)
+#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl)
+#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby)
+
+/* default syntax */
+ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
+#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax
+
+/* syntax (operators) */
+#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1<<0)
+#define ONIG_SYN_OP_DOT_ANYCHAR (1<<1) /* . */
+#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1<<2) /* * */
+#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1<<3)
+#define ONIG_SYN_OP_PLUS_ONE_INF (1<<4) /* + */
+#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1<<5)
+#define ONIG_SYN_OP_QMARK_ZERO_ONE (1<<6) /* ? */
+#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1<<7)
+#define ONIG_SYN_OP_BRACE_INTERVAL (1<<8) /* {lower,upper} */
+#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1<<9) /* \{lower,upper\} */
+#define ONIG_SYN_OP_VBAR_ALT (1<<10) /* | */
+#define ONIG_SYN_OP_ESC_VBAR_ALT (1<<11) /* \| */
+#define ONIG_SYN_OP_LPAREN_SUBEXP (1<<12) /* (...) */
+#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1<<13) /* \(...\) */
+#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1<<14) /* \A, \Z, \z */
+#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1<<15) /* \G */
+#define ONIG_SYN_OP_DECIMAL_BACKREF (1<<16) /* \num */
+#define ONIG_SYN_OP_BRACKET_CC (1<<17) /* [...] */
+#define ONIG_SYN_OP_ESC_W_WORD (1<<18) /* \w, \W */
+#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1<<19) /* \<. \> */
+#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1<<20) /* \b, \B */
+#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1<<21) /* \s, \S */
+#define ONIG_SYN_OP_ESC_D_DIGIT (1<<22) /* \d, \D */
+#define ONIG_SYN_OP_LINE_ANCHOR (1<<23) /* ^, $ */
+#define ONIG_SYN_OP_POSIX_BRACKET (1<<24) /* [:xxxx:] */
+#define ONIG_SYN_OP_QMARK_NON_GREEDY (1<<25) /* ??,*?,+?,{n,m}? */
+#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1<<26) /* \n,\r,\t,\a ... */
+#define ONIG_SYN_OP_ESC_C_CONTROL (1<<27) /* \cx */
+#define ONIG_SYN_OP_ESC_OCTAL3 (1<<28) /* \OOO */
+#define ONIG_SYN_OP_ESC_X_HEX2 (1<<29) /* \xHH */
+#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1<<30) /* \x{7HHHHHHH} */
+
+#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1<<0) /* \Q...\E */
+#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1<<1) /* (?...) */
+#define ONIG_SYN_OP2_OPTION_PERL (1<<2) /* (?imsx),(?-imsx) */
+#define ONIG_SYN_OP2_OPTION_RUBY (1<<3) /* (?imx), (?-imx) */
+#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1<<4) /* ?+,*+,++ */
+#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1<<5) /* {n,m}+ */
+#define ONIG_SYN_OP2_CCLASS_SET_OP (1<<6) /* [...&&..[..]..] */
+#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1<<7) /* (?<name>...) */
+#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1<<8) /* \k<name> */
+#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1<<9) /* \g<name>, \g<n> */
+#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1<<10) /* (?@..),(?@<x>..) */
+#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1<<11) /* \C-x */
+#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1<<12) /* \M-x */
+#define ONIG_SYN_OP2_ESC_V_VTAB (1<<13) /* \v as VTAB */
+#define ONIG_SYN_OP2_ESC_U_HEX4 (1<<14) /* \uHHHH */
+#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1<<15) /* \`, \' */
+#define ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY (1<<16) /* \p{...}, \P{...} */
+
+/* syntax (behavior): bit flags; bit 31 is written 1U<<31 because 1<<31 does not fit a 32-bit signed int */
+#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */
+#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1<<0) /* ?, *, +, {n,m} */
+#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1<<1) /* error or ignore */
+#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1<<2) /* ...)... */
+#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1<<3) /* {??? */
+#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1<<4) /* {,n} => {0,n} */
+#define ONIG_SYN_STRICT_CHECK_BACKREF (1<<5) /* /(\1)/,/\1()/ ..*/
+#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1<<6) /* (?<=a|bc) */
+#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1<<7) /* see doc/RE */
+#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1<<8) /* (?<x>)(?<x>) */
+
+/* syntax (behavior) in char class [...] */
+#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1<<20) /* [^...] */
+#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1<<21) /* [..\w..] etc.. */
+#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1<<22)
+#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1<<23) /* [0-9-a]=[0-9\-a] */
+/* syntax (behavior) warning */
+#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1<<24) /* [,-,] */
+#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1<<25) /* (?:a*)+ */
+
+/* meta character specifiers (onig_set_meta_char()) */
+#define ONIG_META_CHAR_ESCAPE 0
+#define ONIG_META_CHAR_ANYCHAR 1
+#define ONIG_META_CHAR_ANYTIME 2
+#define ONIG_META_CHAR_ZERO_OR_ONE_TIME 3
+#define ONIG_META_CHAR_ONE_OR_MORE_TIME 4
+#define ONIG_META_CHAR_ANYCHAR_ANYTIME 5
+
+#define ONIG_INEFFECTIVE_META_CHAR 0
+
+/* error codes */
+/* ONIG_IS_PATTERN_ERROR is true for codes in the half-open range
+   (-1000, -100], which covers the "syntax error" and "values error" groups
+   below and excludes the internal (-11..-22), general (-30) and thread
+   (-1001) codes.  The macro evaluates its argument twice, so do not pass an
+   expression with side effects. */
+#define ONIG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -1000)
+/* normal return */
+#define ONIG_NORMAL 0
+#define ONIG_MISMATCH -1
+#define ONIG_NO_SUPPORT_CONFIG -2
+/* internal error */
+#define ONIGERR_PARSER_BUG -11
+#define ONIGERR_STACK_BUG -12
+#define ONIGERR_UNDEFINED_BYTECODE -13
+#define ONIGERR_UNEXPECTED_BYTECODE -14
+#define ONIGERR_MATCH_STACK_LIMIT_OVER -15
+#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21
+#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22
+/* general error */
+#define ONIGERR_INVALID_ARGUMENT -30
+/* syntax error */
+/* (the numbering is not contiguous: -107 and -120 are not defined here) */
+#define ONIGERR_END_PATTERN_AT_LEFT_BRACE -100
+#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101
+#define ONIGERR_EMPTY_CHAR_CLASS -102
+#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103
+#define ONIGERR_END_PATTERN_AT_BACKSLASH -104
+#define ONIGERR_END_PATTERN_AT_META -105
+#define ONIGERR_END_PATTERN_AT_CONTROL -106
+#define ONIGERR_META_CODE_SYNTAX -108
+#define ONIGERR_CONTROL_CODE_SYNTAX -109
+#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110
+#define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111
+#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112
+#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113
+#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114
+#define ONIGERR_NESTED_REPEAT_OPERATOR -115
+#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS -116
+#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117
+#define ONIGERR_END_PATTERN_IN_GROUP -118
+#define ONIGERR_UNDEFINED_GROUP_OPTION -119
+#define ONIGERR_INVALID_POSIX_BRACKET_TYPE -121
+#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN -122
+#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN -123
+/* values error (syntax error) */
+/* (the numbering is not contiguous: -210, -211 and -213 are not defined here) */
+#define ONIGERR_TOO_BIG_NUMBER -200
+#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201
+#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202
+#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS -203
+#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204
+#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES -205
+#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING -206
+#define ONIGERR_TOO_BIG_BACKREF_NUMBER -207
+#define ONIGERR_INVALID_BACKREF -208
+#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED -209
+#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE -212
+#define ONIGERR_EMPTY_GROUP_NAME -214
+#define ONIGERR_INVALID_GROUP_NAME -215
+#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME -216
+#define ONIGERR_UNDEFINED_NAME_REFERENCE -217
+#define ONIGERR_UNDEFINED_GROUP_REFERENCE -218
+#define ONIGERR_MULTIPLEX_DEFINED_NAME -219
+#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL -220
+#define ONIGERR_NEVER_ENDING_RECURSION -221
+#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222
+#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223
+/* errors related to thread */
+#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001
+
+
+/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */
+#define ONIG_MAX_CAPTURE_HISTORY_GROUP 31
+/* True when group index i is within the capture-history limit and region r
+   has a non-NULL history entry list[i].  Only the upper bound of i is
+   checked; the macro evaluates i more than once and does not verify that
+   list holds at least i+1 entries. */
+#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \
+ ((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i])
+
+/* match result region type */
+/* Typedef'd below as OnigRegion.
+   NOTE(review): beg/end are presumably parallel arrays of num_regs group
+   start/end positions, with `allocated` being their capacity -- confirm in
+   onig_region_resize(). */
+struct re_registers {
+ int allocated;
+ int num_regs;
+ int* beg;
+ int* end;
+ /* extended */
+ struct re_registers** list; /* capture history. list[1]-list[31] */
+};
+
+#define ONIG_REGION_NOTPOS -1
+
+typedef struct re_registers OnigRegion;
+
+/* Extra error detail, passed as the `einfo` out-parameter of onig_new() and
+   onig_recompile().
+   NOTE(review): par/par_end presumably delimit the piece of pattern text
+   the error refers to -- confirm in the error-reporting code. */
+typedef struct {
+ UChar* par;
+ UChar* par_end;
+} OnigErrorInfo;
+
+/* Lower/upper repetition bounds of one range-repeat operator.  The compiler
+   stores one entry per repeat id in regex_t.repeat_range (see
+   entry_repeat_range() in regcomp.c). */
+typedef struct {
+ int lower;
+ int upper;
+} OnigRepeatRange;
+
+typedef void (*OnigWarnFunc) P_((char* s));
+extern void onig_null_warn P_((char* s));
+#define ONIG_NULL_WARN onig_null_warn
+
+#define ONIG_CHAR_TABLE_SIZE 256
+
+/* regex_t state */
+#define ONIG_STATE_NORMAL 0
+#define ONIG_STATE_SEARCHING 1
+#define ONIG_STATE_COMPILING -1
+#define ONIG_STATE_MODIFY -2
+
+/* Normalizes reg->state: every positive value is reported as
+   ONIG_STATE_SEARCHING, zero and negative values are returned unchanged.
+   NOTE(review): a positive state is presumably a count of in-flight
+   searches -- confirm in regexec.c. */
+#define ONIG_STATE(reg) \
+ ((reg)->state > 0 ? ONIG_STATE_SEARCHING : (reg)->state)
+
+/* Compiled regular expression object.
+   The first three members are accessed through the BBUF_* byte-buffer
+   macros by the compiler (regcomp.c passes a regex_t* to BBUF_ADD), so per
+   the comment below they must stay first and in this order. */
+typedef struct re_pattern_buffer {
+ /* common members of BBuf(bytes-buffer) */
+ unsigned char* p; /* compiled pattern */
+ unsigned int used; /* used space for p */
+ unsigned int alloc; /* allocated space for p */
+
+ int state; /* normal, searching, compiling */
+ int num_mem; /* used memory(...) num counted from 1 */
+ int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
+ int num_null_check; /* OP_NULL_CHECK_START/END id counter */
+ int num_call; /* number of subexp call */
+ unsigned int capture_history; /* (?@...) flag (1-31) */
+ unsigned int bt_mem_start; /* need backtrack flag */
+ unsigned int bt_mem_end; /* need backtrack flag */
+ int stack_pop_level;
+ int repeat_range_alloc; /* number of entries allocated in repeat_range */
+ OnigRepeatRange* repeat_range; /* bounds per repeat id, indexed by id */
+
+ OnigEncoding enc;
+ OnigOptionType options;
+ OnigSyntaxType* syntax;
+ void* name_table;
+
+ /* optimization info (string search, char-map and anchors) */
+ int optimize; /* optimize flag */
+ int threshold_len; /* search str-length for apply optimize */
+ int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
+ OnigDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */
+ OnigDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */
+ int sub_anchor; /* start-anchor for exact or map */
+ unsigned char *exact;
+ unsigned char *exact_end;
+ unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
+ int *int_map; /* BM skip for exact_len > 255 */
+ int *int_map_backward; /* BM skip for backward search */
+ OnigDistance dmin; /* min-distance of exact or map */
+ OnigDistance dmax; /* max-distance of exact or map */
+
+ /* regex_t link chain */
+ struct re_pattern_buffer* chain; /* escape compile-conflict */
+} regex_t;
+
+
+/* Oniguruma Native API */
+/* Every prototype is tagged ONIG_EXTERN and has its parameter list wrapped
+   in P_(( ... )), or PV_(( ... )) for the one variadic function.
+   NOTE(review): these wrappers presumably drop the parameter lists for
+   pre-ANSI compilers -- see their definitions earlier in this header. */
+ONIG_EXTERN
+int onig_init P_((void));
+ONIG_EXTERN
+int onig_error_code_to_str PV_((UChar* s, int err_code, ...));
+ONIG_EXTERN
+void onig_set_warn_func P_((OnigWarnFunc f));
+ONIG_EXTERN
+void onig_set_verb_warn_func P_((OnigWarnFunc f));
+ONIG_EXTERN
+int onig_new P_((regex_t**, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
+ONIG_EXTERN
+void onig_free P_((regex_t*));
+ONIG_EXTERN
+int onig_recompile P_((regex_t*, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
+ONIG_EXTERN
+int onig_search P_((regex_t*, UChar* str, UChar* end, UChar* start, UChar* range, OnigRegion* region, OnigOptionType option));
+ONIG_EXTERN
+int onig_match P_((regex_t*, UChar* str, UChar* end, UChar* at, OnigRegion* region, OnigOptionType option));
+/* match-result region management */
+ONIG_EXTERN
+OnigRegion* onig_region_new P_((void));
+ONIG_EXTERN
+void onig_region_free P_((OnigRegion* region, int free_self));
+ONIG_EXTERN
+void onig_region_copy P_((OnigRegion* to, OnigRegion* from));
+ONIG_EXTERN
+void onig_region_clear P_((OnigRegion* region));
+ONIG_EXTERN
+int onig_region_resize P_((OnigRegion* region, int n));
+/* named-group queries */
+ONIG_EXTERN
+int onig_name_to_group_numbers P_((regex_t* reg, UChar* name, UChar* name_end,
+ int** nums));
+ONIG_EXTERN
+int onig_name_to_backref_number P_((regex_t* reg, UChar* name, UChar* name_end, OnigRegion *region));
+ONIG_EXTERN
+int onig_foreach_name P_((regex_t* reg, int (*func)(UChar*,UChar*,int,int*,regex_t*,void*), void* arg));
+ONIG_EXTERN
+int onig_number_of_names P_((regex_t* reg));
+/* accessors for a compiled regex */
+ONIG_EXTERN
+OnigEncoding onig_get_encoding P_((regex_t* reg));
+ONIG_EXTERN
+OnigOptionType onig_get_options P_((regex_t* reg));
+ONIG_EXTERN
+OnigSyntaxType* onig_get_syntax P_((regex_t* reg));
+/* syntax configuration */
+ONIG_EXTERN
+int onig_set_default_syntax P_((OnigSyntaxType* syntax));
+ONIG_EXTERN
+void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from));
+ONIG_EXTERN
+void onig_set_syntax_op P_((OnigSyntaxType* syntax, unsigned int op));
+ONIG_EXTERN
+void onig_set_syntax_op2 P_((OnigSyntaxType* syntax, unsigned int op2));
+ONIG_EXTERN
+void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior));
+ONIG_EXTERN
+void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options));
+ONIG_EXTERN
+int onig_set_meta_char P_((unsigned int what, unsigned int c));
+ONIG_EXTERN
+int onig_end P_((void));
+ONIG_EXTERN
+const char* onig_version P_((void));
+
+#endif /* ONIGURUMA_H */
diff --git a/regcomp.c b/regcomp.c
new file mode 100644
index 000000000..24d44dd1b
--- /dev/null
+++ b/regcomp.c
@@ -0,0 +1,5440 @@
+/**********************************************************************
+
+ regcomp.c - Oniguruma (regular expression library)
+
+ Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
+
+**********************************************************************/
+#include "regparse.h"
+
+#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
+static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
+#endif
+
+/* Exchange the contents of two parse-tree nodes (whole-struct copy). */
+static void
+swap_node(Node* a, Node* b)
+{
+  Node tmp = *a;
+
+  *a = *b;
+  *b = tmp;
+}
+
+/*
+ * Saturating addition of two distances.
+ * ONIG_INFINITE_DISTANCE is returned when either operand is already
+ * infinite or when the sum would exceed it.
+ */
+static OnigDistance
+distance_add(OnigDistance d1, OnigDistance d2)
+{
+  if (d1 == ONIG_INFINITE_DISTANCE) return ONIG_INFINITE_DISTANCE;
+  if (d2 == ONIG_INFINITE_DISTANCE) return ONIG_INFINITE_DISTANCE;
+
+  /* d1 + d2 would pass the infinite marker: clamp instead of wrapping */
+  if (d1 > ONIG_INFINITE_DISTANCE - d2) return ONIG_INFINITE_DISTANCE;
+
+  return d1 + d2;
+}
+
+/*
+ * Saturating multiplication of a distance by a repeat count.
+ * Returns 0 for m == 0, and ONIG_INFINITE_DISTANCE unless d is strictly
+ * below ONIG_INFINITE_DISTANCE / m.
+ */
+static OnigDistance
+distance_multiply(OnigDistance d, int m)
+{
+  if (m == 0) return 0;
+
+  if (!(d < ONIG_INFINITE_DISTANCE / m))
+    return ONIG_INFINITE_DISTANCE;
+
+  return d * m;
+}
+
+/* Returns 1 when all BITSET_SIZE words of the bit set are zero, else 0. */
+static int
+bitset_is_empty(BitSetRef bs)
+{
+  int idx = 0;
+
+  while (idx < BITSET_SIZE) {
+    if (bs[idx] != 0) return 0;
+    idx++;
+  }
+  return 1;
+}
+
+#ifdef ONIG_DEBUG
+/* Debug helper: counts how many of the SINGLE_BYTE_SIZE bits are set. */
+static int
+bitset_on_num(BitSetRef bs)
+{
+  int bit;
+  int count = 0;
+
+  for (bit = 0; bit < SINGLE_BYTE_SIZE; bit++)
+    count += (BITSET_AT(bs, bit) ? 1 : 0);
+
+  return count;
+}
+#endif
+
+/*
+ * Give `buf` a fresh storage area of `size` bytes with nothing used.
+ * Returns 0 on success, or ONIGERR_MEMORY when the allocation fails
+ * (buf->p is NULL in that case and alloc/used are left untouched).
+ */
+extern int
+onig_bbuf_init(BBuf* buf, int size)
+{
+  UChar* mem = (UChar* )xmalloc(size);
+
+  buf->p = mem;
+  if (IS_NULL(mem)) return(ONIGERR_MEMORY);
+
+  buf->used  = 0;
+  buf->alloc = size;
+  return 0;
+}
+
+
+#ifdef USE_SUBEXP_CALL
+
+/*
+ * Set up an empty list with room for `size` entries.
+ * Returns 0, or ONIGERR_MEMORY when allocation fails (the list is left
+ * unmodified in that case).
+ */
+static int
+unset_addr_list_init(UnsetAddrList* uslist, int size)
+{
+  UnsetAddr* entries = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
+
+  CHECK_NULL_RETURN_VAL(entries, ONIGERR_MEMORY);
+
+  uslist->us    = entries;
+  uslist->alloc = size;
+  uslist->num   = 0;
+  return 0;
+}
+
+/* Free the entry array of the list, if it has one. */
+static void
+unset_addr_list_end(UnsetAddrList* uslist)
+{
+  if (IS_NULL(uslist->us)) return;
+
+  xfree(uslist->us);
+}
+
+/*
+ * Append one (offset, target) pair to the list, growing the entry array
+ * when it is full.
+ *
+ *   offset - position in the compiled buffer of the address to patch later
+ *   node   - node whose address will be written at that position
+ *
+ * Returns 0 on success, or ONIGERR_MEMORY when the array cannot be grown
+ * (the list is left unchanged in that case).
+ */
+static int
+unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
+{
+  if (uslist->num >= uslist->alloc) {
+    UnsetAddr* grown;
+    /* Doubling a zero capacity would request 0 bytes, which can never make
+       room and may free the old buffer; start from one entry instead. */
+    int new_alloc = (uslist->alloc > 0) ? uslist->alloc * 2 : 1;
+
+    grown = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * new_alloc);
+    CHECK_NULL_RETURN_VAL(grown, ONIGERR_MEMORY);
+    uslist->alloc = new_alloc;
+    uslist->us    = grown;
+  }
+
+  uslist->us[uslist->num].offset = offset;
+  uslist->us[uslist->num].target = node;
+  uslist->num++;
+  return 0;
+}
+#endif /* USE_SUBEXP_CALL */
+
+
+#if 0
+/*
+ * (Currently compiled out.)  Returns the largest enc_len() over the
+ * single-byte codes selected by the bit set: the codes whose bit is set,
+ * or, when `negative` is non-zero, the codes whose bit is clear.
+ */
+static int
+bitset_mbmaxlen(BitSetRef bs, int negative, OnigEncoding enc)
+{
+  int code;
+  int best = 0;
+
+  for (code = 0; code < SINGLE_BYTE_SIZE; code++) {
+    int is_set = BITSET_AT(bs, code) ? 1 : 0;
+    int cur;
+
+    /* skip codes on the wrong side of the (possibly negated) set */
+    if (is_set == (negative ? 1 : 0)) continue;
+
+    cur = enc_len(enc, code);
+    if (cur > best) best = cur;
+  }
+  return best;
+}
+#endif
+
+/* Append a single opcode to reg's compiled-pattern buffer via BBUF_ADD1.
+   Returns 0 when control reaches the end.
+   NOTE(review): BBUF_ADD1 is a macro defined elsewhere; callers test this
+   function's result, so the macro presumably returns an error code from
+   here when the buffer cannot grow -- confirm in regint.h. */
+static int
+add_opcode(regex_t* reg, int opcode)
+{
+ BBUF_ADD1(reg, opcode);
+ return 0;
+}
+
+/* Append a relative-address operand (SIZE_RELADDR bytes) to reg's buffer.
+   With PLATFORM_UNALIGNED_WORD_ACCESS the RelAddrType value is appended
+   straight from memory; otherwise SERIALIZE_RELADDR first packs it into a
+   temporary byte buffer.  Returns 0 when control reaches the end.
+   NOTE(review): BBUF_ADD presumably returns an error from this function on
+   allocation failure -- confirm. */
+static int
+add_rel_addr(regex_t* reg, int addr)
+{
+ RelAddrType ra = (RelAddrType )addr;
+
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+ BBUF_ADD(reg, &ra, SIZE_RELADDR);
+#else
+ UChar buf[SERIALIZE_BUFSIZE];
+ SERIALIZE_RELADDR(ra, buf);
+ BBUF_ADD(reg, buf, SIZE_RELADDR);
+#endif
+ return 0;
+}
+
+/* Append an absolute-address operand (SIZE_ABSADDR bytes) to reg's buffer.
+   With PLATFORM_UNALIGNED_WORD_ACCESS the AbsAddrType value is appended
+   straight from memory; otherwise SERIALIZE_ABSADDR first packs it into a
+   temporary byte buffer.  Returns 0 when control reaches the end.
+   NOTE(review): BBUF_ADD presumably returns an error from this function on
+   allocation failure -- confirm. */
+static int
+add_abs_addr(regex_t* reg, int addr)
+{
+ AbsAddrType ra = (AbsAddrType )addr;
+
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+ BBUF_ADD(reg, &ra, SIZE_ABSADDR);
+#else
+ UChar buf[SERIALIZE_BUFSIZE];
+ SERIALIZE_ABSADDR(ra, buf);
+ BBUF_ADD(reg, buf, SIZE_ABSADDR);
+#endif
+ return 0;
+}
+
+/* Append a length operand (SIZE_LENGTH bytes) to reg's buffer.
+   With PLATFORM_UNALIGNED_WORD_ACCESS the LengthType value is appended
+   straight from memory; otherwise SERIALIZE_LENGTH first packs it into a
+   temporary byte buffer.  Returns 0 when control reaches the end.
+   NOTE(review): BBUF_ADD presumably returns an error from this function on
+   allocation failure -- confirm. */
+static int
+add_length(regex_t* reg, int len)
+{
+ LengthType l = (LengthType )len;
+
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+ BBUF_ADD(reg, &l, SIZE_LENGTH);
+#else
+ UChar buf[SERIALIZE_BUFSIZE];
+ SERIALIZE_LENGTH(l, buf);
+ BBUF_ADD(reg, buf, SIZE_LENGTH);
+#endif
+ return 0;
+}
+
+/* Append a MemNumType operand (SIZE_MEMNUM bytes) to reg's buffer.  In this
+   file it carries group numbers as well as repeat and null-check ids.
+   With PLATFORM_UNALIGNED_WORD_ACCESS the value is appended straight from
+   memory; otherwise SERIALIZE_MEMNUM first packs it into a temporary byte
+   buffer.  Returns 0 when control reaches the end.
+   NOTE(review): BBUF_ADD presumably returns an error from this function on
+   allocation failure -- confirm. */
+static int
+add_mem_num(regex_t* reg, int num)
+{
+ MemNumType n = (MemNumType )num;
+
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+ BBUF_ADD(reg, &n, SIZE_MEMNUM);
+#else
+ UChar buf[SERIALIZE_BUFSIZE];
+ SERIALIZE_MEMNUM(n, buf);
+ BBUF_ADD(reg, buf, SIZE_MEMNUM);
+#endif
+ return 0;
+}
+
+#if 0
+/* (Compiled out by the surrounding #if 0.)  Would append a RepeatNumType
+   operand of SIZE_REPEATNUM bytes to reg's buffer, following the same
+   direct-vs-serialized pattern as the other add_* helpers. */
+static int
+add_repeat_num(regex_t* reg, int num)
+{
+ RepeatNumType n = (RepeatNumType )num;
+
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+ BBUF_ADD(reg, &n, SIZE_REPEATNUM);
+#else
+ UChar buf[SERIALIZE_BUFSIZE];
+ SERIALIZE_REPEATNUM(n, buf);
+ BBUF_ADD(reg, buf, SIZE_REPEATNUM);
+#endif
+ return 0;
+}
+#endif
+
+/* Append an option-set operand (SIZE_OPTION bytes) to reg's buffer.
+   With PLATFORM_UNALIGNED_WORD_ACCESS the OnigOptionType value is appended
+   straight from the parameter; otherwise SERIALIZE_OPTION first packs it
+   into a temporary byte buffer.  Returns 0 when control reaches the end.
+   NOTE(review): BBUF_ADD presumably returns an error from this function on
+   allocation failure -- confirm. */
+static int
+add_option(regex_t* reg, OnigOptionType option)
+{
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+ BBUF_ADD(reg, &option, SIZE_OPTION);
+#else
+ UChar buf[SERIALIZE_BUFSIZE];
+ SERIALIZE_OPTION(option, buf);
+ BBUF_ADD(reg, buf, SIZE_OPTION);
+#endif
+ return 0;
+}
+
+/*
+ * Emit an opcode followed by its relative-address operand.
+ * Returns the first non-zero result of the two appends, or 0.
+ */
+static int
+add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
+{
+  int status = add_opcode(reg, opcode);
+
+  if (status != 0) return status;
+  return add_rel_addr(reg, addr);
+}
+
+/* Append `len` raw bytes starting at `bytes` to reg's buffer via BBUF_ADD.
+   Returns 0 when control reaches the end.
+   NOTE(review): BBUF_ADD presumably returns an error from this function on
+   allocation failure -- confirm. */
+static int
+add_bytes(regex_t* reg, UChar* bytes, int len)
+{
+ BBUF_ADD(reg, bytes, len);
+ return 0;
+}
+
+/* Append the SIZE_BITSET bytes of bit set `bs` to reg's buffer via BBUF_ADD.
+   Returns 0 when control reaches the end.
+   NOTE(review): BBUF_ADD presumably returns an error from this function on
+   allocation failure -- confirm. */
+static int
+add_bitset(regex_t* reg, BitSetRef bs)
+{
+ BBUF_ADD(reg, bs, SIZE_BITSET);
+ return 0;
+}
+
+/*
+ * Emit an opcode followed by an option-set operand.
+ * Returns the first non-zero result of the two appends, or 0.
+ */
+static int
+add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
+{
+  int status = add_opcode(reg, opcode);
+
+  if (status != 0) return status;
+  return add_option(reg, option);
+}
+
+static int compile_length_tree(Node* node, regex_t* reg);
+static int compile_tree(Node* node, regex_t* reg);
+
+
+/* True for the exact-string opcodes that carry an explicit string-length
+   operand (the "...N" variants); the other exact opcodes chosen by
+   select_str_opcode() get no length operand in add_compile_string().
+   Evaluates `op` several times. */
+#define IS_NEED_STR_LEN_OP_EXACT(op) \
+ ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
+ (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
+
+/*
+ * Pick the exact-match opcode for a run of `str_len` characters that are
+ * each `mb_len` bytes wide.
+ *
+ *   mb_len == 1 : OP_EXACT1..OP_EXACT5 / OP_EXACTN, or the _IC variants
+ *                 (OP_EXACT1_IC / OP_EXACTN_IC) when ignore_case is set
+ *   mb_len == 2 : OP_EXACTMB2N1..OP_EXACTMB2N3 / OP_EXACTMB2N
+ *   mb_len == 3 : OP_EXACTMB3N
+ *   otherwise   : OP_EXACTMBN
+ *
+ * ignore_case is only consulted for single-byte characters.
+ */
+static int
+select_str_opcode(int mb_len, int str_len, int ignore_case)
+{
+  if (mb_len == 1) {
+    if (ignore_case)
+      return (str_len == 1) ? OP_EXACT1_IC : OP_EXACTN_IC;
+
+    switch (str_len) {
+    case 1:  return OP_EXACT1;
+    case 2:  return OP_EXACT2;
+    case 3:  return OP_EXACT3;
+    case 4:  return OP_EXACT4;
+    case 5:  return OP_EXACT5;
+    default: return OP_EXACTN;
+    }
+  }
+
+  if (mb_len == 2) {
+    if (str_len == 1) return OP_EXACTMB2N1;
+    if (str_len == 2) return OP_EXACTMB2N2;
+    if (str_len == 3) return OP_EXACTMB2N3;
+    return OP_EXACTMB2N;
+  }
+
+  return (mb_len == 3) ? OP_EXACTMB3N : OP_EXACTMBN;
+}
+
+/* Compile `node`, bracketing it with null-check opcodes when empty_info is
+   non-zero:
+       OP_NULL_CHECK_START <id>  node...  OP_NULL_CHECK_END* <id>
+   The END variant is chosen from empty_info (NQ_TARGET_IS_EMPTY,
+   NQ_TARGET_IS_EMPTY_MEM or NQ_TARGET_IS_EMPTY_REC).
+   Returns 0 or the first non-zero result from an emit/compile step. */
+static int
+compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
+{
+ int r;
+ /* The id is captured before compiling the body: nested checks inside
+    `node` may bump reg->num_null_check, and the END opcode must carry the
+    same id as the START emitted here. */
+ int saved_num_null_check = reg->num_null_check;
+
+ if (empty_info != 0) {
+ r = add_opcode(reg, OP_NULL_CHECK_START);
+ if (r) return r;
+ r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
+ if (r) return r;
+ reg->num_null_check++;
+ }
+
+ r = compile_tree(node, reg);
+ if (r) return r;
+
+ if (empty_info != 0) {
+ /* NOTE(review): if empty_info is non-zero but none of the three values
+    below, no END opcode is emitted and only the id operand is appended. */
+ if (empty_info == NQ_TARGET_IS_EMPTY)
+ r = add_opcode(reg, OP_NULL_CHECK_END);
+ else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
+ r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
+ else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
+ r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
+
+ if (r) return r;
+ r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
+ }
+ return r;
+}
+
+#ifdef USE_SUBEXP_CALL
+/* Emit OP_CALL with a placeholder absolute address (0).  The position of
+   the placeholder and the call's target node are recorded in the node's
+   unset_addr_list.
+   NOTE(review): the placeholder is presumably patched once the target's
+   address is known -- the patching code is not in this part of the file.
+   Returns 0 or the first non-zero result. */
+static int
+compile_call(CallNode* node, regex_t* reg)
+{
+ int r;
+
+ r = add_opcode(reg, OP_CALL);
+ if (r) return r;
+ /* record where the address operand is about to be written */
+ r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
+ node->target);
+ if (r) return r;
+ r = add_abs_addr(reg, 0 /*dummy addr.*/);
+ return r;
+}
+#endif
+
+/*
+ * Compile `node` n times back to back (nothing is emitted for n <= 0).
+ * Stops at, and returns, the first non-zero result; otherwise returns 0.
+ */
+static int
+compile_tree_n_times(Node* node, int n, regex_t* reg)
+{
+  int remaining;
+
+  for (remaining = n; remaining > 0; remaining--) {
+    int r = compile_tree(node, reg);
+    if (r) return r;
+  }
+  return 0;
+}
+
+/*
+ * Size in bytes of what add_compile_string() emits for the same arguments:
+ * the opcode, an optional character-width operand (OP_EXACTMBN only), an
+ * optional string-length operand, and the mb_len * str_len string bytes.
+ * `s` and `reg` are not used by the computation.
+ */
+static int
+add_compile_string_length(UChar* s, int mb_len, int str_len,
+                          regex_t* reg, int ignore_case)
+{
+  int op    = select_str_opcode(mb_len, str_len, ignore_case);
+  int total = SIZE_OPCODE;
+
+  if (op == OP_EXACTMBN) total += SIZE_LENGTH;            /* char width */
+  if (IS_NEED_STR_LEN_OP_EXACT(op)) total += SIZE_LENGTH; /* char count */
+
+  total += mb_len * str_len;
+  return total;
+}
+
+/*
+ * Emit an exact-match instruction for a run of `str_len` characters, each
+ * `mb_len` bytes wide, starting at `s`:
+ *
+ *     opcode [mb_len] [str_len] bytes...
+ *
+ * mb_len is emitted only for OP_EXACTMBN and str_len only for the opcodes
+ * accepted by IS_NEED_STR_LEN_OP_EXACT.
+ *
+ * Returns 0 on success or the first non-zero result of an emit step.  The
+ * emit results used to be discarded, so a failed append was reported to
+ * the caller as success.
+ */
+static int
+add_compile_string(UChar* s, int mb_len, int str_len,
+                   regex_t* reg, int ignore_case)
+{
+  int r;
+  int op = select_str_opcode(mb_len, str_len, ignore_case);
+
+  r = add_opcode(reg, op);
+  if (r) return r;
+
+  if (op == OP_EXACTMBN) {
+    r = add_length(reg, mb_len);
+    if (r) return r;
+  }
+
+  if (IS_NEED_STR_LEN_OP_EXACT(op)) {
+    r = add_length(reg, str_len);
+    if (r) return r;
+  }
+
+  return add_bytes(reg, s, mb_len * str_len);
+}
+
+
+/* Compute the number of bytes compile_string_node() emits for `sn`.
+   The string is split into maximal runs of characters with the same byte
+   width (enc_len); each run is sized with add_compile_string_length().
+   Under ignore-case, a single-byte run is treated as case-ambiguous as soon
+   as one of its characters is (ONIGENC_IS_MBC_CASE_AMBIG).
+   Returns 0 for an empty string. */
+static int
+compile_length_string_node(StrNode* sn, regex_t* reg)
+{
+ int rlen, r, len, prev_len, slen, ambig, ic;
+ OnigEncoding enc = reg->enc;
+ UChar *p, *prev;
+
+ if (sn->end <= sn->s)
+ return 0;
+
+ ic = IS_IGNORECASE(reg->options);
+
+ /* prev = start of the current run, prev_len = byte width of its chars,
+    slen = number of chars in the run, rlen = total size so far */
+ p = prev = sn->s;
+ prev_len = enc_len(enc, *p);
+ if (ic != 0 && prev_len == 1)
+ ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p);
+ else
+ ambig = 0;
+
+ p += prev_len;
+ slen = 1;
+ rlen = 0;
+
+ for (; p < sn->end; ) {
+ len = enc_len(enc, *p);
+ if (len == prev_len) {
+ /* same width: extend the current run */
+ slen++;
+ if (ic != 0 && ambig == 0 && len == 1)
+ ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p);
+ }
+ else {
+ /* width changed: account for the finished run and start a new one */
+ r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
+ rlen += r;
+
+ if (ic != 0 && len == 1)
+ ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p);
+ else
+ ambig = 0;
+
+ prev = p;
+ slen = 1;
+ prev_len = len;
+ }
+
+ p += len;
+ }
+ /* account for the final run */
+ r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
+ rlen += r;
+ return rlen;
+}
+
+/*
+ * Size of the code emitted for a raw string node: the bytes are treated
+ * as one run of single-byte characters, never case-insensitive.
+ * Returns 0 for an empty string.
+ */
+static int
+compile_length_string_raw_node(StrNode* sn, regex_t* reg)
+{
+  if (sn->s < sn->end)
+    return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s,
+                                     reg, 0);
+  return 0;
+}
+
+/* Emit exact-match instructions for string node `sn`.
+   The string is split into maximal runs of characters with the same byte
+   width (enc_len); each run is emitted with add_compile_string().
+   Under ignore-case, single-byte characters are lower-cased IN PLACE in the
+   node's own buffer (ONIGENC_MBC_TO_LOWER with source == destination) from
+   the first case-ambiguous character of a run onwards, and that run is
+   emitted with the ignore-case opcode.
+   Returns 0 for an empty string, otherwise 0 or the first non-zero result. */
+static int
+compile_string_node(StrNode* sn, regex_t* reg)
+{
+ int r, len, prev_len, slen, ambig, ic;
+ OnigEncoding enc = reg->enc;
+ UChar *p, *prev;
+
+ if (sn->end <= sn->s)
+ return 0;
+
+ ic = IS_IGNORECASE(reg->options);
+
+ /* prev = start of the current run, prev_len = byte width of its chars,
+    slen = number of chars in the run */
+ p = prev = sn->s;
+ prev_len = enc_len(enc, *p);
+ if (ic != 0 && prev_len == 1) {
+ ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p);
+ if (ambig != 0)
+ ONIGENC_MBC_TO_LOWER(reg->enc, p, p);
+ }
+ else
+ ambig = 0;
+
+ p += prev_len;
+ slen = 1;
+
+ for (; p < sn->end; ) {
+ len = enc_len(enc, *p);
+ if (len == prev_len) {
+ /* same width: extend the current run */
+ slen++;
+ if (ic != 0 && len == 1) {
+ if (ambig == 0)
+ ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p);
+ if (ambig != 0) ONIGENC_MBC_TO_LOWER(reg->enc, p, p);
+ }
+ }
+ else {
+ /* width changed: emit the finished run and start a new one */
+ r = add_compile_string(prev, prev_len, slen, reg, ambig);
+ if (r) return r;
+ if (ic != 0 && len == 1) {
+ ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p);
+ if (ambig != 0) ONIGENC_MBC_TO_LOWER(reg->enc, p, p);
+ }
+ else
+ ambig = 0;
+
+ prev = p;
+ slen = 1;
+ prev_len = len;
+ }
+
+ p += len;
+ }
+ /* emit the final run */
+ return add_compile_string(prev, prev_len, slen, reg, ambig);
+}
+
+/*
+ * Emit a raw string node as one run of single-byte characters, never
+ * case-insensitive.  Emits nothing and returns 0 for an empty string.
+ */
+static int
+compile_string_raw_node(StrNode* sn, regex_t* reg)
+{
+  if (sn->s < sn->end)
+    return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
+  return 0;
+}
+
+/*
+ * Append the multi-byte part of a character class: a length operand
+ * followed by the contents of `mbuf`.
+ *
+ * Without PLATFORM_UNALIGNED_WORD_ACCESS, padding bytes are inserted in
+ * front of the data according to GET_ALIGNMENT_PAD_SIZE, and the remainder
+ * of (WORD_ALIGNMENT_SIZE - 1) is appended behind it.  Total padding is
+ * therefore always WORD_ALIGNMENT_SIZE - 1 bytes, which keeps the emitted
+ * size equal to the value computed by compile_length_cclass_node().
+ *
+ * Returns 0 on success or the first non-zero result of an emit step.  The
+ * results of the length and padding appends used to be discarded.
+ */
+static int
+add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
+{
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+  int r;
+
+  r = add_length(reg, mbuf->used);
+  if (r) return r;
+  return add_bytes(reg, mbuf->p, mbuf->used);
+#else
+  int r, pad_size;
+  /* address at which the data would start right after the length operand */
+  UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
+
+  GET_ALIGNMENT_PAD_SIZE(p, pad_size);
+  r = add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
+  if (r) return r;
+  if (pad_size != 0) {
+    r = add_bytes(reg, PadBuf, pad_size);
+    if (r) return r;
+  }
+
+  r = add_bytes(reg, mbuf->p, mbuf->used);
+  if (r) return r;
+
+  /* padding for return value from compile_length_cclass_node() to be fix. */
+  pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
+  if (pad_size != 0) r = add_bytes(reg, PadBuf, pad_size);
+  return r;
+#endif
+}
+
+/*
+ * Size in bytes of the code compile_cclass_node() emits for `cc`:
+ *   - no multi-byte buffer:           opcode + bit set
+ *   - multi-byte buffer, empty bitset: opcode + length + mbuf data
+ *   - both:                           opcode + bit set + length + mbuf data
+ * On platforms without unaligned word access the mbuf data is accompanied
+ * by WORD_ALIGNMENT_SIZE - 1 padding bytes.
+ */
+static int
+compile_length_cclass_node(CClassNode* cc, regex_t* reg)
+{
+  int len;
+
+  if (IS_NULL(cc->mbuf))
+    return SIZE_OPCODE + SIZE_BITSET;
+
+  /* SIZE_BITSET is included in mbuf->used. */
+  len = bitset_is_empty(cc->bs) ? SIZE_OPCODE : SIZE_OPCODE + SIZE_BITSET;
+
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+  len += SIZE_LENGTH + cc->mbuf->used;
+#else
+  len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
+#endif
+
+  return len;
+}
+
+/*
+ * Emit the instruction for a character class:
+ *   - no multi-byte buffer:            OP_CCLASS[_NOT]     bitset
+ *   - multi-byte buffer, empty bitset: OP_CCLASS_MB[_NOT]  mb-data
+ *   - both:                            OP_CCLASS_MIX[_NOT] bitset mb-data
+ * The _NOT opcode is used when cc->not is set.
+ *
+ * Returns 0 on success or the first non-zero result of an emit step.  The
+ * result of emitting the opcode used to be discarded.
+ */
+static int
+compile_cclass_node(CClassNode* cc, regex_t* reg)
+{
+  int r;
+
+  if (IS_NULL(cc->mbuf)) {
+    r = add_opcode(reg, cc->not ? OP_CCLASS_NOT : OP_CCLASS);
+    if (r) return r;
+    return add_bitset(reg, cc->bs);
+  }
+
+  if (bitset_is_empty(cc->bs)) {
+    r = add_opcode(reg, cc->not ? OP_CCLASS_MB_NOT : OP_CCLASS_MB);
+    if (r) return r;
+    return add_multi_byte_cclass(cc->mbuf, reg);
+  }
+
+  r = add_opcode(reg, cc->not ? OP_CCLASS_MIX_NOT : OP_CCLASS_MIX);
+  if (r) return r;
+  r = add_bitset(reg, cc->bs);
+  if (r) return r;
+  return add_multi_byte_cclass(cc->mbuf, reg);
+}
+
+/* Record the bounds of repeat operator `id` in reg->repeat_range[id],
+   allocating or growing the table as needed.
+   Returns 0, or ONIGERR_MEMORY when (re)allocation fails; on a failed
+   realloc the old table is kept.
+   NOTE(review): the table grows by a fixed REPEAT_RANGE_ALLOC entries at a
+   time, which is only sufficient if ids arrive in increasing order without
+   gaps -- the one visible caller passes consecutive reg->num_repeat values. */
+static int
+entry_repeat_range(regex_t* reg, int id, int lower, int upper)
+{
+#define REPEAT_RANGE_ALLOC 4
+
+ OnigRepeatRange* p;
+
+ if (reg->repeat_range_alloc == 0) {
+ /* first entry: create the table */
+ p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
+ CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
+ reg->repeat_range = p;
+ reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
+ }
+ else if (reg->repeat_range_alloc <= id) {
+ /* table full: extend it by one chunk */
+ int n;
+ n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
+ p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
+ sizeof(OnigRepeatRange) * n);
+ CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
+ reg->repeat_range = p;
+ reg->repeat_range_alloc = n;
+ }
+ else {
+ p = reg->repeat_range;
+ }
+
+ p[id].lower = lower;
+ p[id].upper = upper;
+ return 0;
+}
+
+/* Emit the counted-loop form of a quantifier:
+       OP_REPEAT[_NG] <id> <rel-addr>  target...  OP_REPEAT_INC[_NG] <id>
+   The relative address skips the target plus the closing REPEAT_INC.
+   `target_len` must already include the null-check wrapper size when
+   empty_info is non-zero (the caller passes mod_tlen).  The bounds are
+   stored in reg->repeat_range under the same id.
+   Returns 0 or the first non-zero result. */
+static int
+compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info,
+ regex_t* reg)
+{
+ int r;
+ /* id is taken before compiling the target, which may contain further
+    repeats that advance reg->num_repeat */
+ int num_repeat = reg->num_repeat;
+
+ r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
+ if (r) return r;
+ r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
+ reg->num_repeat++;
+ if (r) return r;
+ r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
+ if (r) return r;
+
+ r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
+ if (r) return r;
+
+ r = compile_tree_empty_check(qn->target, reg, empty_info);
+ if (r) return r;
+
+ r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
+ if (r) return r;
+ r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
+ return r;
+}
+
+#define QUALIFIER_EXPAND_LIMIT_SIZE 50
+
+/* Compute the number of bytes compile_qualifier_node() emits for `qn`.
+   The branch structure mirrors compile_qualifier_node() case by case and
+   the two must be kept in sync.
+   Returns the size, or a negative error code from compile_length_tree(). */
+static int
+compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
+{
+ int len, mod_tlen;
+ int infinite = IS_REPEAT_INFINITE(qn->upper);
+ int empty_info = qn->target_empty_info;
+ int tlen = compile_length_tree(qn->target, reg);
+
+ if (tlen < 0) return tlen;
+
+ /* anychar repeat */
+ /* greedy unbounded repeat of "any char": `lower` copies of the target
+    followed by a single ANYCHAR_STAR-style opcode */
+ if (NTYPE(qn->target) == N_ANYCHAR) {
+ if (qn->greedy && infinite) {
+ if (IS_NOT_NULL(qn->next_head_exact))
+ return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
+ else
+ return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
+ }
+ }
+
+ /* mod_tlen = target size including the null-check wrapper, if any */
+ if (empty_info != 0)
+ mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
+ else
+ mod_tlen = tlen;
+
+ /* unbounded repeat, expanded inline: mandatory copies + push/jump loop */
+ if (infinite &&
+ (qn->lower <= 1 || tlen * qn->lower <= QUALIFIER_EXPAND_LIMIT_SIZE)) {
+ if (qn->lower == 1 && tlen > QUALIFIER_EXPAND_LIMIT_SIZE) {
+ /* large target with lower == 1: a jump into the loop body replaces
+    the separate mandatory copy */
+ len = SIZE_OP_JUMP;
+ }
+ else {
+ len = tlen * qn->lower;
+ }
+
+ if (qn->greedy) {
+ if (IS_NOT_NULL(qn->head_exact))
+ len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
+ else if (IS_NOT_NULL(qn->next_head_exact))
+ len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
+ else
+ len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
+ }
+ else
+ len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
+ }
+ else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
+ len = SIZE_OP_JUMP + tlen;
+ }
+ /* small bounded greedy repeat: fully unrolled */
+ else if (!infinite && qn->greedy &&
+ (tlen + SIZE_OP_PUSH) * qn->upper <= QUALIFIER_EXPAND_LIMIT_SIZE) {
+ len = tlen * qn->lower;
+ len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
+ }
+ else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
+ len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
+ }
+ /* everything else: counted loop (see compile_range_repeat_node) */
+ else {
+ len = SIZE_OP_REPEAT_INC
+ + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
+ }
+
+ return len;
+}
+
+/*
+ * Returns 1 when `qn` is a greedy, unbounded repeat whose target is an
+ * any-char node (e.g. the shape of ".*"), otherwise 0.
+ */
+static int
+is_anychar_star_qualifier(QualifierNode* qn)
+{
+  if (!qn->greedy) return 0;
+  if (!IS_REPEAT_INFINITE(qn->upper)) return 0;
+
+  return (NTYPE(qn->target) == N_ANYCHAR) ? 1 : 0;
+}
+
+/*
+ * Emit bytecode for a quantified sub-pattern.  The strategy is chosen in
+ * this order (compile_length_qualifier_node() mirrors it):
+ *
+ *   1. greedy unbounded repeat of any-char: `lower` copies of the target,
+ *      then one OP_ANYCHAR[_ML]_STAR[_PEEK_NEXT] opcode;
+ *   2. unbounded repeat that is cheap to expand: mandatory copies (or a
+ *      jump into the loop body) followed by a PUSH/JUMP loop;
+ *   3. {0} applied to a referenced group: jump over the target, which is
+ *      still emitted;
+ *   4. small bounded greedy repeat: fully unrolled with OP_PUSH guards;
+ *   5. non-greedy '??';
+ *   6. anything else: counted loop via compile_range_repeat_node().
+ *
+ * Returns 0 on success, a negative error from compile_length_tree(), or
+ * the first non-zero result of an emit step.  The one-byte peek operands
+ * were previously appended without checking the result.
+ */
+static int
+compile_qualifier_node(QualifierNode* qn, regex_t* reg)
+{
+  int i, r, mod_tlen;
+  int infinite = IS_REPEAT_INFINITE(qn->upper);
+  int empty_info = qn->target_empty_info;
+  int tlen = compile_length_tree(qn->target, reg);
+
+  if (tlen < 0) return tlen;
+
+  if (is_anychar_star_qualifier(qn)) {
+    r = compile_tree_n_times(qn->target, qn->lower, reg);
+    if (r) return r;
+    if (IS_NOT_NULL(qn->next_head_exact)) {
+      if (IS_MULTILINE(reg->options))
+        r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
+      else
+        r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
+      if (r) return r;
+      /* operand: first byte of the literal that follows the repeat */
+      return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1);
+    }
+    else {
+      if (IS_MULTILINE(reg->options))
+        return add_opcode(reg, OP_ANYCHAR_ML_STAR);
+      else
+        return add_opcode(reg, OP_ANYCHAR_STAR);
+    }
+  }
+
+  /* mod_tlen = target size including the null-check wrapper, if any */
+  if (empty_info != 0)
+    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
+  else
+    mod_tlen = tlen;
+
+  if (infinite &&
+      (qn->lower <= 1 || tlen * qn->lower <= QUALIFIER_EXPAND_LIMIT_SIZE)) {
+    if (qn->lower == 1 && tlen > QUALIFIER_EXPAND_LIMIT_SIZE) {
+      /* Large target with lower == 1: instead of emitting a separate
+         mandatory copy, jump over the loop's guard instruction so the
+         loop body runs once unconditionally. */
+      if (qn->greedy) {
+        if (IS_NOT_NULL(qn->head_exact))
+          r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
+        else if (IS_NOT_NULL(qn->next_head_exact))
+          r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
+        else
+          r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
+      }
+      else {
+        r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
+      }
+      if (r) return r;
+    }
+    else {
+      r = compile_tree_n_times(qn->target, qn->lower, reg);
+      if (r) return r;
+    }
+
+    if (qn->greedy) {
+      if (IS_NOT_NULL(qn->head_exact)) {
+        r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
+                                mod_tlen + SIZE_OP_JUMP);
+        if (r) return r;
+        /* operand: first byte of the target's leading literal */
+        r = add_bytes(reg, NSTRING(qn->head_exact).s, 1);
+        if (r) return r;
+        r = compile_tree_empty_check(qn->target, reg, empty_info);
+        if (r) return r;
+        r = add_opcode_rel_addr(reg, OP_JUMP,
+          -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
+      }
+      else if (IS_NOT_NULL(qn->next_head_exact)) {
+        r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
+                                mod_tlen + SIZE_OP_JUMP);
+        if (r) return r;
+        /* operand: first byte of the literal that follows the repeat */
+        r = add_bytes(reg, NSTRING(qn->next_head_exact).s, 1);
+        if (r) return r;
+        r = compile_tree_empty_check(qn->target, reg, empty_info);
+        if (r) return r;
+        r = add_opcode_rel_addr(reg, OP_JUMP,
+          -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
+      }
+      else {
+        r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
+        if (r) return r;
+        r = compile_tree_empty_check(qn->target, reg, empty_info);
+        if (r) return r;
+        r = add_opcode_rel_addr(reg, OP_JUMP,
+                 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
+      }
+    }
+    else {
+      /* non-greedy: jump to the trailing PUSH, which loops back */
+      r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
+      if (r) return r;
+      r = compile_tree_empty_check(qn->target, reg, empty_info);
+      if (r) return r;
+      r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
+    }
+  }
+  else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
+    r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
+    if (r) return r;
+    r = compile_tree(qn->target, reg);
+  }
+  else if (!infinite && qn->greedy &&
+           (tlen + SIZE_OP_PUSH) * qn->upper <= QUALIFIER_EXPAND_LIMIT_SIZE) {
+    int n = qn->upper - qn->lower;
+
+    r = compile_tree_n_times(qn->target, qn->lower, reg);
+    if (r) return r;
+
+    /* each optional copy is guarded by a PUSH whose target is the end of
+       the whole unrolled sequence */
+    for (i = 0; i < n; i++) {
+      r = add_opcode_rel_addr(reg, OP_PUSH,
+                              (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
+      if (r) return r;
+      r = compile_tree(qn->target, reg);
+      if (r) return r;
+    }
+  }
+  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
+    r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
+    if (r) return r;
+    r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
+    if (r) return r;
+    r = compile_tree(qn->target, reg);
+  }
+  else {
+    r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
+  }
+  return r;
+}
+
+/*
+ * Size in bytes of the code compile_option_node() emits for an option
+ * group.  The target is measured with the group's options installed in
+ * reg->options; the previous options are restored before returning.
+ * When the change touches a dynamic option, the SET_OPTION_PUSH /
+ * SET_OPTION / FAIL prologue and the trailing SET_OPTION are added.
+ * Returns the size, or a negative error code from compile_length_tree().
+ */
+static int
+compile_length_option_node(EffectNode* node, regex_t* reg)
+{
+  int tlen;
+  OnigOptionType saved = reg->options;
+
+  reg->options = node->option;
+  tlen = compile_length_tree(node->target, reg);
+  reg->options = saved;
+
+  if (tlen < 0) return tlen;
+  if (! IS_DYNAMIC_OPTION(saved ^ node->option)) return tlen;
+
+  return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
+         + tlen + SIZE_OP_SET_OPTION;
+}
+
+/* Emit code for an option group (EFFECT_OPTION).
+   If the group changes a dynamic option (IS_DYNAMIC_OPTION of the changed
+   bits), the target is wrapped as:
+       OP_SET_OPTION_PUSH <new>  OP_SET_OPTION <prev>  OP_FAIL
+       target...
+       OP_SET_OPTION <prev>
+   otherwise only the target is emitted.  In both cases the target is
+   compiled with reg->options temporarily set to the group's options.
+   NOTE(review): the SET_OPTION <prev> / FAIL pair after the PUSH is
+   presumably what runs on backtracking out of the group -- confirm in
+   regexec.c.
+   Returns 0 or the first non-zero result. */
+static int
+compile_option_node(EffectNode* node, regex_t* reg)
+{
+ int r;
+ OnigOptionType prev = reg->options;
+
+ if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
+ r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
+ if (r) return r;
+ r = add_opcode_option(reg, OP_SET_OPTION, prev);
+ if (r) return r;
+ r = add_opcode(reg, OP_FAIL);
+ if (r) return r;
+
+ reg->options = node->option;
+ r = compile_tree(node->target, reg);
+ /* restore before checking r so an error cannot leak the group options */
+ reg->options = prev;
+ if (r) return r;
+ r = add_opcode_option(reg, OP_SET_OPTION, prev);
+ }
+ else {
+ reg->options = node->option;
+ r = compile_tree(node->target, reg);
+ reg->options = prev;
+ }
+ return r;
+}
+
+/* Compute the number of bytes compile_effect_node() emits for `node`
+   (option group, capture group, or stop-backtrack group).
+   Returns the size, a negative error code from compile_length_tree(), or
+   ONIGERR_TYPE_BUG for an unknown effect type. */
+static int
+compile_length_effect_node(EffectNode* node, regex_t* reg)
+{
+ int len;
+ int tlen;
+
+ if (node->type == EFFECT_OPTION)
+ return compile_length_option_node(node, reg);
+
+ if (node->target) {
+ tlen = compile_length_tree(node->target, reg);
+ if (tlen < 0) return tlen;
+ }
+ else
+ tlen = 0;
+
+ switch (node->type) {
+ case EFFECT_MEMORY:
+#ifdef USE_SUBEXP_CALL
+ /* group that is the target of a subexp call: CALL + JUMP prologue,
+    body, memory-end and RETURN */
+ if (IS_EFFECT_CALLED(node)) {
+ /* NOTE(review): SIZE_OP_MEMORY_START_PUSH is used unconditionally here,
+    while compile_effect_node() picks START or START_PUSH from
+    bt_mem_start; this is only consistent if both sizes are equal. */
+ len = SIZE_OP_MEMORY_START_PUSH + tlen
+ + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
+ if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
+ len += (IS_EFFECT_RECURSION(node)
+ ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
+ else
+ len += (IS_EFFECT_RECURSION(node)
+ ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
+ }
+ else
+#endif
+ {
+ if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
+ len = SIZE_OP_MEMORY_START_PUSH;
+ else
+ len = SIZE_OP_MEMORY_START;
+
+ len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
+ ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
+ }
+ break;
+
+ case EFFECT_STOP_BACKTRACK:
+ if (IS_EFFECT_SIMPLE_REPEAT(node)) {
+ /* size is based on the repeat's inner target, not the repeat node */
+ QualifierNode* qn = &NQUALIFIER(node->target);
+ tlen = compile_length_tree(qn->target, reg);
+ if (tlen < 0) return tlen;
+
+ len = tlen * qn->lower
+ + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
+ }
+ else {
+ len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
+ }
+ break;
+
+ default:
+ return ONIGERR_TYPE_BUG;
+ break;
+ }
+
+ return len;
+}
+
+static int get_char_length_tree(Node* node, regex_t* reg, int* len);
+
+/*
+ * Emit bytecode for an effect node.
+ *
+ * EFFECT_OPTION          delegated to compile_option_node().
+ * EFFECT_MEMORY          MEMORY_START[_PUSH] <n>  target  MEMORY_END* <n>.
+ *                        With USE_SUBEXP_CALL, a group that is the target
+ *                        of a call is laid out as
+ *                            OP_CALL <call_addr>  OP_JUMP <len>
+ *                          call_addr:
+ *                            MEMORY_START..  target  MEMORY_END..  OP_RETURN
+ *                        where the JUMP skips the body, and node->call_addr
+ *                        is recorded and flagged NST_ADDR_FIXED.
+ * EFFECT_STOP_BACKTRACK  either PUSH_STOP_BT target POP_STOP_BT, or, for a
+ *                        simple repeat, `lower` copies of the inner target
+ *                        followed by a PUSH / target / POP / JUMP loop.
+ *
+ * Returns 0 on success, a negative error from compile_length_tree(),
+ * ONIGERR_TYPE_BUG for an unknown type, or the first non-zero emit result.
+ * In the called-group path the length used to be used as a jump distance
+ * without checking it for an error.
+ */
+static int
+compile_effect_node(EffectNode* node, regex_t* reg)
+{
+  int r, len;
+
+  if (node->type == EFFECT_OPTION)
+    return compile_option_node(node, reg);
+
+  switch (node->type) {
+  case EFFECT_MEMORY:
+#ifdef USE_SUBEXP_CALL
+    if (IS_EFFECT_CALLED(node)) {
+      r = add_opcode(reg, OP_CALL);
+      if (r) return r;
+      /* the body starts right after this CALL's operand and the JUMP */
+      node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
+      node->state |= NST_ADDR_FIXED;
+      r = add_abs_addr(reg, (int )node->call_addr);
+      if (r) return r;
+      len = compile_length_tree(node->target, reg);
+      if (len < 0) return len;  /* do not turn an error into a jump length */
+      len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
+      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
+        len += (IS_EFFECT_RECURSION(node)
+                ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
+      else
+        len += (IS_EFFECT_RECURSION(node)
+                ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
+
+      r = add_opcode_rel_addr(reg, OP_JUMP, len);
+      if (r) return r;
+    }
+#endif
+    if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
+      r = add_opcode(reg, OP_MEMORY_START_PUSH);
+    else
+      r = add_opcode(reg, OP_MEMORY_START);
+    if (r) return r;
+    r = add_mem_num(reg, node->regnum);
+    if (r) return r;
+    r = compile_tree(node->target, reg);
+    if (r) return r;
+#ifdef USE_SUBEXP_CALL
+    if (IS_EFFECT_CALLED(node)) {
+      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
+        r = add_opcode(reg, (IS_EFFECT_RECURSION(node)
+                             ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
+      else
+        r = add_opcode(reg, (IS_EFFECT_RECURSION(node)
+                             ? OP_MEMORY_END_REC : OP_MEMORY_END));
+
+      if (r) return r;
+      r = add_mem_num(reg, node->regnum);
+      if (r) return r;
+      r = add_opcode(reg, OP_RETURN);
+    }
+    else
+#endif
+    {
+      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
+        r = add_opcode(reg, OP_MEMORY_END_PUSH);
+      else
+        r = add_opcode(reg, OP_MEMORY_END);
+      if (r) return r;
+      r = add_mem_num(reg, node->regnum);
+    }
+    break;
+
+  case EFFECT_STOP_BACKTRACK:
+    if (IS_EFFECT_SIMPLE_REPEAT(node)) {
+      QualifierNode* qn = &NQUALIFIER(node->target);
+      r = compile_tree_n_times(qn->target, qn->lower, reg);
+      if (r) return r;
+
+      len = compile_length_tree(qn->target, reg);
+      if (len < 0) return len;
+
+      r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
+      if (r) return r;
+      r = compile_tree(qn->target, reg);
+      if (r) return r;
+      r = add_opcode(reg, OP_POP);
+      if (r) return r;
+      /* loop back to the PUSH */
+      r = add_opcode_rel_addr(reg, OP_JUMP,
+        -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
+    }
+    else {
+      r = add_opcode(reg, OP_PUSH_STOP_BT);
+      if (r) return r;
+      r = compile_tree(node->target, reg);
+      if (r) return r;
+      r = add_opcode(reg, OP_POP_STOP_BT);
+    }
+    break;
+
+  default:
+    return ONIGERR_TYPE_BUG;
+    break;
+  }
+
+  return r;
+}
+
+/*
+ * Compute the number of bytecode bytes compile_anchor_node() will emit for
+ * an anchor node.  Look-ahead / look-behind anchors wrap a target whose
+ * length is added to the size of the surrounding opcodes; every other
+ * anchor is a single opcode.  A negative value from the target's length
+ * computation is returned unchanged.
+ */
+static int
+compile_length_anchor_node(AnchorNode* node, regex_t* reg)
+{
+  int body_len = 0;
+
+  if (node->target) {
+    body_len = compile_length_tree(node->target, reg);
+    if (body_len < 0) return body_len;
+  }
+
+  switch (node->type) {
+  case ANCHOR_PREC_READ:
+    return SIZE_OP_PUSH_POS + body_len + SIZE_OP_POP_POS;
+
+  case ANCHOR_PREC_READ_NOT:
+    return SIZE_OP_PUSH_POS_NOT + body_len + SIZE_OP_FAIL_POS;
+
+  case ANCHOR_LOOK_BEHIND:
+    return SIZE_OP_LOOK_BEHIND + body_len;
+
+  case ANCHOR_LOOK_BEHIND_NOT:
+    return SIZE_OP_PUSH_LOOK_BEHIND_NOT + body_len + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
+
+  default:
+    /* plain position anchors are one bare opcode */
+    return SIZE_OPCODE;
+  }
+}
+
+/*
+ * Emit the bytecode for an anchor node into reg.
+ *
+ * Simple position anchors become a single opcode.  Look-ahead anchors
+ * bracket the compiled target with position push/pop (or fail) opcodes.
+ * Look-behind anchors additionally emit the character length of the
+ * target, taken from node->char_len when it is non-negative and computed
+ * with get_char_length_tree() otherwise.
+ *
+ * Returns 0 on success or an error code: ONIGERR_INVALID_LOOK_BEHIND_PATTERN
+ * when the look-behind length cannot be determined, ONIGERR_TYPE_BUG for an
+ * unknown anchor type, or whatever a failing helper returned.
+ */
+static int
+compile_anchor_node(AnchorNode* node, regex_t* reg)
+{
+  int r, len;
+
+  switch (node->type) {
+  case ANCHOR_BEGIN_BUF:      r = add_opcode(reg, OP_BEGIN_BUF);      break;
+  case ANCHOR_END_BUF:        r = add_opcode(reg, OP_END_BUF);        break;
+  case ANCHOR_BEGIN_LINE:     r = add_opcode(reg, OP_BEGIN_LINE);     break;
+  case ANCHOR_END_LINE:       r = add_opcode(reg, OP_END_LINE);       break;
+  case ANCHOR_SEMI_END_BUF:   r = add_opcode(reg, OP_SEMI_END_BUF);   break;
+  case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
+
+  case ANCHOR_WORD_BOUND:     r = add_opcode(reg, OP_WORD_BOUND);     break;
+  case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;
+#ifdef USE_WORD_BEGIN_END
+  case ANCHOR_WORD_BEGIN:     r = add_opcode(reg, OP_WORD_BEGIN);     break;
+  case ANCHOR_WORD_END:       r = add_opcode(reg, OP_WORD_END);       break;
+#endif
+
+  case ANCHOR_PREC_READ:
+    r = add_opcode(reg, OP_PUSH_POS);
+    if (r) return r;
+    r = compile_tree(node->target, reg);
+    if (r) return r;
+    r = add_opcode(reg, OP_POP_POS);
+    break;
+
+  case ANCHOR_PREC_READ_NOT:
+    len = compile_length_tree(node->target, reg);
+    if (len < 0) return len;
+    r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
+    if (r) return r;
+    r = compile_tree(node->target, reg);
+    if (r) return r;
+    r = add_opcode(reg, OP_FAIL_POS);
+    break;
+
+  case ANCHOR_LOOK_BEHIND:
+    {
+      int n;
+      r = add_opcode(reg, OP_LOOK_BEHIND);
+      if (r) return r;
+      if (node->char_len < 0) {
+        r = get_char_length_tree(node->target, reg, &n);
+        if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+      }
+      else
+        n = node->char_len;
+      r = add_length(reg, n);
+      if (r) return r;
+      r = compile_tree(node->target, reg);
+    }
+    break;
+
+  case ANCHOR_LOOK_BEHIND_NOT:
+    {
+      int n;
+      len = compile_length_tree(node->target, reg);
+      /* same guard as ANCHOR_PREC_READ_NOT: a negative length is an error
+         code and must not be used as a relative address */
+      if (len < 0) return len;
+      r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
+                              len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
+      if (r) return r;
+      if (node->char_len < 0) {
+        r = get_char_length_tree(node->target, reg, &n);
+        if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+      }
+      else
+        n = node->char_len;
+      r = add_length(reg, n);
+      if (r) return r;
+      r = compile_tree(node->target, reg);
+      if (r) return r;
+      r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
+    }
+    break;
+
+  default:
+    return ONIGERR_TYPE_BUG;
+    break;
+  }
+
+  return r;
+}
+
+/*
+ * Compute the number of bytecode bytes compile_tree() will emit for node.
+ *
+ * Returns the length (>= 0) on success, or a negative error code
+ * (ONIGERR_TYPE_BUG for an unknown node type, or the error reported while
+ * measuring a child node).
+ */
+static int
+compile_length_tree(Node* node, regex_t* reg)
+{
+  int len, type, r;
+
+  type = NTYPE(node);
+  switch (type) {
+  case N_LIST:
+    len = 0;
+    do {
+      r = compile_length_tree(NCONS(node).left, reg);
+      if (r < 0) return r;
+      len += r;
+    } while (IS_NOT_NULL(node = NCONS(node).right));
+    r = len;
+    break;
+
+  case N_ALT:
+    {
+      int n = 0;
+
+      len = 0;
+      do {
+        r = compile_length_tree(NCONS(node).left, reg);
+        /* do not add an error code into the running total */
+        if (r < 0) return r;
+        len += r;
+        n++;
+      } while (IS_NOT_NULL(node = NCONS(node).right));
+      /* every branch but the last is wrapped in PUSH ... JUMP */
+      r = len + (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
+    }
+    break;
+
+  case N_STRING:
+    if (NSTRING_IS_RAW(node))
+      r = compile_length_string_raw_node(&(NSTRING(node)), reg);
+    else
+      r = compile_length_string_node(&(NSTRING(node)), reg);
+    break;
+
+  case N_CCLASS:
+    r = compile_length_cclass_node(&(NCCLASS(node)), reg);
+    break;
+
+  case N_CTYPE:
+  case N_ANYCHAR:
+    r = SIZE_OPCODE;
+    break;
+
+  case N_BACKREF:
+    {
+      BackrefNode* br = &(NBACKREF(node));
+
+      if (br->back_num == 1) {
+        /* groups 1..3 have dedicated operand-less opcodes when the match
+           is case sensitive */
+        r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 3)
+             ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
+      }
+      else {
+        r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
+      }
+    }
+    break;
+
+#ifdef USE_SUBEXP_CALL
+  case N_CALL:
+    r = SIZE_OP_CALL;
+    break;
+#endif
+
+  case N_QUALIFIER:
+    r = compile_length_qualifier_node(&(NQUALIFIER(node)), reg);
+    break;
+
+  case N_EFFECT:
+    r = compile_length_effect_node(&NEFFECT(node), reg);
+    break;
+
+  case N_ANCHOR:
+    r = compile_length_anchor_node(&(NANCHOR(node)), reg);
+    break;
+
+  default:
+    return ONIGERR_TYPE_BUG;
+    break;
+  }
+
+  return r;
+}
+
+/*
+ * Emit the bytecode for the parse tree rooted at node into reg by
+ * dispatching on the node type.
+ *
+ * Returns 0 on success or the error code of the first failing step.  An
+ * unknown node type is reported on stderr when ONIG_DEBUG is defined and
+ * otherwise leaves the result at 0.
+ */
+static int
+compile_tree(Node* node, regex_t* reg)
+{
+  int n, type, len, pos, r = 0;
+
+  type = NTYPE(node);
+  switch (type) {
+  case N_LIST:
+    do {
+      r = compile_tree(NCONS(node).left, reg);
+    } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+    break;
+
+  case N_ALT:
+    {
+      Node* x = node;
+      len = 0;
+      /* first pass: total size of the alternation, to find its end */
+      do {
+        n = compile_length_tree(NCONS(x).left, reg);
+        if (n < 0) return n;
+        len += n;
+        if (NCONS(x).right != NULL) {
+          len += SIZE_OP_PUSH + SIZE_OP_JUMP;
+        }
+      } while (IS_NOT_NULL(x = NCONS(x).right));
+      pos = reg->used + len;  /* goal position */
+
+      /* second pass: PUSH <next branch>; branch; JUMP <goal> */
+      do {
+        len = compile_length_tree(NCONS(node).left, reg);
+        if (len < 0) {
+          r = len;
+          break;
+        }
+        if (IS_NOT_NULL(NCONS(node).right)) {
+          r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
+          if (r) break;
+        }
+        r = compile_tree(NCONS(node).left, reg);
+        if (r) break;
+        if (IS_NOT_NULL(NCONS(node).right)) {
+          len = pos - (reg->used + SIZE_OP_JUMP);
+          r = add_opcode_rel_addr(reg, OP_JUMP, len);
+          if (r) break;
+        }
+      } while (IS_NOT_NULL(node = NCONS(node).right));
+    }
+    break;
+
+  case N_STRING:
+    if (NSTRING_IS_RAW(node))
+      r = compile_string_raw_node(&(NSTRING(node)), reg);
+    else
+      r = compile_string_node(&(NSTRING(node)), reg);
+    break;
+
+  case N_CCLASS:
+    r = compile_cclass_node(&(NCCLASS(node)), reg);
+    break;
+
+  case N_CTYPE:
+    {
+      int op;
+
+      switch (NCTYPE(node).type) {
+      case CTYPE_WORD:     op = OP_WORD;     break;
+      case CTYPE_NOT_WORD: op = OP_NOT_WORD; break;
+      default:
+        return ONIGERR_TYPE_BUG;
+        break;
+      }
+      r = add_opcode(reg, op);
+    }
+    break;
+
+  case N_ANYCHAR:
+    if (IS_MULTILINE(reg->options))
+      r = add_opcode(reg, OP_ANYCHAR_ML);
+    else
+      r = add_opcode(reg, OP_ANYCHAR);
+    break;
+
+  case N_BACKREF:
+    {
+      int i;
+      BackrefNode* br = &(NBACKREF(node));
+
+      if (br->back_num == 1) {
+        n = br->back_static[0];
+        if (IS_IGNORECASE(reg->options)) {
+          r = add_opcode(reg, OP_BACKREFN_IC);
+          if (r) return r;
+          r = add_mem_num(reg, n);
+        }
+        else {
+          switch (n) {
+          case 1:  r = add_opcode(reg, OP_BACKREF1); break;
+          case 2:  r = add_opcode(reg, OP_BACKREF2); break;
+          case 3:  r = add_opcode(reg, OP_BACKREF3); break;
+          default:
+            r = add_opcode(reg, OP_BACKREFN);
+            if (r) return r;
+            r = add_mem_num(reg, n);
+            break;
+          }
+        }
+      }
+      else {
+        int* p;
+        /* capture the helper results: they were previously dropped, which
+           made the two error checks below unreachable */
+        r = add_opcode(reg, (IS_IGNORECASE(reg->options) ?
+                             OP_BACKREF_MULTI_IC : OP_BACKREF_MULTI));
+        if (r) return r;
+        r = add_length(reg, br->back_num);
+        if (r) return r;
+        p = BACKREFS_P(br);
+        /* group numbers are emitted in reverse order */
+        for (i = br->back_num - 1; i >= 0; i--) {
+          r = add_mem_num(reg, p[i]);
+          if (r) return r;
+        }
+      }
+    }
+    break;
+
+#ifdef USE_SUBEXP_CALL
+  case N_CALL:
+    r = compile_call(&(NCALL(node)), reg);
+    break;
+#endif
+
+  case N_QUALIFIER:
+    r = compile_qualifier_node(&(NQUALIFIER(node)), reg);
+    break;
+
+  case N_EFFECT:
+    r = compile_effect_node(&NEFFECT(node), reg);
+    break;
+
+  case N_ANCHOR:
+    r = compile_anchor_node(&(NANCHOR(node)), reg);
+    break;
+
+  default:
+#ifdef ONIG_DEBUG
+    fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
+#endif
+    break;
+  }
+
+  return r;
+}
+
+#ifdef USE_NAMED_GROUP
+/* One entry of the group renumbering table, indexed by the old group
+   number.  new_val is the group's new number; entries start out as 0 and
+   backreference renumbering drops references whose new_val is not > 0. */
+typedef struct {
+ int new_val;
+} NumMap;
+
+/*
+ * Walk the tree reachable through *plink, renumbering named memory groups
+ * and removing unnamed ones.
+ *
+ * Each named EFFECT_MEMORY group gets the next value of *counter as its
+ * new regnum, and map[old regnum].new_val records it.  An unnamed
+ * EFFECT_MEMORY node is unlinked: *plink is redirected to its target, the
+ * node is freed, and the walk continues on the replacement.  When that
+ * leaves a qualifier directly holding another qualifier,
+ * onig_reduce_nested_qualifier() is applied to the pair.
+ *
+ * Returns 0, or the first non-zero result of a recursive call.
+ */
+static int
+noname_disable_map(Node** plink, NumMap* map, int* counter)
+{
+  Node* cur = *plink;
+  int status = 0;
+
+  switch (NTYPE(cur)) {
+  case N_LIST:
+  case N_ALT:
+    do {
+      status = noname_disable_map(&(NCONS(cur).left), map, counter);
+      if (status != 0) break;
+      cur = NCONS(cur).right;
+    } while (IS_NOT_NULL(cur));
+    break;
+
+  case N_QUALIFIER:
+    {
+      Node** slot = &(NQUALIFIER(cur).target);
+      Node* before = *slot;
+
+      status = noname_disable_map(slot, map, counter);
+      /* the target was replaced by a qualifier: collapse the nesting */
+      if (*slot != before && NTYPE(*slot) == N_QUALIFIER) {
+        onig_reduce_nested_qualifier(cur, *slot);
+      }
+    }
+    break;
+
+  case N_EFFECT:
+    {
+      EffectNode* grp = &(NEFFECT(cur));
+
+      if (grp->type != EFFECT_MEMORY) {
+        status = noname_disable_map(&(grp->target), map, counter);
+      }
+      else if (IS_EFFECT_NAMED_GROUP(grp)) {
+        int renum = ++(*counter);
+
+        map[grp->regnum].new_val = renum;
+        grp->regnum = renum;
+        status = noname_disable_map(&(grp->target), map, counter);
+      }
+      else {
+        /* unnamed group: splice its target into the parent link */
+        *plink = grp->target;
+        grp->target = NULL_NODE;
+        onig_node_free(cur);
+        status = noname_disable_map(plink, map, counter);
+      }
+    }
+    break;
+
+  default:
+    break;
+  }
+
+  return status;
+}
+
+/*
+ * Rewrite the group numbers of a backreference node through map.
+ *
+ * Only name-based references are accepted; a numbered one yields
+ * ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED.  Numbers whose mapped
+ * value is not positive are dropped and the survivors are compacted to
+ * the front of the array, with back_num updated to the new count.
+ * Returns 0 on success.
+ */
+static int
+renumber_node_backref(Node* node, NumMap* map)
+{
+  BackrefNode* ref = &(NBACKREF(node));
+  int* nums;
+  int src, dst, total, mapped;
+
+  if (! IS_BACKREF_NAME_REF(ref))
+    return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
+
+  total = ref->back_num;
+  nums = ref->back_dynamic;
+  if (IS_NULL(nums))
+    nums = ref->back_static;
+
+  dst = 0;
+  for (src = 0; src < total; src++) {
+    mapped = map[nums[src]].new_val;
+    if (mapped <= 0) continue;
+    nums[dst] = mapped;
+    dst++;
+  }
+
+  ref->back_num = dst;
+  return 0;
+}
+
+/*
+ * Apply renumber_node_backref() to every backreference reachable from
+ * node through list, alternation, qualifier and effect nodes.
+ * Returns 0, or the first non-zero result encountered.
+ */
+static int
+renumber_by_map(Node* node, NumMap* map)
+{
+  int status;
+
+  switch (NTYPE(node)) {
+  case N_LIST:
+  case N_ALT:
+    for (; IS_NOT_NULL(node); node = NCONS(node).right) {
+      status = renumber_by_map(NCONS(node).left, map);
+      if (status != 0) return status;
+    }
+    return 0;
+
+  case N_QUALIFIER:
+    return renumber_by_map(NQUALIFIER(node).target, map);
+
+  case N_EFFECT:
+    return renumber_by_map(NEFFECT(node).target, map);
+
+  case N_BACKREF:
+    return renumber_node_backref(node, map);
+
+  default:
+    return 0;
+  }
+}
+
+/*
+ * Check that no backreference reachable from node (through list,
+ * alternation, qualifier and effect nodes) refers to a group by number.
+ * Returns 0 when all are name references, otherwise
+ * ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED.
+ */
+static int
+numbered_ref_check(Node* node)
+{
+  int status;
+
+  switch (NTYPE(node)) {
+  case N_LIST:
+  case N_ALT:
+    for (; IS_NOT_NULL(node); node = NCONS(node).right) {
+      status = numbered_ref_check(NCONS(node).left);
+      if (status != 0) return status;
+    }
+    return 0;
+
+  case N_QUALIFIER:
+    return numbered_ref_check(NQUALIFIER(node).target);
+
+  case N_EFFECT:
+    return numbered_ref_check(NEFFECT(node).target);
+
+  case N_BACKREF:
+    if (! IS_BACKREF_NAME_REF(&(NBACKREF(node))))
+      return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
+    return 0;
+
+  default:
+    return 0;
+  }
+}
+
+/*
+ * Turn unnamed groups into non-capturing ones and renumber the rest.
+ *
+ * Builds a renumbering table on the stack, rewrites the tree with
+ * noname_disable_map() and renumber_by_map(), compacts the scan
+ * environment's memory-node array, remaps the capture-history bits, and
+ * finally sets the group count of both env and reg to env->num_named.
+ *
+ * Returns 0 on success, ONIGERR_MEMORY if the table cannot be allocated,
+ * or the error from a rewriting step.
+ */
+static int
+disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
+{
+  NumMap* table;
+  BitStatusType old_history;
+  int status, idx, dst, named_seen;
+
+  table = (NumMap* )xalloca(sizeof(NumMap) * (env->num_mem + 1));
+  CHECK_NULL_RETURN_VAL(table, ONIGERR_MEMORY);
+  /* entry 0 is left alone: group numbers start at 1 */
+  for (idx = env->num_mem; idx >= 1; idx--) {
+    table[idx].new_val = 0;
+  }
+
+  named_seen = 0;
+  status = noname_disable_map(root, table, &named_seen);
+  if (status != 0) return status;
+
+  status = renumber_by_map(*root, table);
+  if (status != 0) return status;
+
+  /* slide the surviving group nodes down to their new slots */
+  dst = 1;
+  for (idx = 1; idx <= env->num_mem; idx++) {
+    if (table[idx].new_val <= 0) continue;
+    SCANENV_MEM_NODES(env)[dst] = SCANENV_MEM_NODES(env)[idx];
+    dst++;
+  }
+
+  /* rebuild the capture-history bit set under the new numbering */
+  old_history = env->capture_history;
+  BIT_STATUS_CLEAR(env->capture_history);
+  for (idx = 1; idx <= ONIG_MAX_CAPTURE_HISTORY_GROUP; idx++) {
+    if (BIT_STATUS_AT(old_history, idx)) {
+      BIT_STATUS_ON_AT_SIMPLE(env->capture_history, table[idx].new_val);
+    }
+  }
+
+  env->num_mem = env->num_named;
+  reg->num_mem = env->num_named;
+  return 0;
+}
+#endif /* USE_NAMED_GROUP */
+
+#ifdef USE_SUBEXP_CALL
+/*
+ * Patch the recorded call-address slots in the compiled buffer.
+ *
+ * For every entry of uslist, the call address stored on the target effect
+ * node is written at the entry's buffer offset.  Without
+ * PLATFORM_UNALIGNED_WORD_ACCESS the address is serialized into a byte
+ * buffer first.
+ *
+ * Returns 0 on success, or ONIGERR_PARSER_BUG if a target's address has
+ * not been fixed.
+ */
+static int
+unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
+{
+  int idx;
+#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
+  UChar bytes[SERIALIZE_BUFSIZE];
+#endif
+
+  for (idx = 0; idx < uslist->num; idx++) {
+    EffectNode* callee = &(NEFFECT(uslist->us[idx].target));
+    AbsAddrType target_addr;
+    int where;
+
+    if (! IS_EFFECT_ADDR_FIXED(callee)) return ONIGERR_PARSER_BUG;
+    target_addr = callee->call_addr;
+    where = uslist->us[idx].offset;
+
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+    BBUF_WRITE(reg, where, &target_addr, SIZE_ABSADDR);
+#else
+    SERIALIZE_ABSADDR(target_addr, bytes);
+    BBUF_WRITE(reg, where, bytes, SIZE_ABSADDR);
+#endif
+  }
+  return 0;
+}
+#endif
+
+#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+/*
+ * Classify a qualifier target by the memory-related nodes it contains.
+ *
+ * Returns NQ_TARGET_IS_EMPTY_MEM as soon as a memory group is reached,
+ * NQ_TARGET_IS_EMPTY_REC for a recursive call, the largest value found
+ * among the elements of a list or alternation, and 0 when nothing
+ * relevant is found.  Qualifiers whose upper bound is 0 are not descended
+ * into.
+ */
+static int
+qualifiers_memory_node_info(Node* node)
+{
+  int info = 0;
+
+  switch (NTYPE(node)) {
+  case N_LIST:
+  case N_ALT:
+    {
+      int sub;
+
+      do {
+        sub = qualifiers_memory_node_info(NCONS(node).left);
+        if (info < sub) info = sub;
+        if (sub < 0) break;
+        node = NCONS(node).right;
+      } while (IS_NOT_NULL(node));
+    }
+    break;
+
+#ifdef USE_SUBEXP_CALL
+  case N_CALL:
+    if (IS_CALL_RECURSION(&NCALL(node)))
+      return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
+    info = qualifiers_memory_node_info(NCALL(node).target);
+    break;
+#endif
+
+  case N_QUALIFIER:
+    if (NQUALIFIER(node).upper != 0)
+      info = qualifiers_memory_node_info(NQUALIFIER(node).target);
+    break;
+
+  case N_EFFECT:
+    {
+      EffectNode* grp = &(NEFFECT(node));
+
+      if (grp->type == EFFECT_MEMORY)
+        return NQ_TARGET_IS_EMPTY_MEM;
+      if (grp->type == EFFECT_OPTION || grp->type == EFFECT_STOP_BACKTRACK)
+        info = qualifiers_memory_node_info(grp->target);
+    }
+    break;
+
+  default:
+    /* backrefs, strings, ctypes, classes, anychar and anchors: nothing */
+    break;
+  }
+
+  return info;
+}
+#endif /* USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK */
+
+/*
+ * Compute the minimum match length of the tree rooted at node and store
+ * it in *min (which is reset to 0 first).
+ *
+ * Returns 0 on success, ONIGERR_INVALID_BACKREF when a backreference
+ * names a group number above env->num_mem, or the error of a recursive
+ * call.  With USE_SUBEXP_CALL the result for a memory group is cached on
+ * the node (min_len plus the NST_MIN_FIXED flag).
+ */
+static int
+get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
+{
+ OnigDistance tmin;
+ int r = 0;
+
+ *min = 0;
+ switch (NTYPE(node)) {
+ case N_BACKREF:
+ {
+ int i;
+ int* backs;
+ Node** nodes = SCANENV_MEM_NODES(env);
+ BackrefNode* br = &(NBACKREF(node));
+ /* a recursive backreference keeps the minimum at 0 */
+ if (br->state & NST_RECURSION) break;
+
+ /* the minimum over all referenced groups */
+ backs = BACKREFS_P(br);
+ if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
+ r = get_min_match_length(nodes[backs[0]], min, env);
+ if (r != 0) break;
+ for (i = 1; i < br->back_num; i++) {
+ if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
+ r = get_min_match_length(nodes[backs[i]], &tmin, env);
+ if (r != 0) break;
+ if (*min > tmin) *min = tmin;
+ }
+ }
+ break;
+
+#ifdef USE_SUBEXP_CALL
+ case N_CALL:
+ /* a recursive call only uses an already cached minimum */
+ if (IS_CALL_RECURSION(&NCALL(node))) {
+ EffectNode* en = &(NEFFECT(NCALL(node).target));
+ if (IS_EFFECT_MIN_FIXED(en))
+ *min = en->min_len;
+ }
+ else
+ r = get_min_match_length(NCALL(node).target, min, env);
+ break;
+#endif
+
+ case N_LIST:
+ /* concatenation: the minimums add up */
+ do {
+ r = get_min_match_length(NCONS(node).left, &tmin, env);
+ if (r == 0) *min += tmin;
+ } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ break;
+
+ case N_ALT:
+ {
+ /* alternation: the smallest branch minimum */
+ Node *x, *y;
+ y = node;
+ do {
+ x = NCONS(y).left;
+ r = get_min_match_length(x, &tmin, env);
+ if (r != 0) break;
+ if (y == node) *min = tmin;
+ else if (*min > tmin) *min = tmin;
+ } while (r == 0 && IS_NOT_NULL(y = NCONS(y).right));
+ }
+ break;
+
+ case N_STRING:
+ {
+ /* byte length of the literal */
+ StrNode* sn = &(NSTRING(node));
+ *min = sn->end - sn->s;
+ }
+ break;
+
+ case N_CTYPE:
+ switch (NCTYPE(node).type) {
+ case CTYPE_WORD: *min = 1; break;
+ case CTYPE_NOT_WORD: *min = 1; break;
+ default:
+ break;
+ }
+ break;
+
+ case N_CCLASS:
+ case N_ANYCHAR:
+ *min = 1;
+ break;
+
+ case N_QUALIFIER:
+ {
+ QualifierNode* qn = &(NQUALIFIER(node));
+
+ /* a lower bound of 0 leaves the minimum at 0 */
+ if (qn->lower > 0) {
+ r = get_min_match_length(qn->target, min, env);
+ if (r == 0)
+ *min = distance_multiply(*min, qn->lower);
+ }
+ }
+ break;
+
+ case N_EFFECT:
+ {
+ EffectNode* en = &(NEFFECT(node));
+ switch (en->type) {
+ case EFFECT_MEMORY:
+#ifdef USE_SUBEXP_CALL
+ if (IS_EFFECT_MIN_FIXED(en))
+ *min = en->min_len;
+ else {
+ r = get_min_match_length(en->target, min, env);
+ if (r == 0) {
+ en->min_len = *min;
+ SET_EFFECT_STATUS(node, NST_MIN_FIXED);
+ }
+ }
+ break;
+#endif
+ /* without USE_SUBEXP_CALL, EFFECT_MEMORY falls through to here */
+ case EFFECT_OPTION:
+ case EFFECT_STOP_BACKTRACK:
+ r = get_min_match_length(en->target, min, env);
+ break;
+ }
+ }
+ break;
+
+ case N_ANCHOR:
+ default:
+ break;
+ }
+
+ return r;
+}
+
+/*
+ * Compute the maximum match length of the tree rooted at node and store
+ * it in *max (which is reset to 0 first).  Unbounded constructs yield
+ * ONIG_INFINITE_DISTANCE.
+ *
+ * Returns 0 on success, ONIGERR_INVALID_BACKREF when a backreference
+ * names a group number above env->num_mem, or the error of a recursive
+ * call.  With USE_SUBEXP_CALL the result for a memory group is cached on
+ * the node (max_len plus the NST_MAX_FIXED flag).
+ */
+static int
+get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
+{
+  OnigDistance sub;
+  int status = 0;
+
+  *max = 0;
+  switch (NTYPE(node)) {
+  case N_LIST:
+    /* concatenation: the maximums add up */
+    do {
+      status = get_max_match_length(NCONS(node).left, &sub, env);
+      if (status != 0) break;
+      *max = distance_add(*max, sub);
+      node = NCONS(node).right;
+    } while (IS_NOT_NULL(node));
+    break;
+
+  case N_ALT:
+    /* alternation: the largest branch maximum */
+    do {
+      status = get_max_match_length(NCONS(node).left, &sub, env);
+      if (status != 0) break;
+      if (*max < sub) *max = sub;
+      node = NCONS(node).right;
+    } while (IS_NOT_NULL(node));
+    break;
+
+  case N_STRING:
+    *max = NSTRING(node).end - NSTRING(node).s;
+    break;
+
+  case N_CTYPE:
+    switch (NCTYPE(node).type) {
+    case CTYPE_WORD:
+    case CTYPE_NOT_WORD:
+      *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
+      break;
+
+    default:
+      break;
+    }
+    break;
+
+  case N_CCLASS:
+  case N_ANYCHAR:
+    *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
+    break;
+
+  case N_BACKREF:
+    {
+      Node** groups = SCANENV_MEM_NODES(env);
+      BackrefNode* ref = &(NBACKREF(node));
+      int* nums;
+      int k;
+
+      if (ref->state & NST_RECURSION) {
+        *max = ONIG_INFINITE_DISTANCE;
+        break;
+      }
+
+      /* the maximum over all referenced groups */
+      nums = BACKREFS_P(ref);
+      for (k = 0; k < ref->back_num; k++) {
+        if (nums[k] > env->num_mem) return ONIGERR_INVALID_BACKREF;
+        status = get_max_match_length(groups[nums[k]], &sub, env);
+        if (status != 0) break;
+        if (*max < sub) *max = sub;
+      }
+    }
+    break;
+
+#ifdef USE_SUBEXP_CALL
+  case N_CALL:
+    if (IS_CALL_RECURSION(&(NCALL(node))))
+      *max = ONIG_INFINITE_DISTANCE;
+    else
+      status = get_max_match_length(NCALL(node).target, max, env);
+    break;
+#endif
+
+  case N_QUALIFIER:
+    {
+      QualifierNode* rep = &(NQUALIFIER(node));
+
+      if (rep->upper == 0) break;
+
+      status = get_max_match_length(rep->target, max, env);
+      if (status != 0 || *max == 0) break;
+
+      if (IS_REPEAT_INFINITE(rep->upper))
+        *max = ONIG_INFINITE_DISTANCE;
+      else
+        *max = distance_multiply(*max, rep->upper);
+    }
+    break;
+
+  case N_EFFECT:
+    {
+      EffectNode* grp = &(NEFFECT(node));
+
+      switch (grp->type) {
+      case EFFECT_MEMORY:
+#ifdef USE_SUBEXP_CALL
+        if (IS_EFFECT_MAX_FIXED(grp)) {
+          *max = grp->max_len;
+        }
+        else {
+          status = get_max_match_length(grp->target, max, env);
+          if (status == 0) {
+            grp->max_len = *max;
+            SET_EFFECT_STATUS(node, NST_MAX_FIXED);
+          }
+        }
+        break;
+#endif
+      /* without USE_SUBEXP_CALL, EFFECT_MEMORY falls through to here */
+      case EFFECT_OPTION:
+      case EFFECT_STOP_BACKTRACK:
+        status = get_max_match_length(grp->target, max, env);
+        break;
+      }
+    }
+    break;
+
+  case N_ANCHOR:
+  default:
+    break;
+  }
+
+  return status;
+}
+
+/* Result codes of get_char_length_tree1() for patterns without a fixed
+   character length: the second one is used when the varying alternation
+   is the node the top-level call was made on (level 1). */
+#define GET_CHAR_LEN_VARLEN -1
+#define GET_CHAR_LEN_TOP_ALT_VARLEN -2
+
+/* fixed size pattern node only */
+/*
+ * Compute the length in characters of the tree rooted at node and store
+ * it in *len (which is reset to 0 first).  level is the nesting depth of
+ * the caller; it is incremented on entry, so the node passed by
+ * get_char_length_tree() is at level 1.
+ *
+ * Returns 0 when the length is fixed, otherwise one of the
+ * GET_CHAR_LEN_* codes.  With USE_SUBEXP_CALL the result for a memory
+ * group is cached on the node (char_len plus the NST_CLEN_FIXED flag).
+ */
+static int
+get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
+{
+ int tlen;
+ int r = 0;
+
+ level++;
+ *len = 0;
+ switch (NTYPE(node)) {
+ case N_LIST:
+ /* concatenation: the lengths add up */
+ do {
+ r = get_char_length_tree1(NCONS(node).left, reg, &tlen, level);
+ if (r == 0)
+ *len = distance_add(*len, tlen);
+ } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ break;
+
+ case N_ALT:
+ {
+ int tlen2;
+ int varlen = 0;
+
+ /* every branch must have the same length as the first one */
+ r = get_char_length_tree1(NCONS(node).left, reg, &tlen, level);
+ while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)) {
+ r = get_char_length_tree1(NCONS(node).left, reg, &tlen2, level);
+ if (r == 0) {
+ if (tlen != tlen2)
+ varlen = 1;
+ }
+ }
+ if (r == 0) {
+ if (varlen != 0) {
+ if (level == 1)
+ r = GET_CHAR_LEN_TOP_ALT_VARLEN;
+ else
+ r = GET_CHAR_LEN_VARLEN;
+ }
+ else
+ *len = tlen;
+ }
+ }
+ break;
+
+ case N_STRING:
+ {
+ /* count characters by stepping over each encoded character */
+ StrNode* sn = &(NSTRING(node));
+ UChar *s = sn->s;
+ while (s < sn->end) {
+ s += enc_len(reg->enc, *s);
+ (*len)++;
+ }
+ }
+ break;
+
+ case N_QUALIFIER:
+ {
+ /* only an exact repeat count keeps the length fixed */
+ QualifierNode* qn = &(NQUALIFIER(node));
+ if (qn->lower == qn->upper) {
+ r = get_char_length_tree1(qn->target, reg, &tlen, level);
+ if (r == 0)
+ *len = distance_multiply(tlen, qn->lower);
+ }
+ else
+ r = GET_CHAR_LEN_VARLEN;
+ }
+ break;
+
+#ifdef USE_SUBEXP_CALL
+ case N_CALL:
+ if (! IS_CALL_RECURSION(&(NCALL(node))))
+ r = get_char_length_tree1(NCALL(node).target, reg, len, level);
+ else
+ r = GET_CHAR_LEN_VARLEN;
+ break;
+#endif
+
+ case N_CTYPE:
+ switch (NCTYPE(node).type) {
+ case CTYPE_WORD:
+ case CTYPE_NOT_WORD:
+ *len = 1;
+ break;
+ }
+ break;
+
+ case N_CCLASS:
+ case N_ANYCHAR:
+ *len = 1;
+ break;
+
+ case N_EFFECT:
+ {
+ EffectNode* en = &(NEFFECT(node));
+ switch (en->type) {
+ case EFFECT_MEMORY:
+#ifdef USE_SUBEXP_CALL
+ if (IS_EFFECT_CLEN_FIXED(en))
+ *len = en->char_len;
+ else {
+ r = get_char_length_tree1(en->target, reg, len, level);
+ if (r == 0) {
+ en->char_len = *len;
+ SET_EFFECT_STATUS(node, NST_CLEN_FIXED);
+ }
+ }
+ break;
+#endif
+ /* without USE_SUBEXP_CALL, EFFECT_MEMORY falls through to here */
+ case EFFECT_OPTION:
+ case EFFECT_STOP_BACKTRACK:
+ r = get_char_length_tree1(en->target, reg, len, level);
+ break;
+ default:
+ break;
+ }
+ }
+ break;
+
+ case N_ANCHOR:
+ /* anchors contribute length 0 */
+ break;
+
+ default:
+ /* any other node type (e.g. a backreference) is variable length */
+ r = GET_CHAR_LEN_VARLEN;
+ break;
+ }
+
+ return r;
+}
+
+/*
+ * Entry point for the fixed character length computation: calls
+ * get_char_length_tree1() with a starting level of 0 and returns its
+ * result, with the length stored in *len.
+ */
+static int
+get_char_length_tree(Node* node, regex_t* reg, int* len)
+{
+ return get_char_length_tree1(node, reg, len, 0);
+}
+
+/*
+ * Test whether code point `code` belongs to character class cc.
+ *
+ * Code points below SINGLE_BYTE_SIZE are looked up in the class bit set;
+ * larger ones in the multibyte range buffer, when there is one.  The
+ * result is inverted for a negated class.  Returns non-zero when the code
+ * point is a member, 0 otherwise.
+ */
+extern int
+onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
+{
+  int hit = 0;
+
+  if (code < SINGLE_BYTE_SIZE) {
+    if (BITSET_AT(cc->bs, code) != 0) hit = 1;
+  }
+  else if (IS_NOT_NULL(cc->mbuf)) {
+    if (onig_is_in_code_range(cc->mbuf->p, code) != 0) hit = 1;
+  }
+
+  if (cc->not != 0)
+    return !hit;
+  return hit;
+}
+
+/* x is not included y ==>  1 : 0 */
+/*
+ * Decide whether the head of x can never match what y matches.
+ *
+ * Handles the combinations of ctype, character class and string nodes;
+ * asymmetric combinations are normalized by swapping x and y and
+ * retrying.  Returns 1 only when exclusion is established and 0 in every
+ * other case, including node types that are not handled.
+ */
+static int
+is_not_included(Node* x, Node* y, regex_t* reg)
+{
+  int i, len;
+  OnigCodePoint code;
+  UChar *p, c;
+  int ytype;
+
+ retry:
+  ytype = NTYPE(y);
+  switch (NTYPE(x)) {
+  case N_CTYPE:
+    {
+      switch (ytype) {
+      case N_CTYPE:
+        switch (NCTYPE(x).type) {
+        case CTYPE_WORD:
+          if (NCTYPE(y).type == CTYPE_NOT_WORD)
+            return 1;
+          else
+            return 0;
+          break;
+        case CTYPE_NOT_WORD:
+          if (NCTYPE(y).type == CTYPE_WORD)
+            return 1;
+          else
+            return 0;
+          break;
+        default:
+          break;
+        }
+        break;
+
+      case N_CCLASS:
+      swap:
+        {
+          /* handle the pair with the roles exchanged */
+          Node* tmp;
+          tmp = x; x = y; y = tmp;
+          goto retry;
+        }
+        break;
+
+      case N_STRING:
+        goto swap;
+        break;
+
+      default:
+        break;
+      }
+    }
+    break;
+
+  case N_CCLASS:
+    {
+      CClassNode* xc = &(NCCLASS(x));
+      switch (ytype) {
+      case N_CTYPE:
+        switch (NCTYPE(y).type) {
+        case CTYPE_WORD:
+          if (IS_NULL(xc->mbuf) && xc->not == 0) {
+            for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+              if (BITSET_AT(xc->bs, i)) {
+                if (ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) return 0;
+              }
+            }
+            return 1;
+          }
+          return 0;
+          break;
+        case CTYPE_NOT_WORD:
+          for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+            if (! ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) {
+              if (xc->not == 0) {
+                if (BITSET_AT(xc->bs, i))
+                  return 0;
+              }
+              else {
+                if (! BITSET_AT(xc->bs, i))
+                  return 0;
+              }
+            }
+          }
+          return 1;
+          break;
+
+        default:
+          break;
+        }
+        break;
+
+      case N_CCLASS:
+        {
+          int v;
+          CClassNode* yc = &(NCCLASS(y));
+
+          /* any single-byte code accepted by both classes is an overlap */
+          for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+            v = BITSET_AT(xc->bs, i);
+            if ((v != 0 && xc->not == 0) || (v == 0 && xc->not)) {
+              v = BITSET_AT(yc->bs, i);
+              if ((v != 0 && yc->not == 0) || (v == 0 && yc->not))
+                return 0;
+            }
+          }
+          if ((IS_NULL(xc->mbuf) && xc->not == 0) ||
+              (IS_NULL(yc->mbuf) && yc->not == 0))
+            return 1;
+          return 0;
+        }
+        break;
+
+      case N_STRING:
+        goto swap;
+        break;
+
+      default:
+        break;
+      }
+    }
+    break;
+
+  case N_STRING:
+    {
+      StrNode* xs = &(NSTRING(x));
+      if (NSTRING_LEN(x) == 0)
+        break;
+
+      c = *(xs->s);
+      switch (ytype) {
+      case N_CTYPE:
+        switch (NCTYPE(y).type) {
+        case CTYPE_WORD:
+          return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 0 : 1);
+          break;
+        case CTYPE_NOT_WORD:
+          return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 1 : 0);
+          break;
+        default:
+          break;
+        }
+        break;
+
+      case N_CCLASS:
+        {
+          CClassNode* cc = &(NCCLASS(y));
+
+          /* test the first character of the string against the class */
+          code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
+                                     xs->s + enc_len(reg->enc, c));
+          return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
+        }
+        break;
+
+      case N_STRING:
+        {
+          UChar *q;
+          StrNode* ys = &(NSTRING(y));
+          len = NSTRING_LEN(x);
+          if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
+          if (NSTRING_IS_CASE_AMBIG(x) || NSTRING_IS_CASE_AMBIG(y)) {
+            UChar plow[ONIGENC_MBC_TO_LOWER_MAXLEN];
+            UChar qlow[ONIGENC_MBC_TO_LOWER_MAXLEN];
+            int plen, qlen;
+            /* compare the common prefix only: stop at the end of
+               whichever string is shorter so that p never reads past
+               ys->end */
+            for (p = ys->s, q = xs->s; q < xs->end && p < ys->end; ) {
+              plen = ONIGENC_MBC_TO_LOWER(reg->enc, p, plow);
+              qlen = ONIGENC_MBC_TO_LOWER(reg->enc, q, qlow);
+              if (plen != qlen || onig_strncmp(plow, qlow, plen) != 0)
+                return 1;
+              p += enc_len(reg->enc, *p);
+              q += enc_len(reg->enc, *q);
+            }
+          }
+          else {
+            for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
+              if (*p != *q) return 1;
+            }
+          }
+        }
+        break;
+
+      default:
+        break;
+      }
+    }
+    break;
+
+  default:
+    break;
+  }
+
+  return 0;
+}
+
+/*
+ * Find the node that determines the first character matched by the tree
+ * rooted at node, or NULL_NODE if there is none.
+ *
+ * With exact == 0 ctype and character class nodes qualify.  With
+ * exact != 0 only strings qualify, and under the ignore-case option a
+ * non-raw string whose first character is case ambiguous is rejected.
+ * Option groups are evaluated with reg->options temporarily replaced by
+ * the group's option value.
+ */
+static Node*
+get_head_value_node(Node* node, int exact, regex_t* reg)
+{
+  switch (NTYPE(node)) {
+  case N_CTYPE:
+  case N_CCLASS:
+    if (exact == 0) return node;
+    return NULL_NODE;
+
+  case N_LIST:
+    return get_head_value_node(NCONS(node).left, exact, reg);
+
+  case N_STRING:
+    {
+      StrNode* str = &(NSTRING(node));
+
+      if (str->end <= str->s)
+        return NULL_NODE;
+
+      if (exact != 0 &&
+          !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
+        if (! ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, str->s))
+          return node;
+        return NULL_NODE;
+      }
+      return node;
+    }
+
+  case N_QUALIFIER:
+    {
+      QualifierNode* rep = &(NQUALIFIER(node));
+
+      if (rep->lower <= 0)
+        return NULL_NODE;
+      if (IS_NOT_NULL(rep->head_exact))
+        return rep->head_exact;
+      return get_head_value_node(rep->target, exact, reg);
+    }
+
+  case N_EFFECT:
+    {
+      EffectNode* grp = &(NEFFECT(node));
+      Node* head = NULL_NODE;
+
+      switch (grp->type) {
+      case EFFECT_OPTION:
+        {
+          OnigOptionType saved = reg->options;
+
+          reg->options = grp->option;
+          head = get_head_value_node(grp->target, exact, reg);
+          reg->options = saved;
+        }
+        break;
+
+      case EFFECT_MEMORY:
+      case EFFECT_STOP_BACKTRACK:
+        head = get_head_value_node(grp->target, exact, reg);
+        break;
+      }
+      return head;
+    }
+
+  case N_ANCHOR:
+    if (NANCHOR(node).type == ANCHOR_PREC_READ)
+      return get_head_value_node(NANCHOR(node).target, exact, reg);
+    return NULL_NODE;
+
+  default:
+    /* backrefs, alternations, anychar and calls have no head value */
+    break;
+  }
+
+  return NULL_NODE;
+}
+
+/*
+ * Check that the tree rooted at node only uses permitted constructs.
+ *
+ * A node fails the check when its type has no bit in type_mask, when it
+ * is an effect node whose effect type has no bit in effect_mask, or when
+ * it is an anchor whose anchor type has no bit in anchor_mask.
+ * Returns 0 when every visited node is permitted and 1 otherwise.
+ */
+static int
+check_type_tree(Node* node, int type_mask, int effect_mask, int anchor_mask)
+{
+  int kind = NTYPE(node);
+  int status = 0;
+
+  if ((kind & type_mask) == 0)
+    return 1;
+
+  switch (kind) {
+  case N_LIST:
+  case N_ALT:
+    do {
+      status = check_type_tree(NCONS(node).left, type_mask, effect_mask,
+                               anchor_mask);
+      if (status != 0) break;
+      node = NCONS(node).right;
+    } while (IS_NOT_NULL(node));
+    break;
+
+  case N_QUALIFIER:
+    status = check_type_tree(NQUALIFIER(node).target, type_mask, effect_mask,
+                             anchor_mask);
+    break;
+
+  case N_EFFECT:
+    {
+      EffectNode* grp = &(NEFFECT(node));
+
+      if ((grp->type & effect_mask) == 0)
+        return 1;
+
+      status = check_type_tree(grp->target, type_mask, effect_mask,
+                               anchor_mask);
+    }
+    break;
+
+  case N_ANCHOR:
+    kind = NANCHOR(node).type;
+    if ((kind & anchor_mask) == 0)
+      return 1;
+
+    /* only look-around anchors carry a target */
+    if (NANCHOR(node).target)
+      status = check_type_tree(NANCHOR(node).target,
+                               type_mask, effect_mask, anchor_mask);
+    break;
+
+  default:
+    break;
+  }
+  return status;
+}
+
+#ifdef USE_SUBEXP_CALL
+
+#define RECURSION_EXIST 1
+#define RECURSION_INFINITE 2
+
+/*
+ * Look for a recursion back to the group currently marked NST_MARK1.
+ *
+ * head is non-zero while nothing with a non-zero minimum length has been
+ * passed yet.  Reaching the marked group with head set is reported as
+ * RECURSION_INFINITE, reaching it otherwise as RECURSION_EXIST.  Groups
+ * being visited are flagged NST_MARK2 so that they are not entered twice.
+ *
+ * Returns 0, a combination of the RECURSION_* values, or a negative
+ * error code from get_min_match_length().
+ */
+static int
+subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
+{
+  int result = 0;
+  int sub;
+
+  switch (NTYPE(node)) {
+  case N_LIST:
+    {
+      OnigDistance min_len;
+      Node* item;
+
+      for (item = node; IS_NOT_NULL(item); item = NCONS(item).right) {
+        sub = subexp_inf_recursive_check(NCONS(item).left, env, head);
+        if (sub < 0 || sub == RECURSION_INFINITE) return sub;
+        result |= sub;
+        if (head == 0) continue;
+
+        /* once an element must consume input we are no longer at the head */
+        sub = get_min_match_length(NCONS(item).left, &min_len, env);
+        if (sub != 0) return sub;
+        if (min_len != 0) head = 0;
+      }
+    }
+    break;
+
+  case N_ALT:
+    /* recursion exists only if every branch recurses */
+    result = RECURSION_EXIST;
+    for (; IS_NOT_NULL(node); node = NCONS(node).right) {
+      sub = subexp_inf_recursive_check(NCONS(node).left, env, head);
+      if (sub < 0 || sub == RECURSION_INFINITE) return sub;
+      result &= sub;
+    }
+    break;
+
+  case N_QUALIFIER:
+    result = subexp_inf_recursive_check(NQUALIFIER(node).target, env, head);
+    break;
+
+  case N_ANCHOR:
+    {
+      AnchorNode* anchor = &(NANCHOR(node));
+
+      if (anchor->type == ANCHOR_PREC_READ ||
+          anchor->type == ANCHOR_PREC_READ_NOT ||
+          anchor->type == ANCHOR_LOOK_BEHIND ||
+          anchor->type == ANCHOR_LOOK_BEHIND_NOT)
+        result = subexp_inf_recursive_check(anchor->target, env, head);
+    }
+    break;
+
+  case N_CALL:
+    result = subexp_inf_recursive_check(NCALL(node).target, env, head);
+    break;
+
+  case N_EFFECT:
+    if (IS_EFFECT_MARK2(&(NEFFECT(node))))
+      return 0;
+    if (IS_EFFECT_MARK1(&(NEFFECT(node))))
+      return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
+
+    SET_EFFECT_STATUS(node, NST_MARK2);
+    result = subexp_inf_recursive_check(NEFFECT(node).target, env, head);
+    CLEAR_EFFECT_STATUS(node, NST_MARK2);
+    break;
+
+  default:
+    break;
+  }
+
+  return result;
+}
+
+/*
+ * Traverse the tree and run subexp_inf_recursive_check() on every group
+ * flagged as recursive, with the group marked NST_MARK1 for the duration
+ * of the check.
+ *
+ * Returns 0 when no never-ending recursion is found,
+ * ONIGERR_NEVER_ENDING_RECURSION when one is, or the negative error code
+ * reported by the check.
+ */
+static int
+subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
+{
+  int type;
+  int r = 0;
+
+  type = NTYPE(node);
+  switch (type) {
+  case N_LIST:
+  case N_ALT:
+    do {
+      r = subexp_inf_recursive_check_trav(NCONS(node).left, env);
+    } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+    break;
+
+  case N_QUALIFIER:
+    r = subexp_inf_recursive_check_trav(NQUALIFIER(node).target, env);
+    break;
+
+  case N_ANCHOR:
+    {
+      AnchorNode* an = &(NANCHOR(node));
+      switch (an->type) {
+      case ANCHOR_PREC_READ:
+      case ANCHOR_PREC_READ_NOT:
+      case ANCHOR_LOOK_BEHIND:
+      case ANCHOR_LOOK_BEHIND_NOT:
+        r = subexp_inf_recursive_check_trav(an->target, env);
+        break;
+      }
+    }
+    break;
+
+  case N_EFFECT:
+    {
+      EffectNode* en = &(NEFFECT(node));
+
+      if (IS_EFFECT_RECURSION(en)) {
+        SET_EFFECT_STATUS(node, NST_MARK1);
+        r = subexp_inf_recursive_check(en->target, env, 1);
+        if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
+        /* a negative value is an error from the check; report it rather
+           than letting the traversal below overwrite it */
+        if (r < 0) return r;
+        CLEAR_EFFECT_STATUS(node, NST_MARK1);
+      }
+      r = subexp_inf_recursive_check_trav(en->target, env);
+    }
+
+    break;
+
+  default:
+    break;
+  }
+
+  return r;
+}
+
+/*
+ * Report whether the tree rooted at node leads back to the group
+ * currently marked NST_MARK1.
+ *
+ * Call nodes on a path that does are flagged with SET_CALL_RECURSION.
+ * Groups being visited are flagged NST_MARK2 so that they are not
+ * entered twice.  Returns non-zero when a recursion is found, else 0.
+ */
+static int
+subexp_recursive_check(Node* node)
+{
+  int found = 0;
+
+  switch (NTYPE(node)) {
+  case N_LIST:
+  case N_ALT:
+    for (; IS_NOT_NULL(node); node = NCONS(node).right) {
+      found |= subexp_recursive_check(NCONS(node).left);
+    }
+    break;
+
+  case N_QUALIFIER:
+    found = subexp_recursive_check(NQUALIFIER(node).target);
+    break;
+
+  case N_ANCHOR:
+    {
+      AnchorNode* anchor = &(NANCHOR(node));
+
+      if (anchor->type == ANCHOR_PREC_READ ||
+          anchor->type == ANCHOR_PREC_READ_NOT ||
+          anchor->type == ANCHOR_LOOK_BEHIND ||
+          anchor->type == ANCHOR_LOOK_BEHIND_NOT)
+        found = subexp_recursive_check(anchor->target);
+    }
+    break;
+
+  case N_CALL:
+    found = subexp_recursive_check(NCALL(node).target);
+    if (found != 0) SET_CALL_RECURSION(node);
+    break;
+
+  case N_EFFECT:
+    if (IS_EFFECT_MARK2(&(NEFFECT(node))))
+      return 0;
+    if (IS_EFFECT_MARK1(&(NEFFECT(node))))
+      return 1;  /* recursion */
+
+    SET_EFFECT_STATUS(node, NST_MARK2);
+    found = subexp_recursive_check(NEFFECT(node).target);
+    CLEAR_EFFECT_STATUS(node, NST_MARK2);
+    break;
+
+  default:
+    break;
+  }
+
+  return found;
+}
+
+
+/*
+ * Traverse the tree, flagging every called group that recurses into
+ * itself with NST_RECURSION.
+ *
+ * Each called group not yet flagged is marked NST_MARK1 while
+ * subexp_recursive_check() searches its body.  A qualifier with an upper
+ * bound of 0 whose target contains a called group gets is_refered set.
+ *
+ * Returns FOUND_CALLED_NODE when the subtree contains a called group,
+ * 0 when it does not, or a negative value passed up from a list element.
+ */
+static int
+subexp_recursive_check_trav(Node* node, ScanEnv* env)
+{
+#define FOUND_CALLED_NODE    1
+
+  int result = 0;
+
+  switch (NTYPE(node)) {
+  case N_LIST:
+  case N_ALT:
+    {
+      int sub;
+
+      for (; IS_NOT_NULL(node); node = NCONS(node).right) {
+        sub = subexp_recursive_check_trav(NCONS(node).left, env);
+        if (sub == FOUND_CALLED_NODE)
+          result = FOUND_CALLED_NODE;
+        else if (sub < 0)
+          return sub;
+      }
+    }
+    break;
+
+  case N_QUALIFIER:
+    result = subexp_recursive_check_trav(NQUALIFIER(node).target, env);
+    if (NQUALIFIER(node).upper == 0 && result == FOUND_CALLED_NODE)
+      NQUALIFIER(node).is_refered = 1;
+    break;
+
+  case N_ANCHOR:
+    {
+      AnchorNode* anchor = &(NANCHOR(node));
+
+      if (anchor->type == ANCHOR_PREC_READ ||
+          anchor->type == ANCHOR_PREC_READ_NOT ||
+          anchor->type == ANCHOR_LOOK_BEHIND ||
+          anchor->type == ANCHOR_LOOK_BEHIND_NOT)
+        result = subexp_recursive_check_trav(anchor->target, env);
+    }
+    break;
+
+  case N_EFFECT:
+    {
+      EffectNode* grp = &(NEFFECT(node));
+
+      if (! IS_EFFECT_RECURSION(grp) && IS_EFFECT_CALLED(grp)) {
+        SET_EFFECT_STATUS(node, NST_MARK1);
+        if (subexp_recursive_check(grp->target) != 0)
+          SET_EFFECT_STATUS(node, NST_RECURSION);
+        CLEAR_EFFECT_STATUS(node, NST_MARK1);
+      }
+
+      result = subexp_recursive_check_trav(grp->target, env);
+      if (IS_EFFECT_CALLED(grp))
+        result |= FOUND_CALLED_NODE;
+    }
+    break;
+
+  default:
+    break;
+  }
+
+  return result;
+}
+
+/* Binds every N_CALL below `node` to the group it calls.  The callee is
+   looked up by name first (when USE_NAMED_GROUP is defined) and otherwise
+   interpreted as a decimal group number.  The target group is flagged
+   NST_CALLED.  Returns 0 on success or an ONIGERR_* code; for most errors
+   the offending name is recorded in `env` first. */
+static int
+setup_subexp_call(Node* node, ScanEnv* env)
+{
+  int r = 0;
+
+  switch (NTYPE(node)) {
+  case N_LIST:
+  case N_ALT:
+    /* visit the elements in order, stopping at the first error */
+    do {
+      r = setup_subexp_call(NCONS(node).left, env);
+      if (r != 0) break;
+      node = NCONS(node).right;
+    } while (IS_NOT_NULL(node));
+    break;
+
+  case N_QUALIFIER:
+    r = setup_subexp_call(NQUALIFIER(node).target, env);
+    break;
+
+  case N_EFFECT:
+    r = setup_subexp_call(NEFFECT(node).target, env);
+    break;
+
+  case N_CALL:
+    {
+      int n, num, *refs;
+      UChar *p;
+      CallNode* call = &(NCALL(node));
+      Node** groups = SCANENV_MEM_NODES(env);
+
+#ifdef USE_NAMED_GROUP
+      n = onig_name_to_group_numbers(env->reg, call->name, call->name_end, &refs);
+#else
+      n = -1;
+#endif
+      if (n > 1) {
+        /* the name is defined more than once: the call is ambiguous */
+        onig_scan_env_set_error_string(env,
+            ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, call->name, call->name_end);
+        return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
+      }
+
+      if (n == 1) {
+        call->ref_num = refs[0];
+      }
+      else {
+        /* name not found, check group number. (?*ddd) */
+        p = call->name;
+        num = onig_scan_unsigned_number(&p, call->name_end, env->enc);
+        if (num <= 0 || p != call->name_end) {
+          onig_scan_env_set_error_string(env,
+              ONIGERR_UNDEFINED_NAME_REFERENCE, call->name, call->name_end);
+          return ONIGERR_UNDEFINED_NAME_REFERENCE;
+        }
+#ifdef USE_NAMED_GROUP
+        if (env->num_named > 0 &&
+            IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
+            !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
+          return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
+        }
+#endif
+        if (num > env->num_mem) {
+          onig_scan_env_set_error_string(env,
+              ONIGERR_UNDEFINED_GROUP_REFERENCE, call->name, call->name_end);
+          return ONIGERR_UNDEFINED_GROUP_REFERENCE;
+        }
+        call->ref_num = num;
+      }
+
+      /* common tail: attach the target group and record the call */
+      call->target = groups[call->ref_num];
+      if (IS_NULL(call->target)) {
+        onig_scan_env_set_error_string(env,
+            ONIGERR_UNDEFINED_NAME_REFERENCE, call->name, call->name_end);
+        return ONIGERR_UNDEFINED_NAME_REFERENCE;
+      }
+      SET_EFFECT_STATUS(call->target, NST_CALLED);
+      BIT_STATUS_ON_AT(env->bt_mem_start, call->ref_num);
+      call->unset_addr_list = env->unset_addr_list;
+    }
+    break;
+
+  case N_ANCHOR:
+    switch (NANCHOR(node).type) {
+    case ANCHOR_PREC_READ:
+    case ANCHOR_PREC_READ_NOT:
+    case ANCHOR_LOOK_BEHIND:
+    case ANCHOR_LOOK_BEHIND_NOT:
+      r = setup_subexp_call(NANCHOR(node).target, env);
+      break;
+    }
+    break;
+
+  default:
+    break;
+  }
+
+  return r;
+}
+#endif
+
+/* Split a look-behind whose body is an alternation of different lengths
+   into one look-behind per branch:
+     (?<=A|B) ==> (?<=A)|(?<=B)
+     (?<!A|B) ==> (?<!A)(?<!B)
+   `node` is the look-behind anchor; it is rewritten in place.
+   Returns 0, or ONIGERR_MEMORY if a new anchor node cannot be allocated. */
+static int
+divide_look_behind_alternatives(Node* node)
+{
+  Node saved;
+  Node *alt_head, *first_branch, *cell, *wrapper;
+  int anc_type = NANCHOR(node).type;
+
+  alt_head     = NANCHOR(node).target;
+  first_branch = NCONS(alt_head).left;
+
+  /* exchange the contents of the two nodes: `node` becomes the head of the
+     alternation, the old head becomes the anchor around the first branch */
+  saved = *node;
+  *node = *alt_head;
+  *alt_head = saved;
+  NCONS(node).left = alt_head;
+  NANCHOR(alt_head).target = first_branch;
+
+  /* wrap each remaining branch in its own anchor of the same type */
+  for (cell = NCONS(node).right; cell != NULL_NODE; cell = NCONS(cell).right) {
+    wrapper = onig_node_new_anchor(anc_type);
+    CHECK_NULL_RETURN_VAL(wrapper, ONIGERR_MEMORY);
+    NANCHOR(wrapper).target = NCONS(cell).left;
+    NCONS(cell).left = wrapper;
+  }
+
+  if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
+    /* alt -> list */
+    for (cell = node; cell != NULL_NODE; cell = NCONS(cell).right)
+      cell->type = N_LIST;
+  }
+  return 0;
+}
+
+/* Determine the character length of a look-behind body and store it in the
+   anchor.  A variable-length body is an error, except that a top-level
+   alternation of different lengths is split into separate look-behinds
+   when the syntax allows ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND.
+   Returns 0 on success, otherwise an error code. */
+static int
+setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
+{
+  int len;
+  AnchorNode* lb = &(NANCHOR(node));
+  int r = get_char_length_tree(lb->target, reg, &len);
+
+  if (r == 0) {
+    lb->char_len = len;
+    return 0;
+  }
+  if (r == GET_CHAR_LEN_VARLEN)
+    return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+  if (r != GET_CHAR_LEN_TOP_ALT_VARLEN)
+    return r;   /* any other result is passed through unchanged */
+
+  if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
+    return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+
+  return divide_look_behind_alternatives(node);
+}
+
+/* Use knowledge of the element that follows `node` in a list.  Capture
+   groups are looked through; if what remains is a greedy, unbounded
+   repeat, then
+     - with USE_QUALIFIER_PEEK_NEXT, the head value of `next_node` is
+       stored in next_head_exact;
+     - automatic possessification: a*b ==> (?>a*)b, applied when the
+       repeat has lower <= 1, a simple target, and is_not_included()
+       accepts the head values of the target and of `next_node`.
+   Returns 0, or ONIGERR_MEMORY if the wrapper node cannot be allocated. */
+static int
+next_setup(Node* node, Node* next_node, regex_t* reg)
+{
+  QualifierNode* qn;
+  Node *x, *y, *wrapper;
+  int ttype;
+
+  /* step through nested capture groups */
+  while (NTYPE(node) == N_EFFECT && NEFFECT(node).type == EFFECT_MEMORY)
+    node = NEFFECT(node).target;
+
+  if (NTYPE(node) != N_QUALIFIER) return 0;
+
+  qn = &(NQUALIFIER(node));
+  if (! qn->greedy || ! IS_REPEAT_INFINITE(qn->upper)) return 0;
+
+#ifdef USE_QUALIFIER_PEEK_NEXT
+  qn->next_head_exact = get_head_value_node(next_node, 1, reg);
+#endif
+
+  if (qn->lower > 1) return 0;
+
+  ttype = NTYPE(qn->target);
+  if (! IS_NODE_TYPE_SIMPLE(ttype)) return 0;
+
+  x = get_head_value_node(qn->target, 0, reg);
+  if (IS_NULL(x)) return 0;
+
+  y = get_head_value_node(next_node, 0, reg);
+  if (IS_NULL(y) || ! is_not_included(x, y, reg)) return 0;
+
+  wrapper = onig_node_new_effect(EFFECT_STOP_BACKTRACK);
+  CHECK_NULL_RETURN_VAL(wrapper, ONIGERR_MEMORY);
+  SET_EFFECT_STATUS(wrapper, NST_SIMPLE_REPEAT);
+  /* after the swap `node` is the stop-backtrack group and `wrapper`
+     holds the repeat */
+  swap_node(node, wrapper);
+  NEFFECT(node).target = wrapper;
+
+  return 0;
+}
+
+#define IN_ALT (1<<0)
+#define IN_NOT (1<<1)
+#define IN_REPEAT (1<<2)
+
+/* setup_tree does the following work.
+ 1. check empty loop. (set qn->target_empty_info)
+ 2. expand ignore-case in char class.
+ 3. set memory status bit flags. (reg->mem_stats)
+ 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
+ 5. find invalid patterns in look-behind.
+ 6. expand repeated string.
+ */
+static int
+setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
+{ /* Recursive preparation pass over the parse tree rooted at `node`.
+     `state` is a bit set of IN_ALT / IN_NOT / IN_REPEAT describing the
+     enclosing context.  Returns 0 on success, a non-zero code otherwise. */
+ int type;
+ int r = 0;
+
+ type = NTYPE(node);
+ switch (type) {
+ case N_LIST: /* set up each element, then relate the previous one to it */
+ {
+ Node* prev = NULL_NODE;
+ do {
+ r = setup_tree(NCONS(node).left, reg, state, env);
+ if (IS_NOT_NULL(prev) && r == 0) {
+ r = next_setup(prev, NCONS(node).left, reg);
+ }
+ prev = NCONS(node).left;
+ } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ }
+ break;
+
+ case N_ALT: /* every branch is set up with IN_ALT added to state */
+ do {
+ r = setup_tree(NCONS(node).left, reg, (state | IN_ALT), env);
+ } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ break;
+
+ case N_CCLASS: /* under ignore-case, add the case counterpart of each set byte */
+ if (IS_IGNORECASE(reg->options)) {
+ int i;
+ UChar c, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
+ BitSetRef bs = NCCLASS(node).bs;
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ c = (UChar )i;
+ ONIGENC_MBC_TO_LOWER(reg->enc, &c, lowbuf);
+ if (*lowbuf != c) { /* c has a distinct lower-case form */
+ if (BITSET_AT(bs, c)) BITSET_SET_BIT(bs, *lowbuf);
+ if (BITSET_AT(bs, *lowbuf)) BITSET_SET_BIT(bs, c);
+ }
+ }
+ }
+ break;
+
+ case N_STRING: /* flag non-raw strings that contain a case-ambiguous position */
+ if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
+ StrNode* sn = &NSTRING(node);
+ UChar* p = sn->s;
+
+ while (p < sn->end) {
+ if (ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p)) {
+ NSTRING_SET_CASE_AMBIG(node);
+ break;
+ }
+ p++;
+ }
+ }
+ break;
+
+ case N_CTYPE:
+ case N_ANYCHAR:
+ break;
+
+#ifdef USE_SUBEXP_CALL
+ case N_CALL:
+ break;
+#endif
+
+ case N_BACKREF: /* validate group numbers and mark the referenced groups */
+ {
+ int i;
+ int* p;
+ Node** nodes = SCANENV_MEM_NODES(env);
+ BackrefNode* br = &(NBACKREF(node));
+ p = BACKREFS_P(br);
+ for (i = 0; i < br->back_num; i++) {
+ if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
+ BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
+ BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
+ SET_EFFECT_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
+ }
+ }
+ break;
+
+ case N_QUALIFIER:
+ {
+ OnigDistance d;
+ QualifierNode* qn = &(NQUALIFIER(node));
+ Node* target = qn->target;
+
+ if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
+ r = get_min_match_length(target, &d, env);
+ if (r) break;
+ if (d == 0) { /* the repeated target can match the empty string */
+ qn->target_empty_info = NQ_TARGET_IS_EMPTY;
+#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+ r = qualifiers_memory_node_info(target);
+ if (r < 0) break;
+ if (r > 0) {
+ qn->target_empty_info = r;
+ }
+#endif
+#if 0
+ r = get_max_match_length(target, &d, env);
+ if (r == 0 && d == 0) {
+ /* ()* ==> ()?, ()+ ==> () */
+ qn->upper = 1;
+ if (qn->lower > 1) qn->lower = 1;
+ if (NTYPE(target) == N_STRING) {
+ qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
+ }
+ }
+#endif
+ }
+ }
+
+ if (qn->lower != qn->upper) /* repeat count is not fixed */
+ state |= IN_REPEAT;
+ r = setup_tree(target, reg, state, env);
+ if (r) break;
+
+ /* expand string */
+#define EXPAND_STRING_MAX_LENGTH 100
+ if (NTYPE(target) == N_STRING) {
+ if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
+ qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
+ int len = NSTRING_LEN(target);
+ StrNode* sn = &(NSTRING(target));
+
+ if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
+ int i, n = qn->lower;
+ onig_node_conv_to_str_node(node, NSTRING(target).flag); /* node itself becomes the string */
+ for (i = 0; i < n; i++) {
+ r = onig_node_str_cat(node, sn->s, sn->end);
+ if (r) break;
+ }
+ onig_node_free(target);
+ break; /* break case N_QUALIFIER: */
+ }
+ }
+ }
+
+#ifdef USE_OP_PUSH_OR_JUMP_EXACT
+ if (qn->greedy && (qn->target_empty_info != 0)) {
+ if (NTYPE(target) == N_QUALIFIER) { /* take over head_exact from the inner repeat */
+ QualifierNode* tqn = &(NQUALIFIER(target));
+ if (IS_NOT_NULL(tqn->head_exact)) {
+ qn->head_exact = tqn->head_exact;
+ tqn->head_exact = NULL;
+ }
+ }
+ else {
+ qn->head_exact = get_head_value_node(qn->target, 1, reg);
+ }
+ }
+#endif
+ }
+ break;
+
+ case N_EFFECT:
+ {
+ EffectNode* en = &(NEFFECT(node));
+
+ switch (en->type) {
+ case EFFECT_OPTION: /* set up the body under the group's options, then restore */
+ {
+ OnigOptionType options = reg->options;
+ reg->options = NEFFECT(node).option;
+ r = setup_tree(NEFFECT(node).target, reg, state, env);
+ reg->options = options;
+ }
+ break;
+
+ case EFFECT_MEMORY:
+ if ((state & (IN_ALT | IN_NOT | IN_REPEAT)) != 0) {
+ BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
+ /* SET_EFFECT_STATUS(node, NST_MEM_IN_ALT_NOT); */
+ }
+ /* fall */
+ case EFFECT_STOP_BACKTRACK:
+ {
+ Node* target = en->target;
+ r = setup_tree(target, reg, state, env);
+ if (NTYPE(target) == N_QUALIFIER) {
+ QualifierNode* tqn = &(NQUALIFIER(target));
+ if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
+ tqn->greedy != 0) { /* (?>a*), a*+ etc... */
+ int qtype = NTYPE(tqn->target);
+ if (IS_NODE_TYPE_SIMPLE(qtype))
+ SET_EFFECT_STATUS(node, NST_SIMPLE_REPEAT);
+ }
+ }
+ }
+ break;
+ }
+ }
+ break;
+
+ case N_ANCHOR:
+ {
+ AnchorNode* an = &(NANCHOR(node));
+
+ switch (an->type) {
+ case ANCHOR_PREC_READ:
+ r = setup_tree(an->target, reg, state, env);
+ break;
+ case ANCHOR_PREC_READ_NOT: /* negative look-ahead: body is set up with IN_NOT */
+ r = setup_tree(an->target, reg, (state | IN_NOT), env);
+ break;
+
+/* allowed node types in look-behind */
+#define ALLOWED_TYPE_IN_LB \
+ ( N_LIST | N_ALT | N_STRING | N_CCLASS | N_CTYPE | \
+ N_ANYCHAR | N_ANCHOR | N_EFFECT | N_QUALIFIER | N_CALL )
+
+#define ALLOWED_EFFECT_IN_LB ( EFFECT_MEMORY )
+#define ALLOWED_EFFECT_IN_LB_NOT 0
+
+#define ALLOWED_ANCHOR_IN_LB \
+( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF )
+#define ALLOWED_ANCHOR_IN_LB_NOT \
+( ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF )
+ /* can't allow all anchors, because \G in look-behind through Search().
+ ex. /(?<=\G)zz/.match("azz") => success. */
+
+ case ANCHOR_LOOK_BEHIND:
+ {
+ r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
+ ALLOWED_EFFECT_IN_LB, ALLOWED_ANCHOR_IN_LB);
+ if (r < 0) return r;
+ if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; /* body holds a disallowed node */
+ r = setup_look_behind(node, reg, env);
+ if (r != 0) return r;
+ r = setup_tree(an->target, reg, state, env);
+ }
+ break;
+
+ case ANCHOR_LOOK_BEHIND_NOT:
+ {
+ r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
+ ALLOWED_EFFECT_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
+ if (r < 0) return r;
+ if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; /* body holds a disallowed node */
+ r = setup_look_behind(node, reg, env);
+ if (r != 0) return r;
+ r = setup_tree(an->target, reg, (state | IN_NOT), env);
+ }
+ break;
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return r;
+}
+
+/* Build the skip table for the Boyer-Moore style search of the pattern
+   bytes [s, end).  Patterns shorter than ONIG_CHAR_TABLE_SIZE fill the
+   caller's UChar table `skip`; longer ones fill an int table kept in
+   *int_skip, which is allocated here when it is still NULL.  With
+   `ignore_case` set, each pattern byte is lower-cased before it is used
+   as a table index.
+   Returns 0, or ONIGERR_MEMORY if the allocation fails. */
+static int
+set_bm_skip(UChar* s, UChar* end, OnigEncoding enc, int ignore_case,
+            UChar skip[], int** int_skip)
+{
+  UChar lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
+  int k;
+  int len  = end - s;
+  int tail = len - 1;   /* index of the last pattern byte */
+
+  if (len >= ONIG_CHAR_TABLE_SIZE) {
+    int* wide;
+
+    if (IS_NULL(*int_skip)) {
+      *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
+      if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
+    }
+    wide = *int_skip;
+
+    for (k = 0; k < ONIG_CHAR_TABLE_SIZE; k++) wide[k] = len;
+
+    /* the last byte is left out: its entry keeps the full length */
+    for (k = 0; k < tail; k++) {
+      if (ignore_case) {
+        ONIGENC_MBC_TO_LOWER(enc, &(s[k]), lowbuf);
+        wide[*lowbuf] = tail - k;
+      }
+      else {
+        wide[s[k]] = tail - k;
+      }
+    }
+    return 0;
+  }
+
+  for (k = 0; k < ONIG_CHAR_TABLE_SIZE; k++) skip[k] = len;
+
+  for (k = 0; k < tail; k++) {
+    if (ignore_case) {
+      ONIGENC_MBC_TO_LOWER(enc, &(s[k]), lowbuf);
+      skip[*lowbuf] = tail - k;
+    }
+    else {
+      skip[s[k]] = tail - k;
+    }
+  }
+  return 0;
+}
+
+#define OPT_EXACT_MAXLEN 24
+
+typedef struct { /* lower/upper bound pair for a byte length or offset */
+ OnigDistance min; /* min byte length */
+ OnigDistance max; /* max byte length (ONIG_INFINITE_DISTANCE when unbounded) */
+} MinMaxLen;
+
+typedef struct { /* context passed down through optimize_node_left() */
+ MinMaxLen mmd; /* accumulated length range of the list elements before the current node */
+ BitStatusType backrefed_status; /* queried per group number with BIT_STATUS_AT() */
+ OnigEncoding enc; /* encoding used for character-length and case queries */
+ OnigOptionType options; /* options in effect; replaced while inside an option group */
+ ScanEnv* scan_env; /* gives access to the group node table */
+} OptEnv;
+
+typedef struct { /* ANCHOR_* bit sets, kept separately per side */
+ int left_anchor; /* anchors for which is_left_anchor() returns 1 */
+ int right_anchor; /* anchors for which is_left_anchor() returns 0 */
+} OptAncInfo;
+
+typedef struct { /* literal byte string collected for the exact-string optimization */
+ MinMaxLen mmd; /* info position */
+ OptAncInfo anc;
+
+ int reach_end; /* set to 1 when s[] covers the whole string node it was taken from */
+ int ignore_case; /* set to 1 for text taken from a string under ignore-case */
+ int len; /* number of bytes used in s[] */
+ UChar s[OPT_EXACT_MAXLEN]; /* the collected bytes */
+} OptExactInfo;
+
+typedef struct { /* set of bytes, with a weight derived from its members */
+ MinMaxLen mmd; /* info position */
+ OptAncInfo anc;
+
+ int value; /* weighted value */ /* sum of map_position_value() over the bytes set in map[] */
+ UChar map[ONIG_CHAR_TABLE_SIZE]; /* map[b] is 1 when byte b belongs to the set */
+} OptMapInfo;
+
+typedef struct { /* everything optimize_node_left() reports for one node */
+ MinMaxLen len; /* min/max length of the node */
+
+ OptAncInfo anc; /* anchors collected for the node */
+ OptExactInfo exb; /* boundary */
+ OptExactInfo exm; /* middle */
+ OptExactInfo expr; /* prec read (?=...) */
+
+ OptMapInfo map; /* boundary */
+} NodeOptInfo;
+
+
+/* Weight of byte value `i` when it appears in an OptMapInfo set.  The
+   first 128 values come from a fixed table; everything else weighs 7. */
+static int
+map_position_value(int i)
+{
+  static int vals[] = {
+    10, 10, 10, 10, 10, 10, 10, 10, 10,  1,  1, 10, 10,  1, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+     1,  6,  3,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,  5,
+     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,
+     5,  4,  4,  4,  4,  3,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  5,  4,  5,  5,  5,
+     5,  4,  4,  4,  4,  3,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5, 10,
+  };
+
+  /* outside the table: Take it easy. */
+  if (i >= sizeof(vals)/sizeof(vals[0])) return 7;
+
+  return vals[i];
+}
+
+/* Score for how tightly a position is known: 1000 / (max - min + 1) for
+   spreads covered by the table, 1 for larger finite spreads and 0 when
+   the maximum is ONIG_INFINITE_DISTANCE. */
+static int
+distance_value(MinMaxLen* mm)
+{
+  /* 1000 / (min-max-dist + 1) */
+  static int dist_vals[] = {
+    1000,  500,  333,  250,  200,  167,  143,  125,  111,  100,
+      91,   83,   77,   71,   67,   63,   59,   56,   53,   50,
+      48,   45,   43,   42,   40,   38,   37,   36,   34,   33,
+      32,   31,   30,   29,   29,   28,   27,   26,   26,   25,
+      24,   24,   23,   23,   22,   22,   21,   21,   20,   20,
+      20,   19,   19,   19,   18,   18,   18,   17,   17,   17,
+      16,   16,   16,   16,   15,   15,   15,   15,   14,   14,
+      14,   14,   14,   14,   13,   13,   13,   13,   13,   13,
+      12,   12,   12,   12,   12,   12,   11,   11,   11,   11,
+      11,   11,   11,   11,   11,   10,   10,   10,   10,   10
+  };
+
+  int d;
+
+  if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
+
+  d = mm->max - mm->min;
+  if (d >= sizeof(dist_vals)/sizeof(dist_vals[0]))
+    return 1;
+
+  return dist_vals[d];
+}
+
+/* Compare two candidates by base value scaled with distance_value().
+   Returns 1 when candidate 2 (d2, v2) is the better one, -1 when
+   candidate 1 is, and 0 on a full tie.  A candidate with a non-positive
+   base value loses outright; equal scores are decided in favour of the
+   smaller `min`. */
+static int
+comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
+{
+  int score1, score2;
+
+  if (v2 <= 0) return -1;
+  if (v1 <= 0) return 1;
+
+  score1 = v1 * distance_value(d1);
+  score2 = v2 * distance_value(d2);
+
+  if (score1 != score2)
+    return (score2 > score1) ? 1 : -1;
+
+  if (d1->min != d2->min)
+    return (d2->min < d1->min) ? 1 : -1;
+
+  return 0;
+}
+
+/* Returns 1 when both bounds of `a` and `b` are equal, 0 otherwise. */
+static int
+is_equal_mml(MinMaxLen* a, MinMaxLen* b)
+{
+  if (a->min != b->min) return 0;
+  if (a->max != b->max) return 0;
+  return 1;
+}
+
+
+/* Store the given bounds in `mml`. */
+static void
+set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
+{
+  mml->max = max;
+  mml->min = min;
+}
+
+/* Reset both bounds of `mml` to zero. */
+static void
+clear_mml(MinMaxLen* mml)
+{
+  mml->max = 0;
+  mml->min = 0;
+}
+
+/* Copy both bounds from `from` to `to`. */
+static void
+copy_mml(MinMaxLen* to, MinMaxLen* from)
+{
+  OnigDistance lo = from->min;
+  OnigDistance hi = from->max;
+
+  to->min = lo;
+  to->max = hi;
+}
+
+/* Add the bounds of `from` to those of `to`, bound by bound, using
+   distance_add(). */
+static void
+add_mml(MinMaxLen* to, MinMaxLen* from)
+{
+  OnigDistance lo = distance_add(to->min, from->min);
+  OnigDistance hi = distance_add(to->max, from->max);
+
+  to->min = lo;
+  to->max = hi;
+}
+
+/* Shift both bounds of `to` by `len`, using distance_add(). */
+static void
+add_len_mml(MinMaxLen* to, OnigDistance len)
+{
+  OnigDistance lo = distance_add(to->min, len);
+  OnigDistance hi = distance_add(to->max, len);
+
+  to->min = lo;
+  to->max = hi;
+}
+
+/* Widen `to` so that it also covers `from`: keep the smaller minimum and
+   the larger maximum. */
+static void
+alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
+{
+  if (from->min < to->min) {
+    to->min = from->min;
+  }
+  if (from->max > to->max) {
+    to->max = from->max;
+  }
+}
+
+/* Copy the whole optimization context by structure assignment. */
+static void
+copy_opt_env(OptEnv* to, OptEnv* from)
+{
+  to[0] = from[0];
+}
+
+/* Empty both anchor bit sets. */
+static void
+clear_opt_anc_info(OptAncInfo* anc)
+{
+  anc->left_anchor = anc->right_anchor = 0;
+}
+
+/* Copy the anchor information by structure assignment. */
+static void
+copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
+{
+  to[0] = from[0];
+}
+
+/* Anchor info for the concatenation "left right", written to `to`.
+   The left anchors come from `left`, and additionally from `right` when
+   `left_len` is 0; the right anchors come from `right`, and additionally
+   from `left` when `right_len` is 0. */
+static void
+concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
+                    OnigDistance left_len, OnigDistance right_len)
+{
+  clear_opt_anc_info(to);
+
+  to->left_anchor = (left_len == 0)
+    ? (left->left_anchor | right->left_anchor)
+    : left->left_anchor;
+
+  to->right_anchor = (right_len == 0)
+    ? (right->right_anchor | left->right_anchor)
+    : right->right_anchor;
+}
+
+/* Returns 0 for the end-of-buffer, semi-end-of-buffer, end-of-line and
+   look-ahead anchors, and 1 for every other value. */
+static int
+is_left_anchor(int anc)
+{
+  switch (anc) {
+  case ANCHOR_END_BUF:
+  case ANCHOR_SEMI_END_BUF:
+  case ANCHOR_END_LINE:
+  case ANCHOR_PREC_READ:
+  case ANCHOR_PREC_READ_NOT:
+    return 0;
+
+  default:
+    return 1;
+  }
+}
+
+/* Returns 1 when any bit of `anc` is present on either side of `to`,
+   0 otherwise. */
+static int
+is_set_opt_anc_info(OptAncInfo* to, int anc)
+{
+  int either_side = to->left_anchor | to->right_anchor;
+
+  return ((either_side & anc) != 0 ? 1 : 0);
+}
+
+/* Record anchor `anc` on the side chosen by is_left_anchor(). */
+static void
+add_opt_anc_info(OptAncInfo* to, int anc)
+{
+  int* side = is_left_anchor(anc) ? &to->left_anchor : &to->right_anchor;
+
+  *side |= anc;
+}
+
+/* Clear the bits of `anc` on the side chosen by is_left_anchor(). */
+static void
+remove_opt_anc_info(OptAncInfo* to, int anc)
+{
+  int* side = is_left_anchor(anc) ? &to->left_anchor : &to->right_anchor;
+
+  *side &= ~anc;
+}
+
+/* Merge for alternation: keep only the anchors present in both `to` and
+   `add`, separately for each side. */
+static void
+alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
+{
+  to->right_anchor &= add->right_anchor;
+  to->left_anchor  &= add->left_anchor;
+}
+
+/* Returns 1 when the byte buffer of `ex` has no room left, 0 otherwise. */
+static int
+is_full_opt_exact_info(OptExactInfo* ex)
+{
+  if (ex->len >= OPT_EXACT_MAXLEN) return 1;
+  return 0;
+}
+
+/* Reset `ex` to the empty state: zero position, no anchors, no flags and
+   an empty byte string. */
+static void
+clear_opt_exact_info(OptExactInfo* ex)
+{
+  ex->s[0] = '\0';
+  ex->len = 0;
+  ex->ignore_case = 0;
+  ex->reach_end = 0;
+
+  clear_opt_anc_info(&ex->anc);
+  clear_mml(&ex->mmd);
+}
+
+/* Copy the exact-string information by structure assignment. */
+static void
+copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
+{
+  to[0] = from[0];
+}
+
+/* Append the bytes of `add` to `to`, as far as the buffer allows.
+   A case-insensitive `add` is not appended to a case-sensitive `to` that
+   is at least as long; otherwise the result becomes case-insensitive.
+   reach_end is inherited from `add` only when all of its bytes fitted,
+   and the right anchors are dropped when reach_end ends up 0. */
+static void
+concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add)
+{
+  int dst, src;
+  OptAncInfo merged;
+
+  if (add->ignore_case && ! to->ignore_case) {
+    if (to->len >= add->len) return ;  /* avoid */
+
+    to->ignore_case = 1;
+  }
+
+  dst = to->len;
+  src = 0;
+  while (src < add->len && dst < OPT_EXACT_MAXLEN) {
+    to->s[dst] = add->s[src];
+    dst++;
+    src++;
+  }
+
+  to->len = dst;
+  to->reach_end = (src == add->len ? add->reach_end : 0);
+
+  concat_opt_anc_info(&merged, &to->anc, &add->anc, 1, 1);
+  if (! to->reach_end) merged.right_anchor = 0;
+  copy_opt_anc_info(&to->anc, &merged);
+}
+
+/* Append the bytes [s, end) to `to`.  Raw strings are copied byte by
+   byte; otherwise whole characters (per enc_len()) are copied and copying
+   stops before a character that would not fit completely. */
+static void
+concat_opt_exact_info_str(OptExactInfo* to,
+                          UChar* s, UChar* end, int raw, OnigEncoding enc)
+{
+  int used = to->len;
+  int k, clen;
+  UChar *p = s;
+
+  while (p < end && used < OPT_EXACT_MAXLEN) {
+    clen = raw ? 1 : enc_len(enc, *p);
+    if (used + clen > OPT_EXACT_MAXLEN) break;
+
+    for (k = 0; k < clen; k++)
+      to->s[used++] = *p++;
+  }
+
+  to->len = used;
+}
+
+/* Merge for alternation: reduce `to` to the prefix, in whole characters,
+   that it shares with `add`.  `to` is cleared when either side is empty
+   or when the two do not sit at the same position. */
+static void
+alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
+{
+  int common, k, clen;
+
+  if (add->len == 0 || to->len == 0 ||
+      ! is_equal_mml(&to->mmd, &add->mmd)) {
+    clear_opt_exact_info(to);
+    return ;
+  }
+
+  common = 0;
+  while (common < to->len && common < add->len) {
+    if (to->s[common] != add->s[common]) break;
+    clen = enc_len(env->enc, to->s[common]);
+
+    /* the remaining bytes of the character must agree as well */
+    for (k = 1; k < clen; k++) {
+      if (to->s[common+k] != add->s[common+k]) break;
+    }
+    if (k < clen) break;
+    common += clen;
+  }
+
+  if (! add->reach_end || common < add->len || common < to->len) {
+    to->reach_end = 0;
+  }
+  to->len = common;
+  to->ignore_case |= add->ignore_case;
+
+  alt_merge_opt_anc_info(&to->anc, &add->anc);
+  if (! to->reach_end) to->anc.right_anchor = 0;
+}
+
+/* Replace `now` by `alt` when comp_distance_value() rates `alt` higher.
+   The base value is the byte length, doubled for case-sensitive text. */
+static void
+select_opt_exact_info(OptExactInfo* now, OptExactInfo* alt)
+{
+  int now_weight = now->len;
+  int alt_weight = alt->len;
+
+  if (! now->ignore_case) now_weight *= 2;
+  if (! alt->ignore_case) alt_weight *= 2;
+
+  if (comp_distance_value(&now->mmd, &alt->mmd, now_weight, alt_weight) > 0)
+    copy_opt_exact_info(now, alt);
+}
+
+/* Reset `map` to the empty set: zero position, no anchors, value 0 and
+   every table entry cleared. */
+static void
+clear_opt_map_info(OptMapInfo* map)
+{
+  UChar* slot;
+
+  clear_mml(&map->mmd);
+  clear_opt_anc_info(&map->anc);
+  map->value = 0;
+
+  for (slot = map->map; slot < map->map + ONIG_CHAR_TABLE_SIZE; slot++)
+    *slot = 0;
+}
+
+/* Copy the map information by structure assignment. */
+static void
+copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
+{
+  to[0] = from[0];
+}
+
+/* Add byte `c` to the set.  A byte that is already present is left alone
+   so that its weight is counted only once. */
+static void
+add_char_opt_map_info(OptMapInfo* map, int c)
+{
+  if (map->map[c] != 0) return ;
+
+  map->map[c] = 1;
+  map->value += map_position_value(c);
+}
+
+/* Add byte `c` together with its case counterparts.  If lower-casing
+   changes `c`, the lower-case byte is added; otherwise every byte that
+   lower-cases to `c` is added. */
+static void
+add_char_amb_opt_map_info(OptMapInfo* map, int c, OnigEncoding enc)
+{
+  UChar probe, low[ONIGENC_MBC_TO_LOWER_MAXLEN];
+  int b;
+
+  add_char_opt_map_info(map, c);
+
+  probe = (UChar )c;
+  ONIGENC_MBC_TO_LOWER(enc, &probe, low);
+  if (*low != probe) {
+    add_char_opt_map_info(map, (int )(*low));
+    return ;
+  }
+
+  for (b = 0; b < ONIG_CHAR_TABLE_SIZE; b++) {
+    probe = (UChar )b;
+    ONIGENC_MBC_TO_LOWER(enc, &probe, low);
+    if ((int )(*low) == c) add_char_opt_map_info(map, b);
+  }
+}
+
+/* Replace `now` by `alt` when `alt` is the better map.  An empty `alt`
+   never wins and an empty `now` always loses; otherwise the maps are
+   compared with comp_distance_value() on the inverse of their weights. */
+static void
+select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
+{
+  static int z = 1<<15; /* 32768: something big value */
+
+  if (alt->value == 0) return ;
+
+  if (now->value != 0) {
+    int v1 = z / now->value;
+    int v2 = z / alt->value;
+
+    if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) <= 0)
+      return ;
+  }
+
+  copy_opt_map_info(now, alt);
+}
+
+/* Compare an exact string against a byte map with comp_distance_value().
+   Returns -1 when the map is empty; otherwise the result has the meaning
+   given by comp_distance_value(), with the exact string as candidate 1
+   and the map as candidate 2. */
+static int
+comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
+{
+#define COMP_EM_BASE 20
+  int exact_score, map_score;
+
+  if (m->value <= 0) return -1;
+
+  exact_score = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);
+  map_score   = COMP_EM_BASE * 5 * 2 / m->value;
+
+  return comp_distance_value(&e->mmd, &m->mmd, exact_score, map_score);
+}
+
+/* Merge for alternation: `to` becomes the union of both byte sets, with
+   the position widened and the weight recomputed.  An empty `to` stays
+   empty; `to` is cleared when `add` is empty or lies entirely behind it. */
+static void
+alt_merge_opt_map_info(OptMapInfo* to, OptMapInfo* add)
+{
+  int b, total;
+
+  /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
+  if (to->value == 0) return ;
+
+  if (add->value == 0 || to->mmd.max < add->mmd.min) {
+    clear_opt_map_info(to);
+    return ;
+  }
+
+  alt_merge_mml(&to->mmd, &add->mmd);
+
+  total = 0;
+  for (b = 0; b < ONIG_CHAR_TABLE_SIZE; b++) {
+    if (add->map[b])
+      to->map[b] = 1;
+
+    if (to->map[b])
+      total += map_position_value(b);
+  }
+  to->value = total;
+
+  alt_merge_opt_anc_info(&to->anc, &add->anc);
+}
+
+/* Give the boundary exact string, the look-ahead exact string and the
+   map of `opt` the position `mmd`. */
+static void
+set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
+{
+  copy_mml(&opt->exb.mmd,  mmd);
+  copy_mml(&opt->expr.mmd, mmd);
+  copy_mml(&opt->map.mmd,  mmd);
+}
+
+/* Reset every part of `opt` to its empty state. */
+static void
+clear_node_opt_info(NodeOptInfo* opt)
+{
+  clear_mml(&opt->len);
+  clear_opt_anc_info(&opt->anc);
+
+  clear_opt_exact_info(&opt->exb);
+  clear_opt_exact_info(&opt->exm);
+  clear_opt_exact_info(&opt->expr);
+
+  clear_opt_map_info(&opt->map);
+}
+
+/* Copy the complete node information by structure assignment. */
+static void
+copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
+{
+  to[0] = from[0];
+}
+
+static void
+concat_left_node_opt_info(NodeOptInfo* to, NodeOptInfo* add)
+{ /* Fold the info of the next list element `add` into the running info
+     `to` for everything to its left.  Note that `add` is modified too. */
+ int exb_reach, exm_reach;
+ OptAncInfo tanc;
+
+ concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
+ copy_opt_anc_info(&to->anc, &tanc);
+
+ if (add->exb.len > 0 && to->len.max == 0) { /* nothing but zero-width nodes so far */
+ concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
+ to->len.max, add->len.max);
+ copy_opt_anc_info(&add->exb.anc, &tanc);
+ }
+
+ if (add->map.value > 0 && to->len.max == 0) {
+ if (add->map.mmd.max == 0)
+ add->map.anc.left_anchor |= to->anc.left_anchor;
+ }
+
+ exb_reach = to->exb.reach_end; /* saved before the flags are reset below */
+ exm_reach = to->exm.reach_end;
+
+ if (add->len.max != 0)
+ to->exb.reach_end = to->exm.reach_end = 0;
+
+ if (add->exb.len > 0) {
+ if (exb_reach) { /* extend the boundary string with add's boundary string */
+ concat_opt_exact_info(&to->exb, &add->exb);
+ clear_opt_exact_info(&add->exb);
+ }
+ else if (exm_reach) { /* otherwise extend the middle string */
+ concat_opt_exact_info(&to->exm, &add->exb);
+ clear_opt_exact_info(&add->exb);
+ }
+ }
+ select_opt_exact_info(&to->exm, &add->exb); /* keep the better middle candidate */
+ select_opt_exact_info(&to->exm, &add->exm);
+
+ if (to->expr.len > 0) {
+ if (add->len.max > 0) {
+ if (to->expr.len > (int )add->len.max) /* cap the look-ahead string at add's max length */
+ to->expr.len = add->len.max;
+
+ if (to->expr.mmd.max == 0)
+ select_opt_exact_info(&to->exb, &to->expr);
+ else
+ select_opt_exact_info(&to->exm, &to->expr);
+ }
+ }
+ else if (add->expr.len > 0) {
+ copy_opt_exact_info(&to->expr, &add->expr);
+ }
+
+ select_opt_map_info(&to->map, &add->map);
+
+ add_mml(&to->len, &add->len); /* total length grows by add's length range */
+}
+
+/* Merge the info of one more alternative `add` into `to`, part by part,
+   using the alt_merge_* helper of each part. */
+static void
+alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
+{
+  alt_merge_opt_anc_info(&to->anc, &add->anc);
+
+  alt_merge_opt_exact_info(&to->exb,  &add->exb,  env);
+  alt_merge_opt_exact_info(&to->exm,  &add->exm,  env);
+  alt_merge_opt_exact_info(&to->expr, &add->expr, env);
+
+  alt_merge_opt_map_info(&to->map, &add->map);
+
+  alt_merge_mml(&to->len, &add->len);
+}
+
+
+#define MAX_NODE_OPT_INFO_REF_COUNT 5
+
+/* Collect search-optimization data for the subtree `node` into `opt`:
+   its min/max length, anchors, exact byte strings (boundary, middle,
+   look-ahead) and a byte map.
+
+   `opt` is cleared first and its positions are initialised from env->mmd.
+   `env` carries the encoding, the options in effect (temporarily replaced
+   inside option groups and calls) and the scan environment.
+
+   Returns 0 on success, the error of a nested length query, or
+   ONIGERR_TYPE_BUG for an unknown node type. */
+static int
+optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
+{
+  int type;
+  int r = 0;
+
+  clear_node_opt_info(opt);
+  set_bound_node_opt_info(opt, &env->mmd);
+
+  type = NTYPE(node);
+  switch (type) {
+  case N_LIST:
+    {
+      OptEnv nenv;
+      NodeOptInfo nopt;
+      Node* nd = node;
+
+      /* each element is analysed at the position reached so far */
+      copy_opt_env(&nenv, env);
+      do {
+        r = optimize_node_left(NCONS(nd).left, &nopt, &nenv);
+        if (r == 0) {
+          add_mml(&nenv.mmd, &nopt.len);
+          concat_left_node_opt_info(opt, &nopt);
+        }
+      } while (r == 0 && IS_NOT_NULL(nd = NCONS(nd).right));
+    }
+    break;
+
+  case N_ALT:
+    {
+      NodeOptInfo nopt;
+      Node* nd = node;
+
+      /* the first branch seeds the result, the others are merged in */
+      do {
+        r = optimize_node_left(NCONS(nd).left, &nopt, env);
+        if (r == 0) {
+          if (nd == node) copy_node_opt_info(opt, &nopt);
+          else            alt_merge_node_opt_info(opt, &nopt, env);
+        }
+      } while ((r == 0) && IS_NOT_NULL(nd = NCONS(nd).right));
+    }
+    break;
+
+  case N_STRING:
+    {
+      UChar *p;
+      int len, plen;
+      StrNode* sn = &(NSTRING(node));
+      int slen = sn->end - sn->s;
+      int is_raw = NSTRING_IS_RAW(node);
+
+      if ((! IS_IGNORECASE(env->options)) || is_raw) {
+        concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
+                                  NSTRING_IS_RAW(node), env->enc);
+        if (slen > 0) {
+          add_char_opt_map_info(&opt->map, *(sn->s));
+        }
+      }
+      else {
+        /* find the first single-byte, case-ambiguous character */
+        for (p = sn->s; p < sn->end; ) {
+          len = enc_len(env->enc, *p);
+          if (len == 1 && ONIGENC_IS_MBC_CASE_AMBIG(env->enc, p)) {
+            break;
+          }
+          p += len;
+        }
+
+        plen = p - sn->s;
+        if (plen > slen / 5) {
+          /* unambiguous prefix goes to exb, the rest to exm as
+             ignore-case text positioned plen bytes later */
+          concat_opt_exact_info_str(&opt->exb, sn->s, p, is_raw, env->enc);
+          concat_opt_exact_info_str(&opt->exm, p, sn->end, is_raw, env->enc);
+          opt->exm.ignore_case = 1;
+          if (opt->exm.len == sn->end - p)
+            opt->exm.reach_end = 1;
+
+          copy_mml(&(opt->exm.mmd), &(opt->exb.mmd));
+          add_len_mml(&(opt->exm.mmd), plen);
+        }
+        else {
+          concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
+                                    is_raw, env->enc);
+          opt->exb.ignore_case = 1;
+        }
+
+        if (slen > 0) {
+          if (p == sn->s)
+            add_char_amb_opt_map_info(&opt->map, *(sn->s), env->enc);
+          else
+            add_char_opt_map_info(&opt->map, *(sn->s));
+        }
+      }
+
+      if (opt->exb.len == slen)
+        opt->exb.reach_end = 1;
+
+      set_mml(&opt->len, slen, slen);
+    }
+    break;
+
+  case N_CCLASS:
+    {
+      int i, z, len, found, mb_found;
+      CClassNode* cc = &(NCCLASS(node));
+
+      /* no need to check ignore case. (set in setup_tree()) */
+      found = mb_found = 0;
+      for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+        z = BITSET_AT(cc->bs, i);
+        if ((z && !cc->not) || (!z && cc->not)) {
+          found = 1;
+          add_char_opt_map_info(&opt->map, i);
+        }
+      }
+
+      if (IS_NULL(cc->mbuf)) {
+        if (cc->not) {
+          for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+            add_char_opt_map_info(&opt->map, i);
+          }
+          mb_found = 1;
+        }
+      }
+      else {
+        for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+          z = ONIGENC_IS_MBC_HEAD(env->enc, i);
+          if (z) {
+            mb_found = 1;
+            add_char_opt_map_info(&opt->map, i);
+          }
+        }
+      }
+
+      if (mb_found) {
+        len = ONIGENC_MBC_MAXLEN_DIST(env->enc);
+        set_mml(&opt->len, 1, len);
+      }
+      else if (found) {
+        len = 1;
+        set_mml(&opt->len, 1, len);
+      }
+    }
+    break;
+
+  case N_CTYPE:
+    {
+      int c;
+      int len, min, max;
+
+      min = ONIGENC_MBC_MAXLEN_DIST(env->enc);
+      max = 0;
+
+#define IS_WORD_HEAD_BYTE(enc,b) \
+  (ONIGENC_IS_MBC_ASCII(&b) ? ONIGENC_IS_CODE_WORD(enc,((OnigCodePoint )b)) \
+                            : ONIGENC_IS_MBC_HEAD(enc,b))
+
+      switch (NCTYPE(node).type) {
+      case CTYPE_WORD:
+        for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+          if (IS_WORD_HEAD_BYTE(env->enc, c)) {
+            add_char_opt_map_info(&opt->map, c);
+            len = enc_len(env->enc, c);
+            if (len < min) min = len;
+            if (len > max) max = len;
+          }
+        }
+        break;
+
+      case CTYPE_NOT_WORD:
+        for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+          if (! IS_WORD_HEAD_BYTE(env->enc, c)) {
+            add_char_opt_map_info(&opt->map, c);
+            len = enc_len(env->enc, c);
+            if (len < min) min = len;
+            if (len > max) max = len;
+          }
+        }
+        break;
+      }
+
+      set_mml(&opt->len, min, max);
+    }
+    break;
+
+  case N_ANYCHAR:
+    {
+      OnigDistance len = ONIGENC_MBC_MAXLEN_DIST(env->enc);
+      set_mml(&opt->len, 1, len);
+    }
+    break;
+
+  case N_ANCHOR:
+    switch (NANCHOR(node).type) {
+    case ANCHOR_BEGIN_BUF:
+    case ANCHOR_BEGIN_POSITION:
+    case ANCHOR_BEGIN_LINE:
+    case ANCHOR_END_BUF:
+    case ANCHOR_SEMI_END_BUF:
+    case ANCHOR_END_LINE:
+      add_opt_anc_info(&opt->anc, NANCHOR(node).type);
+      break;
+
+    case ANCHOR_PREC_READ:
+      {
+        NodeOptInfo nopt;
+
+        r = optimize_node_left(NANCHOR(node).target, &nopt, env);
+        if (r == 0) {
+          if (nopt.exb.len > 0)
+            copy_opt_exact_info(&opt->expr, &nopt.exb);
+          else if (nopt.exm.len > 0)
+            copy_opt_exact_info(&opt->expr, &nopt.exm);
+
+          opt->expr.reach_end = 0;
+
+          if (nopt.map.value > 0)
+            copy_opt_map_info(&opt->map, &nopt.map);
+        }
+      }
+      break;
+
+    case ANCHOR_PREC_READ_NOT:
+    case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. */
+    case ANCHOR_LOOK_BEHIND_NOT:
+      break;
+    }
+    break;
+
+  case N_BACKREF:
+    {
+      int i;
+      int* backs;
+      OnigDistance min, max, tmin, tmax;
+      Node** nodes = SCANENV_MEM_NODES(env->scan_env);
+      BackrefNode* br = &(NBACKREF(node));
+
+      if (br->state & NST_RECURSION) {
+        set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
+        break;
+      }
+
+      /* length range = union of the ranges of all referenced groups */
+      backs = BACKREFS_P(br);
+      r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
+      if (r != 0) break;
+      r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
+      if (r != 0) break;
+      for (i = 1; i < br->back_num; i++) {
+        r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
+        if (r != 0) break;
+        r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
+        if (r != 0) break;
+        if (min > tmin) min = tmin;
+        if (max < tmax) max = tmax;
+      }
+      if (r == 0) set_mml(&opt->len, min, max);
+    }
+    break;
+
+#ifdef USE_SUBEXP_CALL
+  case N_CALL:
+    if (IS_CALL_RECURSION(&(NCALL(node))))
+      set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
+    else {
+      /* analyse the callee under its own options */
+      OnigOptionType save = env->options;
+      env->options = NEFFECT(NCALL(node).target).option;
+      r = optimize_node_left(NCALL(node).target, opt, env);
+      env->options = save;
+    }
+    break;
+#endif
+
+  case N_QUALIFIER:
+    {
+      int i;
+      OnigDistance min, max;
+      NodeOptInfo nopt;
+      QualifierNode* qn = &(NQUALIFIER(node));
+
+      r = optimize_node_left(qn->target, &nopt, env);
+      if (r) break;
+
+      if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
+        /* a greedy .* at distance 0 from the start acts like an anchor */
+        if (env->mmd.max == 0 &&
+            NTYPE(qn->target) == N_ANYCHAR && qn->greedy) {
+          if (IS_POSIXLINE(env->options))
+            add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_PL);
+          else
+            add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
+        }
+      }
+      else {
+        if (qn->lower > 0) {
+          copy_node_opt_info(opt, &nopt);
+          if (nopt.exb.len > 0) {
+            if (nopt.exb.reach_end) {
+              /* opt->exb already holds one copy of the target's string;
+                 append copies 2..lower.  The bound must be inclusive:
+                 with `i < lower` only lower-1 copies were stored while
+                 reach_end stayed set, so a following element was glued
+                 onto a string one repetition too short. */
+              for (i = 2; i <= qn->lower &&
+                          ! is_full_opt_exact_info(&opt->exb); i++) {
+                concat_opt_exact_info(&opt->exb, &nopt.exb);
+              }
+              if (i < qn->lower) {
+                opt->exb.reach_end = 0;
+              }
+            }
+          }
+
+          if (qn->lower != qn->upper) {
+            opt->exb.reach_end = 0;
+            opt->exm.reach_end = 0;
+          }
+          if (qn->lower > 1)
+            opt->exm.reach_end = 0;
+        }
+      }
+
+      min = distance_multiply(nopt.len.min, qn->lower);
+      if (IS_REPEAT_INFINITE(qn->upper))
+        max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
+      else
+        max = distance_multiply(nopt.len.max, qn->upper);
+
+      set_mml(&opt->len, min, max);
+    }
+    break;
+
+  case N_EFFECT:
+    {
+      EffectNode* en = &(NEFFECT(node));
+
+      switch (en->type) {
+      case EFFECT_OPTION:
+        {
+          OnigOptionType save = env->options;
+
+          env->options = en->option;
+          r = optimize_node_left(en->target, opt, env);
+          env->options = save;
+        }
+        break;
+
+      case EFFECT_MEMORY:
+#ifdef USE_SUBEXP_CALL
+        /* a group analysed more than MAX_NODE_OPT_INFO_REF_COUNT times
+           only reports its recorded length limits */
+        en->opt_count++;
+        if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
+          OnigDistance min, max;
+
+          min = 0;
+          max = ONIG_INFINITE_DISTANCE;
+          if (IS_EFFECT_MIN_FIXED(en)) min = en->min_len;
+          if (IS_EFFECT_MAX_FIXED(en)) max = en->max_len;
+          set_mml(&opt->len, min, max);
+        }
+        else
+#endif
+        {
+          r = optimize_node_left(en->target, opt, env);
+
+          /* the .* anchor is dropped for a back-referenced group */
+          if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
+            if (BIT_STATUS_AT(env->backrefed_status, en->regnum))
+              remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
+          }
+        }
+        break;
+
+      case EFFECT_STOP_BACKTRACK:
+        r = optimize_node_left(en->target, opt, env);
+        break;
+      }
+    }
+    break;
+
+  default:
+#ifdef ONIG_DEBUG
+    fprintf(stderr, "optimize_node_left: undefined node type %d\n",
+            NTYPE(node));
+#endif
+    r = ONIGERR_TYPE_BUG;
+    break;
+  }
+
+  return r;
+}
+
+/*
+ * Install the exact-string search optimization described by `e` into `reg`.
+ *
+ * Does nothing (returns 0) when `e` holds no bytes.  Otherwise reg->exact /
+ * reg->exact_end receive a private copy of the string:
+ *   - ignore-case: the copy is rewritten with the lower-cased form of every
+ *     character and reg->optimize becomes ONIG_OPTIMIZE_EXACT_IC;
+ *   - otherwise: a Boyer-Moore skip table is built for strings of 3+ bytes
+ *     (or 2+ bytes when reverse matching is allowed) and reg->optimize is
+ *     set to one of the EXACT_BM variants, else plain ONIG_OPTIMIZE_EXACT.
+ * reg->dmin / reg->dmax are taken from e->mmd, and reg->threshold_len is
+ * set when dmin is finite.
+ *
+ * Returns 0 on success, ONIGERR_MEMORY on allocation failure, or the error
+ * reported by set_bm_skip().
+ */
+static int
+set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
+{
+  int r;
+
+  if (e->len == 0) return 0;
+
+  reg->exact = onig_strdup(e->s, e->s + e->len);
+  CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
+
+  reg->exact_end = reg->exact + e->len;
+
+  if (e->ignore_case) {
+    UChar buf[ONIGENC_MBC_TO_LOWER_MAXLEN];
+    int len, low_len, i, j, alloc_size;
+
+    alloc_size = e->len;
+    i = j = 0;   /* i: read offset in e->s, j: write offset in reg->exact */
+    while (i < e->len) {
+      low_len = ONIGENC_MBC_TO_LOWER(reg->enc, &(e->s[i]), buf);
+      len     = enc_len(reg->enc, e->s[i]);
+
+      /* The bytes are stored at offset j, so the capacity test has to use
+         the write offset; keep doubling until the lowered form fits. */
+      while (low_len > alloc_size - j) {
+        UChar* grown = (UChar* )xrealloc(reg->exact, alloc_size * 2);
+        if (IS_NULL(grown)) {
+          /* reg->exact still owns the old buffer, so it is not leaked */
+          reg->exact_end = reg->exact + j;
+          return ONIGERR_MEMORY;
+        }
+        reg->exact  = grown;
+        alloc_size *= 2;
+      }
+
+      xmemcpy(&(reg->exact[j]), buf, low_len);
+      i += len;
+      j += low_len;
+    }
+    reg->exact_end = reg->exact + j;
+    reg->optimize  = ONIG_OPTIMIZE_EXACT_IC;
+  }
+  else {
+    int allow_reverse;
+
+    if (e->anc.left_anchor & ANCHOR_BEGIN_LINE)
+      allow_reverse = 1;
+    else
+      allow_reverse =
+        ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
+
+    if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
+      r = set_bm_skip(reg->exact, reg->exact_end, reg->enc, 0,
+                      reg->map, &(reg->int_map));
+      if (r) return r;
+
+      reg->optimize = (allow_reverse != 0
+                       ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
+    }
+    else {
+      reg->optimize = ONIG_OPTIMIZE_EXACT;
+    }
+  }
+
+  reg->dmin = e->mmd.min;
+  reg->dmax = e->mmd.max;
+
+  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
+    reg->threshold_len = reg->dmin + (reg->exact_end - reg->exact);
+  }
+
+  return 0;
+}
+
+/*
+ * Install the character-map optimization `m` into `reg`: copy the whole
+ * map table, select ONIG_OPTIMIZE_MAP, take dmin/dmax from m->mmd and,
+ * when dmin is finite, set threshold_len to dmin + 1.
+ */
+static void
+set_optimize_map_info(regex_t* reg, OptMapInfo* m)
+{
+  int ch = 0;
+
+  while (ch < ONIG_CHAR_TABLE_SIZE) {
+    reg->map[ch] = m->map[ch];
+    ch++;
+  }
+
+  reg->optimize = ONIG_OPTIMIZE_MAP;
+  reg->dmin     = m->mmd.min;
+  reg->dmax     = m->mmd.max;
+
+  if (reg->dmin == ONIG_INFINITE_DISTANCE)
+    return;
+
+  reg->threshold_len = reg->dmin + 1;
+}
+
+/*
+ * Merge the begin-line bit of anc->left_anchor and the end-line bit of
+ * anc->right_anchor into reg->sub_anchor (existing bits are kept).
+ */
+static void
+set_sub_anchor(regex_t* reg, OptAncInfo* anc)
+{
+  reg->sub_anchor |= (anc->left_anchor  & ANCHOR_BEGIN_LINE)
+                   | (anc->right_anchor & ANCHOR_END_LINE);
+}
+
+#ifdef ONIG_DEBUG
+static void print_optimize_info(FILE* f, regex_t* reg);
+#endif
+
+/*
+ * Analyse the parse tree rooted at `node` and store the resulting search
+ * optimization (anchors plus either an exact string or a character map)
+ * in `reg`.
+ *
+ * Returns 0 on success, or the error code reported by optimize_node_left()
+ * or set_optimize_exact_info().
+ */
+static int
+set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
+{
+  int r;
+  int use_map = 0;
+  NodeOptInfo opt;
+  OptEnv env;
+
+  env.enc      = reg->enc;
+  env.options  = reg->options;
+  env.scan_env = scan_env;
+  clear_mml(&env.mmd);
+
+  r = optimize_node_left(node, &opt, &env);
+  if (r != 0) return r;
+
+  reg->anchor  = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
+        ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_PL);
+  reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
+
+  /* an end-of-buffer anchor also records the overall match length range */
+  if ((reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) != 0) {
+    reg->anchor_dmin = opt.len.min;
+    reg->anchor_dmax = opt.len.max;
+  }
+
+  if (opt.exb.len > 0 || opt.exm.len > 0) {
+    /* keep the better of the two exact candidates in opt.exb */
+    select_opt_exact_info(&opt.exb, &opt.exm);
+    if (opt.map.value > 0 &&
+        comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
+      use_map = 1;
+    }
+    else {
+      r = set_optimize_exact_info(reg, &opt.exb);
+      set_sub_anchor(reg, &opt.exb.anc);
+    }
+  }
+  else if (opt.map.value > 0) {
+    use_map = 1;
+  }
+  else {
+    /* no exact string and no map: only line anchors can be recorded */
+    reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
+    if (opt.len.max == 0)
+      reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
+  }
+
+  if (use_map) {
+    set_optimize_map_info(reg, &opt.map);
+    set_sub_anchor(reg, &opt.map.anc);
+  }
+
+#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
+  print_optimize_info(stderr, reg);
+#endif
+  return r;
+}
+
+/*
+ * Reset every optimization field of `reg` to "no optimization" and release
+ * the exact-string buffer if one is attached.
+ */
+static void
+clear_optimize_info(regex_t* reg)
+{
+  if (IS_NOT_NULL(reg->exact)) {
+    xfree(reg->exact);
+    reg->exact = (UChar* )NULL;
+  }
+  reg->exact_end = (UChar* )NULL;
+
+  reg->optimize      = ONIG_OPTIMIZE_NONE;
+  reg->threshold_len = 0;
+
+  reg->anchor      = 0;
+  reg->sub_anchor  = 0;
+  reg->anchor_dmin = 0;
+  reg->anchor_dmax = 0;
+}
+
+#ifdef ONIG_DEBUG
+
+/*
+ * Debug helper: write the distance range a-b to `f`.  Each bound is printed
+ * as "inf" when it equals ONIG_INFINITE_DISTANCE, otherwise as "(<value>)".
+ */
+static void
+print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
+{
+  OnigDistance bound[2];
+  int k;
+
+  bound[0] = a;
+  bound[1] = b;
+
+  for (k = 0; k < 2; k++) {
+    if (k > 0) fputs("-", f);
+
+    if (bound[k] == ONIG_INFINITE_DISTANCE)
+      fputs("inf", f);
+    else
+      fprintf(f, "(%u)", bound[k]);
+  }
+}
+
+/*
+ * Debug helper: write the anchor bits set in `anchor` to `f` as a
+ * bracketed, comma-separated list of names, e.g. "[begin-buf, end-line]".
+ */
+static void
+print_anchor(FILE* f, int anchor)
+{
+  /* listed in output order */
+  static const struct {
+    int         bit;
+    const char* label;
+  } names[] = {
+    { ANCHOR_BEGIN_BUF,       "begin-buf"       },
+    { ANCHOR_BEGIN_LINE,      "begin-line"      },
+    { ANCHOR_BEGIN_POSITION,  "begin-pos"       },
+    { ANCHOR_END_BUF,         "end-buf"         },
+    { ANCHOR_SEMI_END_BUF,    "semi-end-buf"    },
+    { ANCHOR_END_LINE,        "end-line"        },
+    { ANCHOR_ANYCHAR_STAR,    "anychar-star"    },
+    { ANCHOR_ANYCHAR_STAR_PL, "anychar-star-pl" }
+  };
+  const char* sep = "";
+  int k;
+
+  fprintf(f, "[");
+
+  for (k = 0; k < (int )(sizeof(names) / sizeof(names[0])); k++) {
+    if ((anchor & names[k].bit) == 0) continue;
+
+    fprintf(f, "%s%s", sep, names[k].label);
+    sep = ", ";
+  }
+
+  fprintf(f, "]");
+}
+
+/*
+ * Debug helper: dump the optimization data stored in `reg` to `f`:
+ * the optimize kind, the anchors (with the anchor distance range when an
+ * end-of-buffer anchor is present), the sub anchors, and then either the
+ * exact search string or a summary of the character map.
+ */
+static void
+print_optimize_info(FILE* f, regex_t* reg)
+{
+  /* indexed by reg->optimize */
+  static char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
+                        "EXACT_IC", "MAP" };
+
+  fprintf(f, "optimize: %s\n", on[reg->optimize]);
+  fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
+  if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
+    print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
+  fprintf(f, "\n");
+
+  if (reg->optimize) {
+    fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
+    fprintf(f, "\n");
+  }
+  fprintf(f, "\n");
+
+  if (reg->exact) {
+    UChar *p;
+    fprintf(f, "exact: [");
+    for (p = reg->exact; p < reg->exact_end; p++) {
+      fputc(*p, f);
+    }
+    /* a pointer difference is not an int: convert before handing it to %d */
+    fprintf(f, "]: length: %d\n", (int )(reg->exact_end - reg->exact));
+  }
+  /* reg->optimize is an index (see on[] above), not a bit mask, so it has
+     to be compared rather than tested with '&' */
+  else if (reg->optimize == ONIG_OPTIMIZE_MAP) {
+    int i, n = 0;
+    for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
+      if (reg->map[i]) n++;
+
+    fprintf(f, "map: n=%d\n", n);
+    if (n > 0) {
+      fputc('[', f);
+      /* show only single-byte, printable members of the map */
+      for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
+        if (reg->map[i] && enc_len(reg->enc, i) == 1 &&
+            ONIGENC_IS_CODE_PRINT(reg->enc, i))
+          fputc(i, f);
+      fprintf(f, "]\n");
+    }
+  }
+}
+#endif /* ONIG_DEBUG */
+
+
+/*
+ * Release everything owned by `reg` (byte code, exact string, skip tables,
+ * repeat ranges, the chained regex objects and, with USE_NAMED_GROUP, the
+ * name table) without freeing the regex_t structure itself.
+ */
+static void
+onig_free_body(regex_t* reg)
+{
+  if (IS_NOT_NULL(reg->p)) {
+    xfree(reg->p);
+  }
+  if (IS_NOT_NULL(reg->exact)) {
+    xfree(reg->exact);
+  }
+  if (IS_NOT_NULL(reg->int_map)) {
+    xfree(reg->int_map);
+  }
+  if (IS_NOT_NULL(reg->int_map_backward)) {
+    xfree(reg->int_map_backward);
+  }
+  if (IS_NOT_NULL(reg->repeat_range)) {
+    xfree(reg->repeat_range);
+  }
+  if (IS_NOT_NULL(reg->chain)) {
+    onig_free(reg->chain);   /* frees the rest of the chain recursively */
+  }
+
+#ifdef USE_NAMED_GROUP
+  onig_names_free(reg);
+#endif
+}
+
+/*
+ * Destroy a regex object: release its contents and the structure itself.
+ * A NULL `reg` is accepted and ignored.
+ */
+extern void
+onig_free(regex_t* reg)
+{
+  if (IS_NULL(reg)) return;
+
+  onig_free_body(reg);
+  xfree(reg);
+}
+
+/*
+ * Replace the contents of `to` with those of `from`: mark `to` as being
+ * modified, free what it currently owns, copy the whole structure over it
+ * and finally free the `from` structure (its buffers now belong to `to`).
+ */
+#define REGEX_TRANSFER(to,from) do {\
+  (to)->state = ONIG_STATE_MODIFY;\
+  onig_free_body(to);\
+  xmemcpy(to, from, sizeof(regex_t));\
+  xfree(from);\
+} while (0)
+
+/*
+ * Move the compiled regex `from` into `to`, with the transfer bracketed by
+ * THREAD_ATOMIC_START / THREAD_ATOMIC_END.  `from` must not be used
+ * afterwards.
+ */
+static void
+onig_transfer(regex_t* to, regex_t* from)
+{
+  THREAD_ATOMIC_START;
+  REGEX_TRANSFER(to, from);
+  THREAD_ATOMIC_END;
+}
+
+/* Advance `reg` along its chain until it refers to the last element. */
+#define REGEX_CHAIN_HEAD(reg) do {\
+  while (IS_NOT_NULL((reg)->chain)) {\
+    (reg) = (reg)->chain;\
+  }\
+} while (0)
+
+/*
+ * Append `add` at the end of the regex chain that starts at `to`.  The walk
+ * and the link update happen between THREAD_ATOMIC_START and
+ * THREAD_ATOMIC_END.
+ */
+static void
+onig_chain_link_add(regex_t* to, regex_t* add)
+{
+  regex_t* tail;
+
+  THREAD_ATOMIC_START;
+  for (tail = to; IS_NOT_NULL(tail->chain); tail = tail->chain)
+    ;
+  tail->chain = add;
+  THREAD_ATOMIC_END;
+}
+
+/*
+ * Collapse the chain hanging off `reg`: the last element of the chain is
+ * unlinked and its contents are transferred into `reg` (REGEX_TRANSFER
+ * frees what `reg` owned before, including any remaining chain elements).
+ * Does nothing when `reg` has no chain.
+ */
+extern void
+onig_chain_reduce(regex_t* reg)
+{
+  regex_t *last, *before;
+
+  THREAD_ATOMIC_START;
+  if (IS_NOT_NULL(reg->chain)) {
+    reg->state = ONIG_STATE_MODIFY;
+
+    /* find the last element and the one that points at it */
+    before = reg;
+    for (last = reg->chain; IS_NOT_NULL(last->chain); last = last->chain)
+      before = last;
+
+    before->chain = (regex_t* )NULL;
+    REGEX_TRANSFER(reg, last);
+  }
+  THREAD_ATOMIC_END;
+}
+
+#if 0 /* onig_clone is compiled out; it would deep-copy `from` into a newly allocated regex returned through *to */
+extern int
+onig_clone(regex_t** to, regex_t* from)
+{
+ int r, size;
+ regex_t* reg;
+
+ if (ONIG_STATE(from) == ONIG_STATE_NORMAL) {
+ from->state++; /* increment as search counter */
+ if (IS_NOT_NULL(from->chain)) {
+ onig_chain_reduce(from); /* fold the pending chain into `from` before copying */
+ from->state++;
+ }
+ }
+ else {
+ int n = 0;
+ while (ONIG_STATE(from) < ONIG_STATE_NORMAL) { /* wait until `from` is back in (at least) the normal state */
+ if (++n > THREAD_PASS_LIMIT_COUNT)
+ return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
+ THREAD_PASS;
+ }
+ from->state++; /* increment as search counter */
+ }
+
+ r = onig_alloc_init(&reg, ONIG_OPTION_NONE, from->enc, ONIG_SYNTAX_DEFAULT);
+ if (r != 0) {
+ from->state--;
+ return r;
+ }
+
+ xmemcpy(reg, from, sizeof(onig_t)); /* NOTE(review): sizeof(onig_t), while reg is a regex_t* - confirm the type name */
+ reg->state = ONIG_STATE_NORMAL;
+ reg->chain = (regex_t* )NULL;
+
+ if (from->p) { /* after the struct copy the buffer pointers alias `from`; each one is replaced by a private copy below */
+ reg->p = (UChar* )xmalloc(reg->alloc);
+ if (IS_NULL(reg->p)) goto mem_error;
+ xmemcpy(reg->p, from->p, reg->alloc);
+ }
+
+ if (from->exact) {
+ reg->exact = (UChar* )xmalloc(from->exact_end - from->exact);
+ if (IS_NULL(reg->exact)) goto mem_error;
+ reg->exact_end = reg->exact + (from->exact_end - from->exact);
+ xmemcpy(reg->exact, from->exact, reg->exact_end - reg->exact);
+ }
+
+ if (from->int_map) {
+ size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
+ reg->int_map = (int* )xmalloc(size);
+ if (IS_NULL(reg->int_map)) goto mem_error;
+ xmemcpy(reg->int_map, from->int_map, size);
+ }
+
+ if (from->int_map_backward) {
+ size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
+ reg->int_map_backward = (int* )xmalloc(size);
+ if (IS_NULL(reg->int_map_backward)) goto mem_error;
+ xmemcpy(reg->int_map_backward, from->int_map_backward, size);
+ }
+
+#ifdef USE_NAMED_GROUP
+ reg->name_table = names_clone(from); /* names_clone is not implemented */
+#endif
+
+ from->state--; /* drop the search-counter increment taken above */
+ *to = reg;
+ return 0;
+
+ mem_error: /* NOTE(review): reg and the buffers already copied are not released on this path */
+ from->state--;
+ return ONIGERR_MEMORY;
+}
+#endif
+
+#ifdef ONIG_DEBUG
+static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
+#endif
+#ifdef ONIG_DEBUG_PARSE_TREE
+static void print_tree P_((FILE* f, Node* node));
+#endif
+
+/*
+ * Compile the pattern [pattern, pattern_end) into the already initialised
+ * regex object `reg`.
+ *
+ * Steps: parse the pattern into a tree, resolve named/numbered groups and
+ * subexp calls (when those features are compiled in), run setup_tree(),
+ * derive the search optimization info, and emit the byte code terminated
+ * by OP_END.
+ *
+ * Returns 0 on success or an ONIGERR_* code.  When the scan environment
+ * recorded an error position and `einfo` is not NULL, einfo->par /
+ * einfo->par_end are set to it.  reg->state is put back to
+ * ONIG_STATE_NORMAL on the paths that leave through `end`; the `err` paths
+ * leave it as ONIG_STATE_COMPILING.
+ */
+extern int
+onig_compile(regex_t* reg, UChar* pattern, UChar* pattern_end,
+             OnigErrorInfo* einfo)
+{
+#define COMPILE_INIT_SIZE 20
+
+  int r, init_size;
+  Node* root;
+  ScanEnv scan_env;
+#ifdef USE_SUBEXP_CALL
+  UnsetAddrList uslist;
+#endif
+
+  reg->state = ONIG_STATE_COMPILING;
+
+  /* Reset these before anything can fail: when the buffer allocation below
+     fails the caller frees `reg`, and onig_free_body() looks at
+     repeat_range, so it must not be left uninitialised. */
+  reg->num_mem            = 0;
+  reg->num_repeat         = 0;
+  reg->num_null_check     = 0;
+  reg->repeat_range_alloc = 0;
+  reg->repeat_range       = (OnigRepeatRange* )NULL;
+
+  if (reg->alloc == 0) {
+    init_size = (pattern_end - pattern) * 2;
+    if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
+    r = BBUF_INIT(reg, init_size);
+    if (r != 0) goto end;
+  }
+  else
+    reg->used = 0;
+
+  r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
+  if (r != 0) goto err;
+
+#ifdef USE_NAMED_GROUP
+  /* mixed use named group and no-named group */
+  if (scan_env.num_named > 0 &&
+      IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
+      !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
+    if (scan_env.num_named != scan_env.num_mem)
+      r = disable_noname_group_capture(&root, reg, &scan_env);
+    else
+      r = numbered_ref_check(root);
+
+    if (r != 0) goto err;
+  }
+#endif
+
+#ifdef ONIG_DEBUG_PARSE_TREE
+  print_tree(stderr, root);
+#endif
+
+#ifdef USE_SUBEXP_CALL
+  if (scan_env.num_call > 0) {
+    r = unset_addr_list_init(&uslist, scan_env.num_call);
+    if (r != 0) goto err;
+    scan_env.unset_addr_list = &uslist;
+    r = setup_subexp_call(root, &scan_env);
+    if (r != 0) goto err_unset;
+    r = subexp_recursive_check_trav(root, &scan_env);
+    if (r  < 0) goto err_unset;
+    r = subexp_inf_recursive_check_trav(root, &scan_env);
+    if (r != 0) goto err_unset;
+
+    reg->num_call = scan_env.num_call;
+  }
+  else
+    reg->num_call = 0;
+#endif
+
+  r = setup_tree(root, reg, 0, &scan_env);
+  if (r != 0) goto err_unset;
+
+  reg->capture_history = scan_env.capture_history;
+  reg->bt_mem_start    = scan_env.bt_mem_start;
+  reg->bt_mem_start   |= reg->capture_history;
+  if (IS_FIND_CONDITION(reg->options))
+    BIT_STATUS_ON_ALL(reg->bt_mem_end);
+  else {
+    reg->bt_mem_end  = scan_env.bt_mem_end;
+    reg->bt_mem_end |= reg->capture_history;
+  }
+
+  clear_optimize_info(reg);
+#ifndef ONIG_DONT_OPTIMIZE
+  r = set_optimize_info_from_tree(root, reg, &scan_env);
+  if (r != 0) goto err_unset;
+#endif
+
+  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
+    xfree(scan_env.mem_nodes_dynamic);
+    scan_env.mem_nodes_dynamic = (Node** )NULL;
+  }
+
+  r = compile_tree(root, reg);
+  if (r == 0) {
+    r = add_opcode(reg, OP_END);
+#ifdef USE_SUBEXP_CALL
+    if (scan_env.num_call > 0) {
+      /* do not let the address fix-up overwrite an add_opcode() failure */
+      if (r == 0) r = unset_addr_list_fix(&uslist, reg);
+      unset_addr_list_end(&uslist);
+      if (r) goto err;
+    }
+#endif
+
+    if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
+      reg->stack_pop_level = STACK_POP_LEVEL_ALL;
+    else {
+      if (reg->bt_mem_start != 0)
+        reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
+      else
+        reg->stack_pop_level = STACK_POP_LEVEL_FREE;
+    }
+  }
+#ifdef USE_SUBEXP_CALL
+  else if (scan_env.num_call > 0) {
+    unset_addr_list_end(&uslist);
+  }
+#endif
+  onig_node_free(root);
+
+#ifdef ONIG_DEBUG_COMPILE
+#ifdef USE_NAMED_GROUP
+  onig_print_names(stderr, reg);
+#endif
+  print_compiled_byte_code_list(stderr, reg);
+#endif
+
+ end:
+  reg->state = ONIG_STATE_NORMAL;
+  return r;
+
+ err_unset:
+#ifdef USE_SUBEXP_CALL
+  if (scan_env.num_call > 0) {
+    unset_addr_list_end(&uslist);
+  }
+#endif
+ err:
+  /* report the error position, if the scan environment recorded one */
+  if (IS_NOT_NULL(scan_env.error)) {
+    if (IS_NOT_NULL(einfo)) {
+      einfo->par     = scan_env.error;
+      einfo->par_end = scan_env.error_end;
+    }
+  }
+
+  if (IS_NOT_NULL(root)) onig_node_free(root);
+  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
+    xfree(scan_env.mem_nodes_dynamic);
+  return r;
+}
+
+/*
+ * Compile a new pattern and make it the content of the existing regex
+ * object `reg`.
+ *
+ * The pattern is first compiled into a fresh object.  If `reg` is in the
+ * normal state the new contents are transferred into it right away;
+ * otherwise the new object is appended to reg's chain.
+ *
+ * Returns 0 on success, or the error from onig_new() (in which case `reg`
+ * is left untouched).
+ */
+extern int
+onig_recompile(regex_t* reg, UChar* pattern, UChar* pattern_end,
+               OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
+               OnigErrorInfo* einfo)
+{
+  regex_t* fresh;
+  int r = onig_new(&fresh, pattern, pattern_end, option, enc, syntax, einfo);
+
+  if (r != 0) return r;
+
+  if (ONIG_STATE(reg) != ONIG_STATE_NORMAL)
+    onig_chain_link_add(reg, fresh);
+  else
+    onig_transfer(reg, fresh);
+
+  return 0;
+}
+
+static int onig_inited = 0;
+
+/*
+ * Allocate a regex object and initialise it with the given options,
+ * encoding and syntax.  The library is initialised on first use.
+ *
+ * The syntax's own options are always merged into `option`;
+ * ONIG_OPTION_NEGATE_SINGLELINE additionally clears ONIG_OPTION_SINGLELINE.
+ *
+ * On success the new object is stored in *reg and 0 is returned.
+ * Returns ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED when `enc` is undefined
+ * (*reg is not written in that case) and ONIGERR_MEMORY when the
+ * allocation fails (*reg is NULL then).
+ */
+extern int
+onig_alloc_init(regex_t** reg, OnigOptionType option, OnigEncoding enc,
+                OnigSyntaxType* syntax)
+{
+  if (! onig_inited)
+    onig_init();
+
+  if (ONIGENC_IS_UNDEF(enc))
+    return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
+
+  *reg = (regex_t* )xmalloc(sizeof(regex_t));
+  if (IS_NULL(*reg)) return ONIGERR_MEMORY;
+
+  if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
+    option |= syntax->options;
+    option &= ~ONIG_OPTION_SINGLELINE;
+  }
+  else
+    option |= syntax->options;
+
+  (*reg)->state            = ONIG_STATE_NORMAL;
+  (*reg)->enc              = enc;
+  (*reg)->options          = option;
+  (*reg)->syntax           = syntax;
+  (*reg)->optimize         = 0;
+  (*reg)->exact            = (UChar* )NULL;
+  (*reg)->int_map          = (int* )NULL;
+  (*reg)->int_map_backward = (int* )NULL;
+  (*reg)->chain            = (regex_t* )NULL;
+
+  (*reg)->p                = (UChar* )NULL;
+  (*reg)->alloc            = 0;
+  (*reg)->used             = 0;
+  (*reg)->name_table       = (void* )NULL;
+
+  /* onig_free_body() tests and frees repeat_range, so a freshly allocated
+     object must never carry an indeterminate pointer there, even if it is
+     freed before a compile got far enough to set it. */
+  (*reg)->repeat_range       = (OnigRepeatRange* )NULL;
+  (*reg)->repeat_range_alloc = 0;
+  (*reg)->num_mem            = 0;
+  (*reg)->num_repeat         = 0;
+  (*reg)->num_null_check     = 0;
+
+  return 0;
+}
+
+/*
+ * Create a regex object for the pattern [pattern, pattern_end).
+ *
+ * On success *reg receives the compiled object and 0 is returned.  If the
+ * allocation step fails its error code is returned directly; if
+ * compilation fails the object is freed, *reg is set to NULL and the
+ * compile error is returned.  einfo->par is cleared up front when `einfo`
+ * is given.
+ */
+extern int
+onig_new(regex_t** reg, UChar* pattern, UChar* pattern_end,
+         OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
+         OnigErrorInfo* einfo)
+{
+  int r;
+
+  if (IS_NOT_NULL(einfo))
+    einfo->par = (UChar* )NULL;
+
+  r = onig_alloc_init(reg, option, enc, syntax);
+  if (r != 0) return r;
+
+  r = onig_compile(*reg, pattern, pattern_end, einfo);
+  if (r == 0) return 0;
+
+  /* compilation failed: do not hand a half-built object to the caller */
+  onig_free(*reg);
+  *reg = NULL;
+  return r;
+}
+
+/*
+ * One-time library initialisation.  The first call marks the library as
+ * initialised and sets up the encoding layer (and the statistics counters
+ * when ONIG_DEBUG_STATISTICS is defined); later calls do nothing.
+ * Always returns 0.
+ */
+extern int
+onig_init()
+{
+  if (onig_inited == 0) {
+    onig_inited = 1;
+
+    THREAD_ATOMIC_START;
+
+    onigenc_init();
+    onigenc_set_default_caseconv_table((UChar* )0);
+
+#ifdef ONIG_DEBUG_STATISTICS
+    onig_statistics_init();
+#endif
+
+    THREAD_ATOMIC_END;
+  }
+
+  return 0;
+}
+
+/*
+ * Library shutdown: print the statistics (ONIG_DEBUG_STATISTICS), release
+ * the recycled node list (USE_RECYCLE_NODE) and mark the library as not
+ * initialised so that a later onig_init() runs again.  Always returns 0.
+ */
+extern int
+onig_end()
+{
+  const int status = 0;
+
+#ifdef ONIG_DEBUG_STATISTICS
+  onig_print_statistics(stderr);
+#endif
+
+#ifdef USE_RECYCLE_NODE
+  onig_free_node_list();
+#endif
+
+  onig_inited = 0;
+
+  return status;
+}
+
+
+#ifdef ONIG_DEBUG
+
+OnigOpInfoType OnigOpInfo[] = { /* debug table: one { opcode, printable name, operand kind } row per byte-code opcode */
+ { OP_FINISH, "finish", ARG_NON },
+ { OP_END, "end", ARG_NON },
+ { OP_EXACT1, "exact1", ARG_SPECIAL }, /* ARG_SPECIAL rows are decoded case by case in onig_print_compiled_byte_code() */
+ { OP_EXACT2, "exact2", ARG_SPECIAL },
+ { OP_EXACT3, "exact3", ARG_SPECIAL },
+ { OP_EXACT4, "exact4", ARG_SPECIAL },
+ { OP_EXACT5, "exact5", ARG_SPECIAL },
+ { OP_EXACTN, "exactn", ARG_SPECIAL },
+ { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
+ { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
+ { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
+ { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
+ { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
+ { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
+ { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
+ { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
+ { OP_CCLASS, "cclass", ARG_SPECIAL },
+ { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
+ { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
+ { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
+ { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
+ { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
+ { OP_ANYCHAR, "anychar", ARG_NON },
+ { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
+ { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
+ { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
+ { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
+ { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
+ { OP_WORD, "word", ARG_NON },
+ { OP_NOT_WORD, "not-word", ARG_NON },
+ { OP_WORD_SB, "word-sb", ARG_NON },
+ { OP_WORD_MB, "word-mb", ARG_NON },
+ { OP_WORD_BOUND, "word-bound", ARG_NON },
+ { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
+ { OP_WORD_BEGIN, "word-begin", ARG_NON },
+ { OP_WORD_END, "word-end", ARG_NON },
+ { OP_BEGIN_BUF, "begin-buf", ARG_NON },
+ { OP_END_BUF, "end-buf", ARG_NON },
+ { OP_BEGIN_LINE, "begin-line", ARG_NON },
+ { OP_END_LINE, "end-line", ARG_NON },
+ { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
+ { OP_BEGIN_POSITION, "begin-position", ARG_NON },
+ { OP_BACKREF1, "backref1", ARG_NON },
+ { OP_BACKREF2, "backref2", ARG_NON },
+ { OP_BACKREF3, "backref3", ARG_NON },
+ { OP_BACKREFN, "backrefn", ARG_MEMNUM },
+ { OP_BACKREFN_IC, "backrefn-ic", ARG_MEMNUM },
+ { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
+ { OP_BACKREF_MULTI_IC, "backref_multi-ic",ARG_SPECIAL },
+ { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
+ { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
+ { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
+ { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
+ { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
+ { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
+ { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
+ { OP_SET_OPTION, "set-option", ARG_OPTION },
+ { OP_FAIL, "fail", ARG_NON },
+ { OP_JUMP, "jump", ARG_RELADDR },
+ { OP_PUSH, "push", ARG_RELADDR },
+ { OP_POP, "pop", ARG_NON },
+ { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
+ { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
+ { OP_REPEAT, "repeat", ARG_SPECIAL },
+ { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
+ { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
+ { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
+ { OP_NULL_CHECK_START, "null-check-start",ARG_MEMNUM },
+ { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
+ { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
+ { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
+ { OP_PUSH_POS, "push-pos", ARG_NON },
+ { OP_POP_POS, "pop-pos", ARG_NON },
+ { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
+ { OP_FAIL_POS, "fail-pos", ARG_NON },
+ { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
+ { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
+ { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
+ { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
+ { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
+ { OP_CALL, "call", ARG_ABSADDR },
+ { OP_RETURN, "return", ARG_NON },
+ { -1, "", ARG_NON } /* sentinel: the lookup loops stop at the first negative opcode */
+};
+
+/*
+ * Return the printable name of `opcode` from OnigOpInfo, or "" when the
+ * opcode is not listed.
+ */
+static char*
+op2name(int opcode)
+{
+  OnigOpInfoType* ent = OnigOpInfo;
+
+  /* the table ends with an entry whose opcode is negative */
+  while (ent->opcode >= 0) {
+    if (ent->opcode == opcode)
+      return ent->name;
+    ent++;
+  }
+  return "";
+}
+
+/*
+ * Return the operand kind (ARG_*) recorded for `opcode` in OnigOpInfo;
+ * opcodes that are not listed are reported as ARG_SPECIAL.
+ */
+static int
+op2arg_type(int opcode)
+{
+  OnigOpInfoType* ent = OnigOpInfo;
+
+  /* the table ends with an entry whose opcode is negative */
+  while (ent->opcode >= 0) {
+    if (ent->opcode == opcode)
+      return ent->arg_type;
+    ent++;
+  }
+  return ARG_SPECIAL;
+}
+
+/* Write `indent` space characters to `f` (nothing when indent <= 0). */
+static void
+Indent(FILE* f, int indent)
+{
+  while (indent-- > 0) {
+    putc(' ', f);
+  }
+}
+
+/* Write ":" followed by the `len` bytes starting at `s` to `f`. */
+static void
+p_string(FILE* f, int len, UChar* s)
+{
+  int k;
+
+  fputs(":", f);
+  for (k = 0; k < len; k++) {
+    fputc(s[k], f);
+  }
+}
+
+/*
+ * Write ":<len>:" followed by len * mb_len bytes starting at `s` to `f`
+ * (`len` characters of `mb_len` bytes each).
+ */
+static void
+p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
+{
+  int nbytes = len * mb_len;
+  int k;
+
+  fprintf(f, ":%d:", len);
+  for (k = 0; k < nbytes; k++) {
+    fputc(s[k], f);
+  }
+}
+
+/*
+ * Debug helper: print the single byte-code instruction at `bp` to `f` as
+ * "[name operands...]".  When `nextp` is not NULL, *nextp receives the
+ * address of the instruction that follows.
+ *
+ * Operands sit directly behind the one-byte opcode, so they are not
+ * necessarily aligned.  They are therefore read with the GET_*_INC macros
+ * (or copied out byte-wise) instead of by dereferencing a casted pointer.
+ */
+extern void
+onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp)
+{
+  int i, n, arg_type;
+  RelAddrType addr;
+  LengthType len;
+  MemNumType mem;
+  OnigCodePoint code;
+  UChar *q;
+
+  fprintf(f, "[%s", op2name(*bp));
+  arg_type = op2arg_type(*bp);
+  if (arg_type != ARG_SPECIAL) {
+    /* opcodes with one uniformly encoded operand (or none) */
+    bp++;
+    switch (arg_type) {
+    case ARG_NON:
+      break;
+    case ARG_RELADDR:
+      GET_RELADDR_INC(addr, bp);
+      fprintf(f, ":(%d)", addr);
+      break;
+    case ARG_ABSADDR:
+      GET_ABSADDR_INC(addr, bp);
+      fprintf(f, ":(%d)", addr);
+      break;
+    case ARG_LENGTH:
+      GET_LENGTH_INC(len, bp);
+      fprintf(f, ":%d", len);
+      break;
+    case ARG_MEMNUM:
+      GET_MEMNUM_INC(mem, bp);
+      fprintf(f, ":%d", mem);
+      break;
+    case ARG_OPTION:
+      {
+        OnigOptionType option;
+
+        xmemcpy(&option, bp, sizeof(option));
+        bp += SIZE_OPTION;
+        fprintf(f, ":%d", option);
+      }
+      break;
+    }
+  }
+  else {
+    /* opcodes whose operand layout is specific to the opcode */
+    switch (*bp++) {
+    case OP_EXACT1:
+    case OP_ANYCHAR_STAR_PEEK_NEXT:
+    case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
+      p_string(f, 1, bp++); break;
+    case OP_EXACT2:
+      p_string(f, 2, bp); bp += 2; break;
+    case OP_EXACT3:
+      p_string(f, 3, bp); bp += 3; break;
+    case OP_EXACT4:
+      p_string(f, 4, bp); bp += 4; break;
+    case OP_EXACT5:
+      p_string(f, 5, bp); bp += 5; break;
+    case OP_EXACTN:
+      GET_LENGTH_INC(len, bp);
+      p_len_string(f, len, 1, bp);
+      bp += len;
+      break;
+
+    case OP_EXACTMB2N1:
+      p_string(f, 2, bp); bp += 2; break;
+    case OP_EXACTMB2N2:
+      p_string(f, 4, bp); bp += 4; break;
+    case OP_EXACTMB2N3:
+      p_string(f, 6, bp); bp += 6; break;
+    case OP_EXACTMB2N:
+      GET_LENGTH_INC(len, bp);
+      p_len_string(f, len, 2, bp);
+      bp += len * 2;
+      break;
+    case OP_EXACTMB3N:
+      GET_LENGTH_INC(len, bp);
+      p_len_string(f, len, 3, bp);
+      bp += len * 3;
+      break;
+    case OP_EXACTMBN:
+      {
+        int mb_len;
+
+        GET_LENGTH_INC(mb_len, bp);
+        GET_LENGTH_INC(len, bp);
+        fprintf(f, ":%d:%d:", mb_len, len);
+        n = len * mb_len;
+        while (n-- > 0) { fputc(*bp++, f); }
+      }
+      break;
+
+    case OP_EXACT1_IC:
+      p_string(f, 1, bp++);
+      break;
+    case OP_EXACTN_IC:
+      GET_LENGTH_INC(len, bp);
+      p_len_string(f, len, 1, bp);
+      bp += len;
+      break;
+
+    case OP_CCLASS:
+      n = bitset_on_num((BitSetRef )bp);
+      bp += SIZE_BITSET;
+      fprintf(f, ":%d", n);
+      break;
+
+    case OP_CCLASS_NOT:
+      n = bitset_on_num((BitSetRef )bp);
+      bp += SIZE_BITSET;
+      fprintf(f, ":%d", n);
+      break;
+
+    case OP_CCLASS_MB:
+    case OP_CCLASS_MB_NOT:
+      GET_LENGTH_INC(len, bp);
+      q = bp;
+#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
+      ALIGNMENT_RIGHT(q);
+#endif
+      GET_CODE_POINT(code, q);
+      bp += len;
+      fprintf(f, ":%d:%d", (int )code, len);
+      break;
+
+    case OP_CCLASS_MIX:
+    case OP_CCLASS_MIX_NOT:
+      n = bitset_on_num((BitSetRef )bp);
+      bp += SIZE_BITSET;
+      GET_LENGTH_INC(len, bp);
+      q = bp;
+#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
+      ALIGNMENT_RIGHT(q);
+#endif
+      GET_CODE_POINT(code, q);
+      bp += len;
+      fprintf(f, ":%d:%d:%d", n, (int )code, len);
+      break;
+
+    case OP_BACKREF_MULTI:
+    case OP_BACKREF_MULTI_IC:
+      fputs(" ", f);
+      GET_LENGTH_INC(len, bp);
+      for (i = 0; i < len; i++) {
+        GET_MEMNUM_INC(mem, bp);
+        if (i > 0) fputs(", ", f);
+        fprintf(f, "%d", mem);
+      }
+      break;
+
+    case OP_REPEAT:
+    case OP_REPEAT_NG:
+      {
+        GET_MEMNUM_INC(mem, bp);
+        GET_RELADDR_INC(addr, bp);
+        fprintf(f, ":%d:%d", mem, addr);
+      }
+      break;
+
+    case OP_PUSH_OR_JUMP_EXACT1:
+    case OP_PUSH_IF_PEEK_NEXT:
+      GET_RELADDR_INC(addr, bp);
+      fprintf(f, ":(%d)", addr);
+      p_string(f, 1, bp);
+      bp += 1;
+      break;
+
+    case OP_LOOK_BEHIND:
+      GET_LENGTH_INC(len, bp);
+      fprintf(f, ":%d", len);
+      break;
+
+    case OP_PUSH_LOOK_BEHIND_NOT:
+      GET_RELADDR_INC(addr, bp);
+      GET_LENGTH_INC(len, bp);
+      fprintf(f, ":%d:(%d)", len, addr);
+      break;
+
+    default:
+      /* step back onto the opcode byte so that it can be reported */
+      fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
+              *--bp);
+    }
+  }
+  fputs("]", f);
+  if (nextp) *nextp = bp;
+}
+
+/*
+ * Debug helper: print the length of the compiled byte code of `reg` and
+ * then every instruction in it.  Instructions are separated by a space,
+ * except that a newline is used before every instruction whose 1-based
+ * ordinal is a multiple of 5.
+ */
+static void
+print_compiled_byte_code_list(FILE* f, regex_t* reg)
+{
+  UChar* cur  = reg->p;
+  UChar* stop = reg->p + reg->used;
+  int    ordinal;
+
+  fprintf(f, "code length: %d\n", reg->used);
+
+  for (ordinal = 1; cur < stop; ordinal++) {
+    if (cur > reg->p) {
+      /* separator in front of every instruction but the first */
+      fputs((ordinal % 5 == 0) ? "\n" : " ", f);
+    }
+    onig_print_compiled_byte_code(f, cur, &cur);
+  }
+
+  fprintf(f, "\n");
+}
+
+/*
+ * Debug helper: print the parse tree rooted at `node` to `f`, one node per
+ * line, with the node indented by `indent` columns and its children three
+ * columns deeper.  A NULL node, a malformed list/alt chain or an unknown
+ * ctype is reported and terminates the process.
+ */
+static void
+print_indent_tree(FILE* f, Node* node, int indent)
+{
+  int i, type;
+  int add = 3;
+  UChar* p;
+
+  Indent(f, indent);
+  if (IS_NULL(node)) {
+    fprintf(f, "ERROR: null node!!!\n");
+    exit (0);
+  }
+
+  type = NTYPE(node);
+  switch (type) {
+  case N_LIST:
+  case N_ALT:
+    if (NTYPE(node) == N_LIST)
+      fprintf(f, "<list:%x>\n", (int )node);
+    else
+      fprintf(f, "<alt:%x>\n", (int )node);
+
+    /* walk the cons chain; every element's payload is its left child */
+    print_indent_tree(f, NCONS(node).left, indent + add);
+    while (IS_NOT_NULL(node = NCONS(node).right)) {
+      if (NTYPE(node) != type) {
+        fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
+        exit(0);
+      }
+      print_indent_tree(f, NCONS(node).left, indent + add);
+    }
+    break;
+
+  case N_STRING:
+    fprintf(f, "<string%s:%x>",
+            (NSTRING_IS_RAW(node) ? "-raw" : ""), (int )node);
+    for (p = NSTRING(node).s; p < NSTRING(node).end; p++) {
+      if (*p >= 0x20 && *p < 0x7f)
+        fputc(*p, f);
+      else {
+        fprintf(f, " 0x%02x", *p);
+      }
+    }
+    break;
+
+  case N_CCLASS:
+    fprintf(f, "<cclass:%x>", (int )node);
+    if (NCCLASS(node).not) fputs(" not", f);
+    if (NCCLASS(node).mbuf) {
+      BBuf* bbuf = NCCLASS(node).mbuf;
+      for (i = 0; i < bbuf->used; i++) {
+        if (i > 0) fprintf(f, ",");
+        fprintf(f, "%0x", bbuf->p[i]);
+      }
+    }
+#if 0
+    fprintf(f, "\n");
+    Indent(f, indent);
+    for (i = 0; i < SINGLE_BYTE_SIZE; i++)
+      fputc((BITSET_AT(NCCLASS(node).bs, i) ? '1' : '0'), f);
+#endif
+    break;
+
+  case N_CTYPE:
+    fprintf(f, "<ctype:%x> ", (int )node);
+    switch (NCTYPE(node).type) {
+    case CTYPE_WORD:     fputs("word", f);     break;
+    case CTYPE_NOT_WORD: fputs("not word", f); break;
+    default:
+      fprintf(f, "ERROR: undefined ctype.\n");
+      exit(0);
+    }
+    break;
+
+  case N_ANYCHAR:
+    fprintf(f, "<anychar:%x>", (int )node);
+    break;
+
+  case N_ANCHOR:
+    fprintf(f, "<anchor:%x> ", (int )node);
+    switch (NANCHOR(node).type) {
+    case ANCHOR_BEGIN_BUF:      fputs("begin buf", f);      break;
+    case ANCHOR_END_BUF:        fputs("end buf", f);        break;
+    case ANCHOR_BEGIN_LINE:     fputs("begin line", f);     break;
+    case ANCHOR_END_LINE:       fputs("end line", f);       break;
+    case ANCHOR_SEMI_END_BUF:   fputs("semi end buf", f);   break;
+    case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
+
+    case ANCHOR_WORD_BOUND:     fputs("word bound", f);     break;
+    case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
+#ifdef USE_WORD_BEGIN_END
+    case ANCHOR_WORD_BEGIN:     fputs("word begin", f);     break;
+    case ANCHOR_WORD_END:       fputs("word end", f);       break;
+#endif
+    case ANCHOR_PREC_READ:       fputs("prec read", f);       break;
+    case ANCHOR_PREC_READ_NOT:   fputs("prec read not", f);   break;
+    case ANCHOR_LOOK_BEHIND:     fputs("look_behind", f);     break;
+    case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); break;
+
+    default:
+      fprintf(f, "ERROR: undefined anchor type.\n");
+      break;
+    }
+    break;
+
+  case N_BACKREF:
+    {
+      int* backs;   /* named so that it does not shadow the outer `p` */
+      BackrefNode* br = &(NBACKREF(node));
+      backs = BACKREFS_P(br);
+      fprintf(f, "<backref:%x>", (int )node);
+      for (i = 0; i < br->back_num; i++) {
+        if (i > 0) fputs(", ", f);
+        fprintf(f, "%d", backs[i]);
+      }
+    }
+    break;
+
+#ifdef USE_SUBEXP_CALL
+  case N_CALL:
+    {
+      CallNode* cn = &(NCALL(node));
+      fprintf(f, "<call:%x>", (int )node);
+      p_string(f, cn->name_end - cn->name, cn->name);
+    }
+    break;
+#endif
+
+  case N_QUALIFIER:
+    fprintf(f, "<qualifier:%x>{%d,%d}%s\n", (int )node,
+            NQUALIFIER(node).lower, NQUALIFIER(node).upper,
+            (NQUALIFIER(node).greedy ? "" : "?"));
+    print_indent_tree(f, NQUALIFIER(node).target, indent + add);
+    break;
+
+  case N_EFFECT:
+    fprintf(f, "<effect:%x> ", (int )node);
+    switch (NEFFECT(node).type) {
+    case EFFECT_OPTION:
+      /* only the label here: the newline and the subtree are printed once,
+         below, for every effect type (the subtree used to be printed a
+         second time for option effects) */
+      fprintf(f, "option:%d", NEFFECT(node).option);
+      break;
+    case EFFECT_MEMORY:
+      fprintf(f, "memory:%d", NEFFECT(node).regnum);
+      break;
+    case EFFECT_STOP_BACKTRACK:
+      fprintf(f, "stop-bt");
+      break;
+
+    default:
+      break;
+    }
+    fprintf(f, "\n");
+    print_indent_tree(f, NEFFECT(node).target, indent + add);
+    break;
+
+  default:
+    fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
+    break;
+  }
+
+  /* node kinds with children have already ended their own line */
+  if (type != N_LIST && type != N_ALT && type != N_QUALIFIER &&
+      type != N_EFFECT)
+    fprintf(f, "\n");
+  fflush(f);
+}
+#endif /* ONIG_DEBUG */
+
+#ifdef ONIG_DEBUG_PARSE_TREE
+/* Debug helper: print the whole parse tree `node` to `f`, starting at column 0. */
+static void
+print_tree(FILE* f, Node* node)
+{
+  const int root_indent = 0;
+
+  print_indent_tree(f, node, root_indent);
+}
+#endif
diff --git a/regenc.c b/regenc.c
new file mode 100644
index 000000000..21598ca7c
--- /dev/null
+++ b/regenc.c
@@ -0,0 +1,586 @@
+/**********************************************************************
+
+ regenc.c - Oniguruma (regular expression library)
+
+ Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
+
+**********************************************************************/
+#include "regenc.h"
+
+OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
+
+/* Encoding-layer initialisation hook: does no work here and always returns 0. */
+extern int
+onigenc_init()
+{
+ return 0;
+}
+
+/* Return the current value of the global OnigEncDefaultCharEncoding. */
+extern OnigEncoding
+onigenc_get_default_encoding()
+{
+ return OnigEncDefaultCharEncoding;
+}
+
+/*
+ * Replace the global OnigEncDefaultCharEncoding with `enc`.
+ * `enc` is stored without validation; the function always returns 0.
+ */
+extern int
+onigenc_set_default_encoding(OnigEncoding enc)
+{
+ OnigEncDefaultCharEncoding = enc;
+ return 0;
+}
+
+/*
+ * Align `s` to a character head, rounding towards the end of the string.
+ *
+ * `s` is first left-adjusted to a character head.  If that head equals `s`
+ * it is returned unchanged; if it lies before `s`, the result is the head
+ * advanced by the byte length of the character found there.
+ */
+extern UChar*
+onigenc_get_right_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
+{
+  UChar* head = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
+
+  if (head >= s)
+    return head;
+
+  return head + enc_len(enc, *head);
+}
+
+/*
+ * Same adjustment as onigenc_get_right_adjust_char_head(), additionally
+ * reporting a "previous character" pointer through `prev` (may be null).
+ *
+ * When `s` had to be moved forward, *prev receives the head that was skipped.
+ * When `s` was already a character head, *prev is set to null; the previous
+ * character is not looked up in that case.
+ */
+extern UChar*
+onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
+    UChar* start, UChar* s, UChar** prev)
+{
+  UChar* head = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
+
+  if (head >= s) {
+    if (prev) *prev = (UChar* )NULL; /* Sorry */
+    return head;
+  }
+
+  if (prev) *prev = head;
+  return head + enc_len(enc, *head);
+}
+
+/*
+ * Return the head of the character that precedes `s`, or null when `s` is at
+ * (or before) `start` and there is no preceding character.
+ */
+extern UChar*
+onigenc_get_prev_char_head(OnigEncoding enc, UChar* start, UChar* s)
+{
+  if (s > start)
+    return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
+
+  return (UChar* )NULL;
+}
+
+/*
+ * Move `s` back by `n` characters, never going before `start`.
+ *
+ * Returns the new position, or null if the beginning of the string is reached
+ * before `n` steps have been taken.  A null `s` is returned as-is, and a
+ * non-positive `n` leaves `s` unchanged.
+ */
+extern UChar*
+onigenc_step_back(OnigEncoding enc, UChar* start, UChar* s, int n)
+{
+  int step;
+
+  for (step = 0; step < n && ONIG_IS_NOT_NULL(s); step++) {
+    if (s <= start)
+      return (UChar* )NULL;
+
+    s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
+  }
+  return s;
+}
+
+
+#ifndef ONIG_RUBY_M17N
+
+#ifndef NOT_RUBY
+#define USE_APPLICATION_TO_LOWER_CASE_TABLE
+#endif
+
+UChar* OnigEncAsciiToLowerCaseTable = (UChar* )0;
+
+#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE
+static UChar BuiltInAsciiToLowerCaseTable[] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
+ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
+};
+#endif /* not USE_APPLICATION_TO_LOWER_CASE_TABLE */
+
+unsigned short OnigEncAsciiCtypeTable[256] = {
+ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
+ 0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
+ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
+ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
+ 0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
+ 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
+ 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
+ 0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
+ 0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
+ 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
+ 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
+ 0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
+ 0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
+ 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
+ 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
+ 0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
+};
+
+/*
+ * Install `table` as the byte-to-lower-case table used by
+ * ONIGENC_ASCII_CODE_TO_LOWER_CASE.
+ *
+ * A null `table` selects the built-in table, unless
+ * USE_APPLICATION_TO_LOWER_CASE_TABLE is defined, in which case a null
+ * argument leaves the current table untouched.
+ */
+extern void
+onigenc_set_default_caseconv_table(UChar* table)
+{
+  UChar* chosen = table;
+
+  if (chosen == (UChar* )0) {
+#ifdef USE_APPLICATION_TO_LOWER_CASE_TABLE
+    return ;
+#else
+    chosen = BuiltInAsciiToLowerCaseTable;
+#endif
+  }
+
+  /* Only write the global when the table actually changes. */
+  if (OnigEncAsciiToLowerCaseTable != chosen) {
+    OnigEncAsciiToLowerCaseTable = chosen;
+  }
+}
+
+/* Function form of the ONIGENC_LEFT_ADJUST_CHAR_HEAD macro: returns its result for (enc, start, s). */
+extern UChar*
+onigenc_get_left_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
+{
+ return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
+}
+
+/* "Nothing" implementation: ignores `codes`, leaves *codes unwritten and returns 0. */
+extern int
+onigenc_nothing_get_all_fold_match_code(OnigCodePoint** codes)
+{
+ return 0;
+}
+
+/* "Nothing" implementation: ignores its arguments, leaves *info unwritten and always returns -1. */
+extern int
+onigenc_nothing_get_fold_match_info(UChar* p, UChar* end,
+ OnigEncFoldMatchInfo** info)
+{
+ return -1;
+}
+
+/* "Nothing" implementation: ignores its arguments, writes none of the outputs and always returns -1. */
+extern int
+onigenc_nothing_get_ctype_code_range(int ctype, int* nsb, int* nmb,
+ OnigCodePointRange* sbr[], OnigCodePointRange* mbr[])
+{
+ return -1;
+}
+
+/* for single byte encodings */
+/*
+ * Store in *lower the ONIGENC_ASCII_CODE_TO_LOWER_CASE mapping of the single
+ * byte *p.  Always returns 1.
+ */
+extern int
+onigenc_ascii_mbc_to_lower(UChar* p, UChar* lower)
+{
+ *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
+ return 1; /* return byte length of converted char to lower */
+}
+
+/* Non-zero when the byte *p carries the UPPER or LOWER ctype in the ASCII ctype table. */
+extern int
+onigenc_ascii_mbc_is_case_ambig(UChar* p)
+{
+ return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
+}
+
+/* Single-byte encodings: the code point is the byte value at p; `end` is not consulted. */
+extern OnigCodePoint
+onigenc_single_byte_mbc_to_code(UChar* p, UChar* end)
+{
+ return (OnigCodePoint )(*p);
+}
+
+/* Single-byte encodings: returns 1 for every `code`; the argument is not inspected. */
+extern int
+onigenc_single_byte_code_to_mbclen(OnigCodePoint code)
+{
+ return 1;
+}
+
+/* Single-byte encodings: the first (only) byte is the low 8 bits of `code`. */
+extern int
+onigenc_single_byte_code_to_mbc_first(OnigCodePoint code)
+{
+ return (code & 0xff);
+}
+
+/* Write the low 8 bits of `code` to buf[0] and return 1. */
+extern int
+onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+ *buf = (UChar )(code & 0xff);
+ return 1;
+}
+
+/* Single-byte encodings: `s` is returned unchanged; `start` is not consulted. */
+extern UChar*
+onigenc_single_byte_left_adjust_char_head(UChar* start, UChar* s)
+{
+ return s;
+}
+
+/* Single-byte encodings: always returns TRUE; neither argument is inspected. */
+extern int
+onigenc_single_byte_is_allowed_reverse_match(UChar* s, UChar* end)
+{
+ return TRUE;
+}
+
+/*
+ * Decode the multibyte character at `p` into a code point by concatenating
+ * its bytes, most significant first.
+ *
+ * The character length is taken from the lead byte via enc_len().  Decoding
+ * stops early at `end`, so a truncated character yields the bytes that were
+ * available.  The lead byte itself is read unconditionally.
+ */
+extern OnigCodePoint
+onigenc_mbn_mbc_to_code(OnigEncoding enc, UChar* p, UChar* end)
+{
+  OnigCodePoint code;
+  int remaining;
+
+  remaining = enc_len(enc, *p) - 1;
+  code = *p++;
+
+  while (remaining-- > 0 && p < end) {
+    code = (code << 8) + *p++;
+  }
+  return code;
+}
+
+/*
+ * Lower-case the character at `p` into `lower` and return its byte length.
+ *
+ * ASCII bytes go through the lower-case table.  Multibyte characters are
+ * copied unchanged; the copy is skipped when `lower` and `p` are the same
+ * buffer.
+ */
+extern int
+onigenc_mbn_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* lower)
+{
+  int len, i;
+
+  if (ONIGENC_IS_MBC_ASCII(p)) {
+    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
+    return 1;
+  }
+
+  len = enc_len(enc, *p);
+  if (lower != p) {
+    for (i = 0; i < len; i++)
+      lower[i] = p[i];
+  }
+  return len; /* return byte length of converted to lower char */
+}
+
+/*
+ * Report whether the character at `p` is case-ambiguous.  Only ASCII
+ * characters are tested against the ctype table; anything else is FALSE.
+ */
+extern int
+onigenc_mbn_mbc_is_case_ambig(UChar* p)
+{
+  if (! ONIGENC_IS_MBC_ASCII(p))
+    return FALSE;
+
+  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
+}
+
+/* Byte length of `code` in a 1-or-2 byte encoding: 2 if bits 8-15 are set, otherwise 1. */
+extern int
+onigenc_mb2_code_to_mbclen(OnigCodePoint code)
+{
+  return ((code & 0xff00) != 0) ? 2 : 1;
+}
+
+/*
+ * Byte length of `code` in an up-to-4 byte encoding: the position of the
+ * highest non-zero byte of the code value (1 when only the low byte is set).
+ */
+extern int
+onigenc_mb4_code_to_mbclen(OnigCodePoint code)
+{
+  if ((code & 0xff000000) != 0) return 4;
+  if ((code & 0xff0000) != 0) return 3;
+  if ((code & 0xff00) != 0) return 2;
+  return 1;
+}
+
+/*
+ * First byte of the encoded form of `code` in a 1-or-2 byte encoding:
+ * bits 8-15 when they are non-zero, otherwise the code value itself.
+ */
+extern int
+onigenc_mb2_code_to_mbc_first(OnigCodePoint code)
+{
+  if ((code & 0xff00) == 0)
+    return (int )code;
+
+  return (int )((code >> 8) & 0xff);
+}
+
+/*
+ * First byte of the encoded form of `code` in an up-to-4 byte encoding:
+ * the highest non-zero byte of the code value, or the code value itself when
+ * no byte above the lowest is set.
+ */
+extern int
+onigenc_mb4_code_to_mbc_first(OnigCodePoint code)
+{
+  if ((code & 0xff000000) != 0)
+    return (int )((code >> 24) & 0xff);
+
+  if ((code & 0xff0000) != 0)
+    return (int )((code >> 16) & 0xff);
+
+  if ((code & 0xff00) != 0)
+    return (int )((code >> 8) & 0xff);
+
+  return (int )code;
+}
+
+/*
+ * Encode `code` into `buf` for a 1-or-2 byte encoding.
+ *
+ * Returns the number of bytes written, or ONIGERR_INVALID_WIDE_CHAR_VALUE
+ * when that count disagrees with the length the encoding assigns to the
+ * first byte written.  `buf` is written even when the error is returned.
+ */
+extern int
+onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
+{
+  int n = 0;
+
+  if ((code & 0xff00) != 0)
+    buf[n++] = (UChar )((code >> 8) & 0xff);
+  buf[n++] = (UChar )(code & 0xff);
+
+  if (enc_len(enc, buf[0]) != n)
+    return ONIGERR_INVALID_WIDE_CHAR_VALUE;
+
+  return n;
+}
+
+/*
+ * Encode `code` into `buf` for an up-to-4 byte encoding.
+ *
+ * Bytes are emitted from the highest non-zero byte of `code` downwards.
+ * Once a byte has been emitted, every lower byte is emitted too, even when
+ * it is 0x00.  Skipping interior zero bytes would turn e.g. 0x8E00A1 into
+ * the two bytes 8E A1, a different character, and would disagree with
+ * onigenc_mb4_code_to_mbclen().
+ *
+ * Returns the number of bytes written, or ONIGERR_INVALID_WIDE_CHAR_VALUE
+ * when that count disagrees with the length the encoding assigns to the
+ * first byte written.  `buf` is written even when the error is returned.
+ */
+extern int
+onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
+{
+  UChar *p = buf;
+
+  if ((code & 0xff000000) != 0) {
+    *p++ = (UChar )((code >> 24) & 0xff);
+  }
+  if ((code & 0xff0000) != 0 || p != buf) {
+    *p++ = (UChar )((code >> 16) & 0xff);
+  }
+  if ((code & 0xff00) != 0 || p != buf) {
+    *p++ = (UChar )((code >> 8) & 0xff);
+  }
+  *p++ = (UChar )(code & 0xff);
+
+#if 1
+  if (enc_len(enc, buf[0]) != (p - buf))
+    return ONIGERR_INVALID_WIDE_CHAR_VALUE;
+#endif
+  return p - buf;
+}
+
+/*
+ * Character-type test for 1-or-2 byte encodings.
+ *
+ * Codes below 128 are looked up in the ASCII ctype table.  A code of 128 or
+ * above matches only when ONIGENC_CTYPE_WORD is requested and the first byte
+ * of its encoded form is a multibyte lead byte (enc_len > 1).
+ *
+ * The original body had statements after an if/else whose arms both
+ * returned; they could never execute and have been removed.  Results are
+ * unchanged for every input.
+ */
+extern int
+onigenc_mb2_code_is_ctype(OnigEncoding enc, OnigCodePoint code,
+                          unsigned int ctype)
+{
+  if (code < 128)
+    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+
+  if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
+    int first = onigenc_mb2_code_to_mbc_first(code);
+    return (enc_len(enc, first) > 1 ? TRUE : FALSE);
+  }
+
+  return FALSE;
+}
+
+/*
+ * Character-type test for up-to-4 byte encodings.
+ *
+ * Codes below 128 are looked up in the ASCII ctype table.  A code of 128 or
+ * above matches only when ONIGENC_CTYPE_WORD is requested and the first byte
+ * of its encoded form is a multibyte lead byte (enc_len > 1).
+ *
+ * The original body had statements after an if/else whose arms both
+ * returned; they could never execute and have been removed.  Results are
+ * unchanged for every input.
+ */
+extern int
+onigenc_mb4_code_is_ctype(OnigEncoding enc, OnigCodePoint code,
+                          unsigned int ctype)
+{
+  if (code < 128)
+    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+
+  if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
+    int first = onigenc_mb4_code_to_mbc_first(code);
+    return (enc_len(enc, first) > 1 ? TRUE : FALSE);
+  }
+
+  return FALSE;
+}
+
+/*
+ * Point *codes at a static one-element list holding 0xdf and return the
+ * element count (1).  The list is function-static storage, not a copy.
+ */
+extern int
+onigenc_get_all_fold_match_code_ss_0xdf(OnigCodePoint** codes)
+{
+ static OnigCodePoint list[] = { 0xdf };
+ *codes = list;
+ return 1;
+}
+
+/*
+ * Fold-match lookup for the German sharp s (byte 0xdf).
+ *
+ * If the text at `p` starts with 0xdf, "SS" or "ss", *info is pointed at a
+ * static description of the three equivalent spellings and the number of
+ * bytes matched (1 or 2) is returned.  Otherwise returns -1 and leaves *info
+ * untouched.  Mixed-case "Ss"/"sS" is not recognised.
+ */
+extern int
+onigenc_get_fold_match_info_ss_0xdf(UChar* p, UChar* end,
+                                    OnigEncFoldMatchInfo** info)
+{
+  /* German alphabet ess-tsett(U+00DF) */
+  static OnigEncFoldMatchInfo ss = {
+    3,
+    { 1, 2, 2 },
+    { "\337", "ss", "SS" } /* 0337: 0xdf */
+  };
+
+  if (p >= end) return -1;
+
+  if (*p == 0xdf) {
+    *info = &ss;
+    return 1;
+  }
+
+  /* A two-byte spelling needs two bytes of input. */
+  if (p + 1 >= end) return -1;
+
+  if ((p[0] == 'S' && p[1] == 'S') ||
+      (p[0] == 's' && p[1] == 's')) {
+    *info = &ss;
+    return 2;
+  }
+
+  return -1; /* is not a fold string. */
+}
+
+#else /* ONIG_RUBY_M17N */
+
+/*
+ * Character-type test for the Ruby M17N build: dispatch a single
+ * ONIGENC_CTYPE_* value to the matching m17n_* / ONIGENC_IS_CODE_* predicate.
+ *
+ * `ctype` is compared for equality, so a combination of several ctype bits
+ * matches no case and yields 0.
+ */
+extern int
+onigenc_is_code_ctype(OnigEncoding enc, OnigCodePoint code, int ctype)
+{
+  switch (ctype) {
+  case ONIGENC_CTYPE_ALPHA:  return m17n_isalpha(enc, code);
+  case ONIGENC_CTYPE_BLANK:  return ONIGENC_IS_CODE_BLANK(enc, (int )(code));
+  case ONIGENC_CTYPE_CNTRL:  return m17n_iscntrl(enc, code);
+  case ONIGENC_CTYPE_DIGIT:  return m17n_isdigit(enc, code);
+  case ONIGENC_CTYPE_GRAPH:  return ONIGENC_IS_CODE_GRAPH(enc, (int )(code));
+  case ONIGENC_CTYPE_LOWER:  return m17n_islower(enc, code);
+  case ONIGENC_CTYPE_PRINT:  return m17n_isprint(enc, code);
+  case ONIGENC_CTYPE_PUNCT:  return m17n_ispunct(enc, code);
+  case ONIGENC_CTYPE_SPACE:  return m17n_isspace(enc, code);
+  case ONIGENC_CTYPE_UPPER:  return m17n_isupper(enc, code);
+  case ONIGENC_CTYPE_XDIGIT: return m17n_isxdigit(enc, code);
+  case ONIGENC_CTYPE_WORD:   return m17n_iswchar(enc, code);
+  case ONIGENC_CTYPE_ASCII:  return (code < 128 ? TRUE : FALSE);
+  case ONIGENC_CTYPE_ALNUM:  return m17n_isalnum(enc, code);
+  default:
+    break;
+  }
+
+  return 0;
+}
+
+/*
+ * M17N build: write the encoded form of `code` to `buf` with m17n_mbcput()
+ * and return the byte length that enc_len() reports for its first byte.
+ */
+extern int
+onigenc_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
+{
+  int first;
+
+  m17n_mbcput(enc, code, buf);
+  first = m17n_firstbyte(enc, code);
+  return enc_len(enc, first);
+}
+
+/*
+ * M17N build: decode the character at `p`, lower-case it with m17n_tolower(),
+ * write the result to `buf` and return the byte length of the lowered code.
+ */
+extern int
+onigenc_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* buf)
+{
+  unsigned int code, lowered;
+  UChar* tail;
+
+  tail = p + enc_len(enc, *p);
+  code = m17n_codepoint(enc, p, tail);
+  lowered = m17n_tolower(enc, code);
+  m17n_mbcput(enc, lowered, buf);
+
+  return m17n_codelen(enc, lowered);
+}
+
+/*
+ * M17N build: TRUE when the character at `p` is upper-case or lower-case
+ * according to the m17n predicates, FALSE otherwise.
+ */
+extern int
+onigenc_mbc_is_case_ambig(OnigEncoding enc, UChar* p)
+{
+  UChar* tail = p + enc_len(enc, *p);
+  unsigned int code = m17n_codepoint(enc, p, tail);
+
+  return (m17n_isupper(enc, code) || m17n_islower(enc, code)) ? TRUE : FALSE;
+}
+
+/*
+ * M17N build: align `s` down to the head of the character that contains it.
+ *
+ * Returns `s` itself when it is at or before `start`, or when a character
+ * boundary falls exactly on `s`; otherwise returns the head of the character
+ * that spans `s`.
+ */
+extern UChar*
+onigenc_get_left_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
+{
+ UChar *p;
+ int len;
+
+ if (s <= start) return s;
+ p = s;
+
+ /* Scan backwards to the nearest byte that m17n_islead() accepts (or to start). */
+ while (!m17n_islead(enc, *p) && p > start) p--;
+ /* Walk forward whole characters while the next one still ends before s.
+    `len` is assigned in the loop condition, so it is set when the loop exits. */
+ while (p + (len = enc_len(enc, *p)) < s) {
+ p += len;
+ }
+ /* The character at p ends exactly at s: s is already a character head. */
+ if (p + len == s) return s;
+ return p;
+}
+
+/* M17N build: returns ONIGENC_IS_SINGLEBYTE(enc); `s` and `end` are not consulted. */
+extern int
+onigenc_is_allowed_reverse_match(OnigEncoding enc, UChar* s, UChar* end)
+{
+ return ONIGENC_IS_SINGLEBYTE(enc);
+}
+
+/* M17N build: no-op stub with an empty body; `table` is ignored. */
+extern void
+onigenc_set_default_caseconv_table(UChar* table) { }
+
+#endif /* ONIG_RUBY_M17N */
diff --git a/regenc.h b/regenc.h
new file mode 100644
index 000000000..e0c6211d3
--- /dev/null
+++ b/regenc.h
@@ -0,0 +1,96 @@
+/**********************************************************************
+
+ regenc.h - Oniguruma (regular expression library)
+
+ Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
+
+**********************************************************************/
+#ifndef REGENC_H
+#define REGENC_H
+
+#ifndef RUBY_PLATFORM
+#include "config.h"
+#endif
+#include "oniguruma.h"
+
+#ifndef NULL
+#define NULL ((void* )0)
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+/* error codes */
+/* internal error */
+#define ONIGERR_MEMORY -5
+#define ONIGERR_TYPE_BUG -6
+/* syntax error [-400, -999] */
+#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
+#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
+
+#define ONIG_NEWLINE '\n'
+#define ONIG_IS_NEWLINE(c) ((c) == ONIG_NEWLINE)
+#define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0)
+#define ONIG_IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
+#define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL
+#define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val)
+
+
+#ifdef ONIG_RUBY_M17N
+
+#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_UNDEF
+
+#else /* ONIG_RUBY_M17N */
+
+#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
+
+/* for encoding system implementation (internal) */
+ONIG_EXTERN int onigenc_nothing_get_all_fold_match_code P_((OnigCodePoint** codes));
+ONIG_EXTERN int onigenc_nothing_get_fold_match_info P_((UChar* p, UChar* end, OnigEncFoldMatchInfo** info));
+ONIG_EXTERN int onigenc_nothing_get_ctype_code_range P_((int ctype, int* nsb, int* nmb, OnigCodePointRange* sbr[], OnigCodePointRange* mbr[]));
+
+/* methods for single byte encoding */
+ONIG_EXTERN int onigenc_ascii_mbc_to_lower P_((UChar* p, UChar* lower));
+ONIG_EXTERN int onigenc_ascii_mbc_is_case_ambig P_((UChar* p));
+ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((UChar* p, UChar* end));
+ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code));
+ONIG_EXTERN int onigenc_single_byte_code_to_mbc_first P_((OnigCodePoint code));
+ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf));
+ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((UChar* start, UChar* s));
+ONIG_EXTERN int onigenc_single_byte_is_allowed_reverse_match P_((UChar* s, UChar* end));
+
+/* methods for multi byte encoding */
+ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, UChar* p, UChar* end));
+ONIG_EXTERN int onigenc_mbn_mbc_to_lower P_((OnigEncoding enc, UChar* p, UChar* lower));
+ONIG_EXTERN int onigenc_mbn_mbc_is_case_ambig P_((UChar* p));
+ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code));
+ONIG_EXTERN int onigenc_mb2_code_to_mbc_first P_((OnigCodePoint code));
+ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
+ONIG_EXTERN int onigenc_mb2_code_is_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
+ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code));
+ONIG_EXTERN int onigenc_mb4_code_to_mbc_first P_((OnigCodePoint code));
+ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
+ONIG_EXTERN int onigenc_mb4_code_is_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
+
+ONIG_EXTERN int onigenc_get_all_fold_match_code_ss_0xdf P_((OnigCodePoint** codes));
+ONIG_EXTERN int onigenc_get_fold_match_info_ss_0xdf P_((UChar* p, UChar* end, OnigEncFoldMatchInfo** info));
+
+#endif /* is not ONIG_RUBY_M17N */
+
+
+ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
+ONIG_EXTERN UChar* OnigEncAsciiToLowerCaseTable;
+ONIG_EXTERN unsigned short OnigEncAsciiCtypeTable[];
+
+/* Lower-case mapping of byte value `c` through the currently installed table. */
+#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c]
+/* Non-zero when `code` has any of the `ctype` bits set in the ASCII ctype table.
+   `ctype` is parenthesised so that an argument such as (A | B) is masked as a
+   whole instead of being re-associated as ((table & A) | B). */
+#define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \
+  ((OnigEncAsciiCtypeTable[code] & (ctype)) != 0)
+/* Non-zero when `code` is an upper-case or lower-case ASCII letter. */
+#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \
+  ONIGENC_IS_ASCII_CODE_CTYPE(code, (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER))
+
+#endif /* REGENC_H */
diff --git a/regerror.c b/regerror.c
new file mode 100644
index 000000000..5a6c31b82
--- /dev/null
+++ b/regerror.c
@@ -0,0 +1,291 @@
+/**********************************************************************
+
+ regerror.c - Oniguruma (regular expression library)
+
+ Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
+
+**********************************************************************/
+#include "regint.h"
+#include <stdio.h> /* for vsnprintf() */
+
+#ifdef HAVE_STDARG_PROTOTYPES
+#include <stdarg.h>
+#define va_init_list(a,b) va_start(a,b)
+#else
+#include <varargs.h>
+#define va_init_list(a,b) va_start(a)
+#endif
+
+/*
+ * Map an error code to its message/format string.
+ *
+ * Returns a null pointer for any code >= 0.  For negative codes the result
+ * is a string literal (do not modify or free it); codes that match no case
+ * map to "undefined error code".  Some messages contain a "%n" placeholder.
+ */
+extern char*
+onig_error_code_to_format(int code)
+{
+ char *p;
+
+ if (code >= 0) return (char* )0;
+
+ switch (code) {
+ case ONIG_MISMATCH:
+ p = "mismatch"; break;
+ case ONIG_NO_SUPPORT_CONFIG:
+ p = "no support in this configuration"; break;
+ case ONIGERR_MEMORY:
+ p = "fail to memory allocation"; break;
+ case ONIGERR_MATCH_STACK_LIMIT_OVER:
+ p = "match-stack limit over"; break;
+ case ONIGERR_TYPE_BUG:
+ p = "undefined type (bug)"; break;
+ case ONIGERR_PARSER_BUG:
+ p = "internal parser error (bug)"; break;
+ case ONIGERR_STACK_BUG:
+ p = "stack error (bug)"; break;
+ case ONIGERR_UNDEFINED_BYTECODE:
+ p = "undefined bytecode (bug)"; break;
+ case ONIGERR_UNEXPECTED_BYTECODE:
+ p = "unexpected bytecode (bug)"; break;
+ case ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED:
+ p = "default multibyte-encoding is not setted"; break;
+ case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR:
+ p = "can't convert to wide-char on specified multibyte-encoding"; break;
+ case ONIGERR_INVALID_ARGUMENT:
+ p = "invalid argument"; break;
+ case ONIGERR_END_PATTERN_AT_LEFT_BRACE:
+ p = "end pattern at left brace"; break;
+ case ONIGERR_END_PATTERN_AT_LEFT_BRACKET:
+ p = "end pattern at left bracket"; break;
+ case ONIGERR_EMPTY_CHAR_CLASS:
+ p = "empty char-class"; break;
+ case ONIGERR_PREMATURE_END_OF_CHAR_CLASS:
+ p = "premature end of char-class"; break;
+ case ONIGERR_END_PATTERN_AT_BACKSLASH:
+ p = "end pattern at backslash"; break;
+ case ONIGERR_END_PATTERN_AT_META:
+ p = "end pattern at meta"; break;
+ case ONIGERR_END_PATTERN_AT_CONTROL:
+ p = "end pattern at control"; break;
+ case ONIGERR_META_CODE_SYNTAX:
+ p = "illegal meta-code syntax"; break;
+ case ONIGERR_CONTROL_CODE_SYNTAX:
+ p = "illegal control-code syntax"; break;
+ case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE:
+ p = "char-class value at end of range"; break;
+ case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE:
+ p = "char-class value at start of range"; break;
+ case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS:
+ p = "unmatched range specifier in char-class"; break;
+ case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED:
+ p = "target of repeat operator is not specified"; break;
+ case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID:
+ p = "target of repeat operator is invalid"; break;
+ case ONIGERR_NESTED_REPEAT_OPERATOR:
+ p = "nested repeat operator"; break;
+ case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS:
+ p = "unmatched close parenthesis"; break;
+ case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS:
+ p = "end pattern with unmatched parenthesis"; break;
+ case ONIGERR_END_PATTERN_IN_GROUP:
+ p = "end pattern in group"; break;
+ case ONIGERR_UNDEFINED_GROUP_OPTION:
+ p = "undefined group option"; break;
+ case ONIGERR_INVALID_POSIX_BRACKET_TYPE:
+ p = "invalid POSIX bracket type"; break;
+ case ONIGERR_INVALID_LOOK_BEHIND_PATTERN:
+ p = "invalid pattern in look-behind"; break;
+ case ONIGERR_INVALID_REPEAT_RANGE_PATTERN:
+ p = "invalid repeat range {lower,upper}"; break;
+ case ONIGERR_TOO_BIG_NUMBER:
+ p = "too big number"; break;
+ case ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE:
+ p = "too big number for repeat range"; break;
+ case ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE:
+ p = "upper is smaller than lower in repeat range"; break;
+ case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS:
+ p = "empty range in char class"; break;
+ case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE:
+ p = "mismatch multibyte code length in char-class range"; break;
+ case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES:
+ p = "too many multibyte code ranges are specified"; break;
+ case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING:
+ p = "too short multibyte code string"; break;
+ case ONIGERR_TOO_BIG_BACKREF_NUMBER:
+ p = "too big backref number"; break;
+ case ONIGERR_INVALID_BACKREF:
+#ifdef USE_NAMED_GROUP
+ p = "invalid backref number/name"; break;
+#else
+ p = "invalid backref number"; break;
+#endif
+ case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED:
+ p = "numbered backref/call is not allowed. (use name)"; break;
+ case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
+ p = "too big wide-char value"; break;
+ case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE:
+ p = "too long wide-char value"; break;
+ case ONIGERR_INVALID_WIDE_CHAR_VALUE:
+ p = "invalid wide-char value"; break;
+ case ONIGERR_EMPTY_GROUP_NAME:
+ p = "group name is empty"; break;
+ case ONIGERR_INVALID_GROUP_NAME:
+ p = "invalid group name <%n>"; break;
+ case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
+#ifdef USE_NAMED_GROUP
+ p = "invalid char in group name <%n>"; break;
+#else
+ p = "invalid char in group number <%n>"; break;
+#endif
+ case ONIGERR_UNDEFINED_NAME_REFERENCE:
+ p = "undefined name <%n> reference"; break;
+ case ONIGERR_UNDEFINED_GROUP_REFERENCE:
+ p = "undefined group <%n> reference"; break;
+ case ONIGERR_MULTIPLEX_DEFINED_NAME:
+ p = "multiplex defined name <%n>"; break;
+ case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
+ p = "multiplex definition name <%n> call"; break;
+ case ONIGERR_NEVER_ENDING_RECURSION:
+ p = "never ending recursion"; break;
+ case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY:
+ p = "group number is too big for capture history"; break;
+ case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
+ p = "invalid character property name"; break;
+ case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT:
+ p = "over thread pass limit count"; break;
+
+ default:
+ p = "undefined error code"; break;
+ }
+
+ return p;
+}
+
+
+/* for ONIG_MAX_ERROR_MESSAGE_LEN */
+#define MAX_ERROR_PAR_LEN 30
+
+/*
+ * Write the message for error `code` into `s` (NUL-terminated) and return
+ * its length in bytes, not counting the terminator.
+ *
+ * For the name-related codes listed in the first switch arm the caller must
+ * pass one extra argument, an OnigErrorInfo*.  The bytes in [par, par_end)
+ * replace each "%n" in the format, shortened to MAX_ERROR_PAR_LEN bytes
+ * (ending in "...") when longer.  Any other character after '%' is copied
+ * as-is and the '%' itself is dropped.
+ *
+ * A code with no message (onig_error_code_to_format() returns null, which it
+ * does for code >= 0) produces an empty string and a return value of 0,
+ * rather than passing a null pointer to strlen().
+ *
+ * `s` is not bounds-checked here.
+ * NOTE(review): callers presumably supply ONIG_MAX_ERROR_MESSAGE_LEN bytes,
+ * confirm against the public header.
+ */
+extern int
+#ifdef HAVE_STDARG_PROTOTYPES
+onig_error_code_to_str(UChar* s, int code, ...)
+#else
+onig_error_code_to_str(s, code, va_alist)
+  UChar* s;
+  int code;
+  va_dcl
+#endif
+{
+  UChar *p, *q;
+  OnigErrorInfo* einfo;
+  int len;
+  va_list vargs;
+
+  va_init_list(vargs, code);
+
+  switch (code) {
+  case ONIGERR_UNDEFINED_NAME_REFERENCE:
+  case ONIGERR_UNDEFINED_GROUP_REFERENCE:
+  case ONIGERR_MULTIPLEX_DEFINED_NAME:
+  case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
+  case ONIGERR_INVALID_GROUP_NAME:
+  case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
+    einfo = va_arg(vargs, OnigErrorInfo*);
+    len = einfo->par_end - einfo->par;
+    q = (UChar* )onig_error_code_to_format(code);
+    p = s;
+    while (*q != '\0') {
+      if (*q == '%') {
+        q++;
+        if (*q == 'n') { /* '%n': name */
+          if (len > MAX_ERROR_PAR_LEN) {
+            xmemcpy(p, einfo->par, MAX_ERROR_PAR_LEN - 3);
+            p += (MAX_ERROR_PAR_LEN - 3);
+            xmemcpy(p, "...", 3);
+            p += 3;
+          }
+          else {
+            xmemcpy(p, einfo->par, len);
+            p += len;
+          }
+          q++;
+          continue;
+        }
+        /* A lone '%' at the very end: stop instead of stepping past the NUL. */
+        if (*q == '\0') break;
+      }
+      *p++ = *q++;
+    }
+    *p = '\0';
+    len = p - s;
+    break;
+
+  default:
+    q = (UChar* )onig_error_code_to_format(code);
+    if (q == (UChar* )0) {
+      /* No message exists for this code (e.g. a non-negative code). */
+      len = 0;
+    }
+    else {
+      len = strlen((char* )q);
+      xmemcpy(s, q, len);
+    }
+    s[len] = '\0';
+    break;
+  }
+
+  va_end(vargs);
+  return len;
+}
+
+
+/*
+ * Format a message into `buf` with vsnprintf(fmt, ...) and, if there is
+ * room, append ": /<pattern>/" where <pattern> is a printable rendering of
+ * the bytes in [pat, pat_end):
+ *   - an escape (MC_ESC) and the character after it are copied verbatim;
+ *   - '/' is written as MC_ESC followed by '/';
+ *   - multibyte characters are copied verbatim;
+ *   - bytes that are neither printable nor space become a 3-digit octal
+ *     escape ("\ooo");
+ *   - everything else is copied as-is.
+ *
+ * The pattern is appended only when the worst case (4 output bytes per
+ * pattern byte plus the decoration) fits in `bufsize`; otherwise `buf` keeps
+ * just the formatted message.  Nothing is appended when vsnprintf() reports
+ * an error (negative result), since `buf` may not hold a valid string then.
+ *
+ * Fixes relative to the original body:
+ *   - the octal escape is copied from the formatted buffer `b`; before, the
+ *     loop copied the next four raw pattern bytes and advanced `p` by four;
+ *   - copies after an escape or a multibyte lead byte stop at pat_end instead
+ *     of reading past it when the pattern ends mid-character.
+ */
+void
+#ifdef HAVE_STDARG_PROTOTYPES
+onig_snprintf_with_pattern(char buf[], int bufsize, OnigEncoding enc,
+                           char* pat, char* pat_end, char *fmt, ...)
+#else
+onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
+  char buf[];
+  int bufsize;
+  OnigEncoding enc;
+  char* pat;
+  char* pat_end;
+  const char *fmt;
+  va_dcl
+#endif
+{
+  int n, need, len;
+  UChar *p, *s, *pend;
+  va_list args;
+
+  va_init_list(args, fmt);
+  n = vsnprintf(buf, bufsize, fmt, args);
+  va_end(args);
+
+  /* Worst case: every byte becomes "\ooo", plus ": /", "/" and the NUL. */
+  need = (pat_end - pat) * 4 + 4;
+
+  if (n >= 0 && n + need < bufsize) {
+    strcat(buf, ": /");
+    s = (UChar* )buf + strlen(buf);
+
+    p = (UChar* )pat;
+    pend = (UChar* )pat_end;
+    while (p < pend) {
+      if (*p == MC_ESC) {
+        *s++ = *p++;
+        if (p >= pend) break; /* escape was the last byte of the pattern */
+        len = enc_len(enc, *p);
+        while (len-- > 0 && p < pend) *s++ = *p++;
+      }
+      else if (*p == '/') {
+        *s++ = MC_ESC;
+        *s++ = *p++;
+      }
+      else if (ONIGENC_IS_MBC_HEAD(enc, *p)) {
+        len = enc_len(enc, *p);
+        while (len-- > 0 && p < pend) *s++ = *p++;
+      }
+      else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
+               !ONIGENC_IS_CODE_SPACE(enc, *p)) {
+        char b[5];
+        char* bp = b;
+
+        sprintf(b, "\\%03o", *p & 0377);
+        p++; /* one input byte consumed, four output bytes produced */
+        while (*bp != '\0') *s++ = (UChar )*bp++;
+      }
+      else {
+        *s++ = *p++;
+      }
+    }
+
+    *s++ = '/';
+    *s = '\0';
+  }
+}
diff --git a/regexec.c b/regexec.c
new file mode 100644
index 000000000..2ded602e1
--- /dev/null
+++ b/regexec.c
@@ -0,0 +1,3299 @@
+/**********************************************************************
+
+ regexec.c - Oniguruma (regular expression library)
+
+ Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
+
+**********************************************************************/
+#include "regint.h"
+
+/*
+ * Release every capture-history region held in list[1..
+ * ONIG_MAX_CAPTURE_HISTORY_GROUP] and reset the slots to NULL.
+ * The list array itself is kept.  A NULL list is accepted.
+ */
+static void
+region_list_clear(OnigRegion** list)
+{
+  int i;
+
+  if (IS_NULL(list)) return;
+
+  for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
+    if (IS_NOT_NULL(list[i])) {
+      /* entries own beg/end arrays (see region_list_add_entry), so a
+         bare xfree() of the struct would leak them */
+      onig_region_free(list[i], 1);
+      list[i] = (OnigRegion* )0;
+    }
+  }
+}
+
+/* Clear and release r's capture-history list, if it has one. */
+static void
+region_list_free(OnigRegion* r)
+{
+  if (IS_NULL(r->list)) return;
+
+  region_list_clear(r->list);
+  xfree(r->list);
+  r->list = (OnigRegion** )0;
+}
+
+/*
+ * Allocate a capture-history list with ONIG_MAX_CAPTURE_HISTORY_GROUP + 1
+ * slots, all initialised to NULL.
+ */
+static OnigRegion**
+region_list_new()
+{
+  OnigRegion** list;
+  OnigRegion** slot;
+  OnigRegion** stop;
+
+  list = (OnigRegion** )xmalloc(sizeof(OnigRegion*)
+                                * (ONIG_MAX_CAPTURE_HISTORY_GROUP + 1));
+  CHECK_NULL_RETURN(list);
+
+  stop = list + (ONIG_MAX_CAPTURE_HISTORY_GROUP + 1);
+  for (slot = list; slot < stop; slot++)
+    *slot = (OnigRegion* )0;
+
+  return list;
+}
+
+/*
+ * Mark all num_regs registers as unmatched (ONIG_REGION_NOTPOS) and drop
+ * the capture-history entries.  Allocated storage is kept.
+ */
+extern void
+onig_region_clear(OnigRegion* region)
+{
+  int i = region->num_regs;
+
+  while (--i >= 0) {
+    region->end[i] = ONIG_REGION_NOTPOS;
+    region->beg[i] = ONIG_REGION_NOTPOS;
+  }
+
+  region_list_clear(region->list);
+}
+
+/*
+ * Make the region hold n registers, growing the beg/end arrays when needed
+ * (never below ONIG_NREGION entries), and reset every register to
+ * ONIG_REGION_NOTPOS.  Capture-history entries are cleared as well.
+ *
+ * Returns 0 on success or ONIGERR_MEMORY.  On failure the region keeps its
+ * previous num_regs and its previous, still valid, storage.
+ */
+extern int
+onig_region_resize(OnigRegion* region, int n)
+{
+  int i, size;
+  int *beg, *end;
+
+  size = (n < ONIG_NREGION) ? ONIG_NREGION : n;
+
+  if (region->allocated == 0) {
+    beg = (int* )xmalloc(size * sizeof(int));
+    end = (int* )xmalloc(size * sizeof(int));
+
+    if (beg == 0 || end == 0) {
+      /* do not leak the half that did succeed */
+      if (beg) xfree(beg);
+      if (end) xfree(end);
+      return ONIGERR_MEMORY;
+    }
+
+    region->beg = beg;
+    region->end = end;
+    region->allocated = size;
+  }
+  else if (region->allocated < size) {
+    /* keep the old pointers until realloc is known to have succeeded */
+    beg = (int* )xrealloc(region->beg, size * sizeof(int));
+    if (beg == 0) return ONIGERR_MEMORY;
+    region->beg = beg;
+
+    end = (int* )xrealloc(region->end, size * sizeof(int));
+    if (end == 0) return ONIGERR_MEMORY;
+    region->end = end;
+
+    region->allocated = size;
+  }
+
+  /* only publish the new register count once storage is guaranteed */
+  region->num_regs = n;
+  for (i = 0; i < region->num_regs; i++) {
+    region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
+  }
+
+  if (IS_NOT_NULL(region->list))
+    region_list_clear(region->list);
+
+  return 0;
+}
+
+/*
+ * Ensure the region has storage for at least n registers.  Capacity starts
+ * at ONIG_NREGION and is doubled until it reaches n.  Slots from num_regs
+ * up to n - 1 are set to ONIG_REGION_NOTPOS; num_regs itself is unchanged.
+ *
+ * Returns 0 on success or ONIGERR_MEMORY.  On failure the existing storage
+ * stays valid.
+ */
+static int
+region_ensure_size(OnigRegion* region, int n)
+{
+  int i, new_size;
+  int *beg, *end;
+
+  if (region->allocated >= n)
+    return 0;
+
+  new_size = (region->allocated == 0) ? ONIG_NREGION : region->allocated;
+  while (new_size < n)
+    new_size *= 2;
+
+  if (region->allocated == 0) {
+    beg = (int* )xmalloc(new_size * sizeof(int));
+    end = (int* )xmalloc(new_size * sizeof(int));
+    if (beg == 0 || end == 0) {
+      /* do not leak the half that did succeed */
+      if (beg) xfree(beg);
+      if (end) xfree(end);
+      return ONIGERR_MEMORY;
+    }
+    region->beg = beg;
+    region->end = end;
+  }
+  else {
+    /* here allocated < n <= new_size, so the arrays always have to grow;
+       keep the old pointers until realloc is known to have succeeded */
+    beg = (int* )xrealloc(region->beg, new_size * sizeof(int));
+    if (beg == 0) return ONIGERR_MEMORY;
+    region->beg = beg;
+
+    end = (int* )xrealloc(region->end, new_size * sizeof(int));
+    if (end == 0) return ONIGERR_MEMORY;
+    region->end = end;
+  }
+  region->allocated = new_size;
+
+  for (i = region->num_regs; i < n; i++) {
+    region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
+  }
+  return 0;
+}
+
+/*
+ * Append the range [start, end] to the capture history of `group`,
+ * creating the history list and the per-group region on first use.
+ *
+ * Returns 0, ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY when group is
+ * above ONIG_MAX_CAPTURE_HISTORY_GROUP, or ONIGERR_MEMORY.
+ */
+static int
+region_list_add_entry(OnigRegion* region, int group, int start, int end)
+{
+  int r, at;
+  OnigRegion* hist;
+
+  if (group > ONIG_MAX_CAPTURE_HISTORY_GROUP)
+    return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
+
+  if (IS_NULL(region->list)) {
+    region->list = region_list_new();
+    CHECK_NULL_RETURN_VAL(region->list, ONIGERR_MEMORY);
+  }
+
+  hist = region->list[group];
+  if (IS_NULL(hist)) {
+    hist = onig_region_new();
+    CHECK_NULL_RETURN_VAL(hist, ONIGERR_MEMORY);
+    region->list[group] = hist;
+  }
+
+  r = region_ensure_size(hist, hist->num_regs + 1);
+  if (r != 0) return r;
+
+  /* store the new range in the first unused slot */
+  at = hist->num_regs;
+  hist->beg[at] = start;
+  hist->end[at] = end;
+  hist->num_regs = at + 1;
+
+  return 0;
+}
+
+/* Put a region into the empty state: no registers, no storage, no list. */
+static void
+onig_region_init(OnigRegion* region)
+{
+  region->list = (OnigRegion** )0;
+  region->end  = (int* )0;
+  region->beg  = (int* )0;
+  region->allocated = 0;
+  region->num_regs  = 0;
+}
+
+/*
+ * Allocate a new, empty region.
+ * Returns NULL when the allocation fails.
+ */
+extern OnigRegion*
+onig_region_new()
+{
+  OnigRegion* r;
+
+  r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
+  /* callers test the result for NULL, so never initialise through it */
+  if (IS_NULL(r)) return (OnigRegion* )0;
+
+  onig_region_init(r);
+  return r;
+}
+
+/*
+ * Release the storage owned by r (beg/end arrays and capture history).
+ * When free_self is non-zero the OnigRegion struct itself is released too.
+ * A NULL r is ignored.
+ */
+extern void
+onig_region_free(OnigRegion* r, int free_self)
+{
+  if (IS_NULL(r)) return;
+
+  if (r->allocated > 0) {
+    if (r->beg) xfree(r->beg);
+    if (r->end) xfree(r->end);
+    r->allocated = 0;
+  }
+
+  region_list_free(r);
+
+  if (free_self) xfree(r);
+}
+
+/*
+ * Copy the registers and the capture history of `from` into `to`,
+ * growing `to` when it is too small.  Copying a region onto itself is a
+ * no-op.
+ *
+ * The function cannot report errors.  If register storage cannot be
+ * allocated, `to` is left with num_regs == 0.  If history storage cannot
+ * be allocated, the affected history entries are skipped.
+ */
+extern void
+onig_region_copy(OnigRegion* to, OnigRegion* from)
+{
+#define RREGC_SIZE   (sizeof(int) * from->num_regs)
+  int i;
+  int *beg, *end;
+
+  if (to == from) return;
+
+  if (to->allocated == 0) {
+    if (from->num_regs > 0) {
+      beg = (int* )xmalloc(RREGC_SIZE);
+      end = (int* )xmalloc(RREGC_SIZE);
+      if (IS_NULL(beg) || IS_NULL(end)) {
+        if (beg) xfree(beg);
+        if (end) xfree(end);
+        to->num_regs = 0;
+        return;
+      }
+      to->beg = beg;
+      to->end = end;
+      to->allocated = from->num_regs;
+    }
+  }
+  else if (to->allocated < from->num_regs) {
+    /* keep the old pointers until realloc is known to have succeeded */
+    beg = (int* )xrealloc(to->beg, RREGC_SIZE);
+    if (IS_NULL(beg)) { to->num_regs = 0; return; }
+    to->beg = beg;
+
+    end = (int* )xrealloc(to->end, RREGC_SIZE);
+    if (IS_NULL(end)) { to->num_regs = 0; return; }
+    to->end = end;
+
+    to->allocated = from->num_regs;
+  }
+
+  for (i = 0; i < from->num_regs; i++) {
+    to->beg[i] = from->beg[i];
+    to->end[i] = from->end[i];
+  }
+  to->num_regs = from->num_regs;
+
+  if (IS_NULL(from->list)) {
+    region_list_free(to);
+    return;
+  }
+
+  if (IS_NULL(to->list)) {
+    to->list = region_list_new();
+    if (IS_NULL(to->list)) return;
+  }
+
+  for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
+    if (IS_NOT_NULL(from->list[i])) {
+      if (IS_NULL(to->list[i])) {
+        to->list[i] = onig_region_new();
+        if (IS_NULL(to->list[i])) continue;
+      }
+      onig_region_copy(to->list[i], from->list[i]);
+    }
+    else if (IS_NOT_NULL(to->list[i])) {
+      /* the stale entry owns beg/end arrays; a bare xfree() would leak them */
+      onig_region_free(to->list[i], 1);
+      to->list[i] = (OnigRegion* )0;
+    }
+  }
+}
+
+
+/** stack
+ *
+ * Backtrack stack used by match_at().  Each entry is a StackType whose
+ * `type` field (one of the STK_xxx values) selects the union member in use.
+ * StackIndex is an entry's offset from the stack base; INVALID_STACK_INDEX
+ * marks "no entry".
+ **/
+#define INVALID_STACK_INDEX -1
+typedef int StackIndex;
+
+typedef struct _StackType {
+ unsigned int type; /* STK_xxx */
+ union {
+ struct {
+ UChar *pcode; /* byte code position */
+ UChar *pstr; /* string position */
+ UChar *pstr_prev; /* previous char position of pstr */
+ } state;
+ struct {
+ int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
+ UChar *pcode; /* byte code position (head of repeated target) */
+ int num; /* repeat id */
+ } repeat;
+ struct {
+ StackIndex si; /* index of the STK_REPEAT entry whose count was changed */
+ } repeat_inc;
+ struct {
+ int num; /* memory num */
+ UChar *pstr; /* start/end position */
+ /* The following fields hold the previous mem_start_stk[num] and
+ mem_end_stk[num] values; STACK_PUSH_MEM_START and STACK_PUSH_MEM_END
+ both fill them in. */
+ StackIndex start; /* prev. info (for backtrack "(...)*" ) */
+ StackIndex end; /* prev. info (for backtrack "(...)*" ) */
+ } mem;
+ struct {
+ int num; /* null check id */
+ UChar *pstr; /* start position */
+ } null_check;
+#ifdef USE_SUBEXP_CALL
+ struct {
+ UChar *ret_addr; /* byte code position */
+ int num; /* null check id */
+ UChar *pstr; /* string position */
+ } call_frame;
+#endif
+ } u;
+} StackType;
+
+/* stack type
+ *
+ * Types with any of the low 8 bits set (STK_MASK_POP_USED) are the entries
+ * at which STACK_POP stops; all other types have a zero low byte and are
+ * skipped (and possibly undone) while popping.
+ */
+/* used by normal-POP */
+#define STK_ALT 0x0001
+#define STK_LOOK_BEHIND_NOT 0x0003
+#define STK_POS_NOT 0x0005
+/* avoided by normal-POP, but value should be small */
+#define STK_NULL_CHECK_START 0x0100
+/* handled by normal-POP */
+#define STK_MEM_START 0x0200
+#define STK_MEM_END 0x0300
+#define STK_REPEAT_INC 0x0400
+/* avoided by normal-POP */
+#define STK_POS 0x0500 /* used when POP-POS */
+#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
+#define STK_REPEAT 0x0700
+#define STK_CALL_FRAME 0x0800
+#define STK_RETURN 0x0900
+#define STK_MEM_END_MARK 0x0a00
+#define STK_VOID 0x0b00 /* placeholder written over an entry that was voided */
+#define STK_NULL_CHECK_END 0x0c00 /* for recursive call */
+
+/* stack type check mask */
+#define STK_MASK_POP_USED 0x00ff
+/* true for entries that STACK_POS_END / STACK_STOP_BT_END overwrite with
+ STK_VOID: every pop-stop entry, plus STK_NULL_CHECK_START */
+#define IS_TO_VOID_TARGET(stk) \
+ (((stk)->type & STK_MASK_POP_USED) || (stk)->type == STK_NULL_CHECK_START)
+/*
+ * Per-call matching state handed to match_at().
+ */
+typedef struct {
+ void* stack_p; /* stack recorded by STACK_SAVE, or NULL */
+ int stack_n; /* number of entries in stack_p */
+ OnigOptionType options;
+ OnigRegion* region;
+ UChar* start; /* search start position (for \G: BEGIN_POSITION) */
+} MatchArg;
+/* Initialise a MatchArg; stack_n is left unset and is only read while
+ stack_p is non-NULL (see STACK_INIT). */
+#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
+ (msa).stack_p = (void* )0;\
+ (msa).options = (arg_option);\
+ (msa).region = (arg_region);\
+ (msa).start = (arg_start);\
+} while (0)
+/* Release the saved stack, if any.  NOTE: expands to a bare `if`, so it
+ must not be used directly before an `else`. */
+#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
+
+/*
+ * Set up the work area for one match_at() call.  Expects the locals
+ * stk_alloc, stk_base, stk, stk_end and msa to be in scope.
+ *
+ * If msa already holds a saved stack, only (ptr_num) pointer-sized slots
+ * are taken from xalloca() and the saved stack (msa->stack_n entries) is
+ * reused.  Otherwise a single xalloca() block provides both the slots and
+ * (stack_num) StackType entries.
+ */
+#define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\
+ if (msa->stack_p) {\
+ alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\
+ stk_alloc = (StackType* )(msa->stack_p);\
+ stk_base = stk_alloc;\
+ stk = stk_base;\
+ stk_end = stk_base + msa->stack_n;\
+ }\
+ else {\
+ alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\
+ + sizeof(StackType) * (stack_num));\
+ stk_alloc = (StackType* )(alloc_addr + sizeof(char*) * (ptr_num));\
+ stk_base = stk_alloc;\
+ stk = stk_base;\
+ stk_end = stk_base + (stack_num);\
+ }\
+} while(0)
+/* If the stack no longer lives at its initial address (stk_base differs
+ from stk_alloc), record its base and size in msa. */
+#define STACK_SAVE do{\
+ if (stk_base != stk_alloc) {\
+ msa->stack_p = stk_base;\
+ msa->stack_n = stk_end - stk_base;\
+ };\
+} while(0)
+
+/*
+ * Double the capacity of the backtrack stack.
+ *
+ * While the stack is still the initial block (stk_base == stk_alloc and
+ * msa holds no saved stack) the entries are copied into a fresh xmalloc()
+ * block.  Otherwise the block is grown with xrealloc(), subject to
+ * MATCH_STACK_LIMIT_SIZE.
+ *
+ * On success the three in/out pointers are rewritten for the new block and
+ * 0 is returned.  On failure ONIGERR_MEMORY or
+ * ONIGERR_MATCH_STACK_LIMIT_OVER is returned and the pointers are left
+ * untouched.
+ */
+static int
+stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
+             StackType** arg_stk, StackType* stk_alloc, MatchArg* msa)
+{
+  /* stk_base / stk_end / stk_alloc / msa are the names STACK_SAVE expects */
+  StackType *stk_base = *arg_stk_base;
+  StackType *stk_end  = *arg_stk_end;
+  StackType *stk      = *arg_stk;
+  StackType *grown;
+  int size = stk_end - stk_base;
+
+  if (stk_base != stk_alloc || IS_NOT_NULL(msa->stack_p)) {
+    size *= 2;
+    if (size > MATCH_STACK_LIMIT_SIZE) return ONIGERR_MATCH_STACK_LIMIT_OVER;
+
+    grown = (StackType* )xrealloc(stk_base, sizeof(StackType) * size);
+    if (IS_NULL(grown)) {
+      STACK_SAVE;
+      return ONIGERR_MEMORY;
+    }
+  }
+  else {
+    grown = (StackType* )xmalloc(sizeof(StackType) * size * 2);
+    if (IS_NULL(grown)) {
+      STACK_SAVE;
+      return ONIGERR_MEMORY;
+    }
+    xmemcpy(grown, stk_base, size * sizeof(StackType));
+    size *= 2;
+  }
+
+  *arg_stk      = grown + (stk - stk_base);
+  *arg_stk_base = grown;
+  *arg_stk_end  = grown + size;
+  return 0;
+}
+/*
+ * Make sure (n) free entries remain; otherwise grow the stack once with
+ * stack_double().  On error the enclosing function returns the error code
+ * after STACK_SAVE.
+ */
+#define STACK_ENSURE(n) do {\
+ if (stk_end - stk < (n)) {\
+ int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
+ if (r != 0) { STACK_SAVE; return r; } \
+ }\
+} while(0)
+/* conversion between an entry pointer and its StackIndex */
+#define STACK_AT(index) (stk_base + (index))
+#define GET_STACK_INDEX(stk) ((stk) - stk_base)
+/* push a "state" entry: byte code position plus current/previous string position */
+#define STACK_PUSH(stack_type,pat,s,sprev) do {\
+ STACK_ENSURE(1);\
+ stk->type = (stack_type);\
+ stk->u.state.pcode = (pat);\
+ stk->u.state.pstr = (s);\
+ stk->u.state.pstr_prev = (sprev);\
+ STACK_INC;\
+} while(0)
+/* like STACK_PUSH but without the capacity check and without string positions */
+#define STACK_PUSH_ENSURED(stack_type,pat) do {\
+ stk->type = (stack_type);\
+ stk->u.state.pcode = (pat);\
+ STACK_INC;\
+} while(0)
+/* push an entry that carries only its type */
+#define STACK_PUSH_TYPE(stack_type) do {\
+ STACK_ENSURE(1);\
+ stk->type = (stack_type);\
+ STACK_INC;\
+} while(0)
+/* typed shorthands for the pushes above */
+#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
+#define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev)
+#define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev)
+#define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
+#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \
+ STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev)
+/* push a repeat entry for repeat (id) with its count starting at 0 */
+#define STACK_PUSH_REPEAT(id, pat) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_REPEAT;\
+ stk->u.repeat.num = (id);\
+ stk->u.repeat.pcode = (pat);\
+ stk->u.repeat.count = 0;\
+ STACK_INC;\
+} while(0)
+/* record that the count of the STK_REPEAT entry at (sindex) was raised,
+ so that popping this entry can decrement it again */
+#define STACK_PUSH_REPEAT_INC(sindex) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_REPEAT_INC;\
+ stk->u.repeat_inc.si = (sindex);\
+ STACK_INC;\
+} while(0)
+/* push the start of group (mnum) at (s); the previous start/end indexes
+ are saved in the entry, then mem_start_stk[mnum] is pointed at it and
+ mem_end_stk[mnum] is invalidated */
+#define STACK_PUSH_MEM_START(mnum, s) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_MEM_START;\
+ stk->u.mem.num = (mnum);\
+ stk->u.mem.pstr = (s);\
+ stk->u.mem.start = mem_start_stk[mnum];\
+ stk->u.mem.end = mem_end_stk[mnum];\
+ mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
+ mem_end_stk[mnum] = INVALID_STACK_INDEX;\
+ STACK_INC;\
+} while(0)
+/* push the end of group (mnum) at (s); the previous start/end indexes are
+ saved in the entry and mem_end_stk[mnum] is pointed at it */
+#define STACK_PUSH_MEM_END(mnum, s) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_MEM_END;\
+ stk->u.mem.num = (mnum);\
+ stk->u.mem.pstr = (s);\
+ stk->u.mem.start = mem_start_stk[mnum];\
+ stk->u.mem.end = mem_end_stk[mnum];\
+ mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
+ STACK_INC;\
+} while(0)
+/* push an end marker for group (mnum) that carries no position */
+#define STACK_PUSH_MEM_END_MARK(mnum) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_MEM_END_MARK;\
+ stk->u.mem.num = (mnum);\
+ STACK_INC;\
+} while(0)
+/*
+ * Scan downwards from the top for the STK_MEM_START entry of group (mnum)
+ * that is not closed by a later MEM_END / MEM_END_MARK of the same group;
+ * k is left on that entry.  If none is found the loop ends with k at
+ * stk_base.
+ */
+#define STACK_GET_MEM_START(mnum, k) do {\
+ int level = 0;\
+ k = stk;\
+ while (k > stk_base) {\
+ k--;\
+ if ((k->type == STK_MEM_END_MARK || k->type == STK_MEM_END) \
+ && k->u.mem.num == (mnum)) {\
+ level++;\
+ }\
+ else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
+ if (level == 0) break;\
+ level--;\
+ }\
+ }\
+} while (0)
+/*
+ * Scan upwards from k for the next outermost START..END pair of group
+ * (mnum).  (start) receives the position of the outermost MEM_START and
+ * (end) that of its matching MEM_END; k is left on that MEM_END.  If no
+ * complete pair exists, k reaches stk and (end) is not assigned.
+ */
+#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
+ int level = 0;\
+ while (k < stk) {\
+ if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
+ if (level == 0) (start) = k->u.mem.pstr;\
+ level++;\
+ }\
+ else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
+ level--;\
+ if (level == 0) {\
+ (end) = k->u.mem.pstr;\
+ break;\
+ }\
+ }\
+ k++;\
+ }\
+} while (0)
+/* remember the string position (s) at which null check (cnum) started */
+#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_NULL_CHECK_START;\
+ stk->u.null_check.num = (cnum);\
+ stk->u.null_check.pstr = (s);\
+ STACK_INC;\
+} while(0)
+/* mark the end of null check (cnum); counted by the _REC null-check macros */
+#define STACK_PUSH_NULL_CHECK_END(cnum) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_NULL_CHECK_END;\
+ stk->u.null_check.num = (cnum);\
+ STACK_INC;\
+} while(0)
+/* push a call frame whose return address is the byte code position (pat) */
+#define STACK_PUSH_CALL_FRAME(pat) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_CALL_FRAME;\
+ stk->u.call_frame.ret_addr = (pat);\
+ STACK_INC;\
+} while(0)
+/* push a marker telling later scans that a call frame below has returned */
+#define STACK_PUSH_RETURN do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_RETURN;\
+ STACK_INC;\
+} while(0)
+
+/*
+ * In ONIG_DEBUG builds a scan that walks below the stack base jumps to the
+ * enclosing function's stack_error label; otherwise the check compiles to
+ * nothing.
+ */
+#ifdef ONIG_DEBUG
+#define STACK_BASE_CHECK(p) \
+ if ((p) < stk_base) goto stack_error;
+#else
+#define STACK_BASE_CHECK(p)
+#endif
+/* drop the top entry without looking at it */
+#define STACK_POP_ONE do {\
+ stk--;\
+ STACK_BASE_CHECK(stk); \
+} while(0)
+/*
+ * Backtrack: drop entries until one whose type has a bit of
+ * STK_MASK_POP_USED set; stk is left pointing at that entry.
+ * What is undone on the way depends on pop_level:
+ *   STACK_POP_LEVEL_FREE      - nothing
+ *   STACK_POP_LEVEL_MEM_START - mem_start_stk/mem_end_stk are restored
+ *                               from MEM_START entries
+ *   otherwise                 - additionally MEM_END entries restore the
+ *                               same tables and REPEAT_INC entries
+ *                               decrement the referenced repeat count
+ */
+#define STACK_POP do {\
+ switch (pop_level) {\
+ case STACK_POP_LEVEL_FREE:\
+ while (1) {\
+ stk--;\
+ STACK_BASE_CHECK(stk); \
+ if ((stk->type & STK_MASK_POP_USED) != 0) break;\
+ }\
+ break;\
+ case STACK_POP_LEVEL_MEM_START:\
+ while (1) {\
+ stk--;\
+ STACK_BASE_CHECK(stk); \
+ if ((stk->type & STK_MASK_POP_USED) != 0) break;\
+ else if (stk->type == STK_MEM_START) {\
+ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
+ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
+ }\
+ }\
+ break;\
+ default:\
+ while (1) {\
+ stk--;\
+ STACK_BASE_CHECK(stk); \
+ if ((stk->type & STK_MASK_POP_USED) != 0) break;\
+ else if (stk->type == STK_MEM_START) {\
+ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
+ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
+ }\
+ else if (stk->type == STK_REPEAT_INC) {\
+ STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
+ }\
+ else if (stk->type == STK_MEM_END) {\
+ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
+ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
+ }\
+ }\
+ break;\
+ }\
+} while(0)
+/*
+ * Drop entries down to the nearest STK_POS_NOT entry (stk is left on it),
+ * restoring group tables and repeat counts on the way regardless of
+ * pop_level.
+ */
+#define STACK_POP_TIL_POS_NOT do {\
+ while (1) {\
+ stk--;\
+ STACK_BASE_CHECK(stk); \
+ if (stk->type == STK_POS_NOT) break;\
+ else if (stk->type == STK_MEM_START) {\
+ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
+ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
+ }\
+ else if (stk->type == STK_REPEAT_INC) {\
+ STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
+ }\
+ else if (stk->type == STK_MEM_END) {\
+ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
+ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
+ }\
+ }\
+} while(0)
+/* same as STACK_POP_TIL_POS_NOT, but stops at the nearest STK_LOOK_BEHIND_NOT */
+#define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
+ while (1) {\
+ stk--;\
+ STACK_BASE_CHECK(stk); \
+ if (stk->type == STK_LOOK_BEHIND_NOT) break;\
+ else if (stk->type == STK_MEM_START) {\
+ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
+ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
+ }\
+ else if (stk->type == STK_REPEAT_INC) {\
+ STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
+ }\
+ else if (stk->type == STK_MEM_END) {\
+ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
+ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
+ }\
+ }\
+} while(0)
+/*
+ * Walk down from the top without popping: every IS_TO_VOID_TARGET entry
+ * is turned into STK_VOID until the nearest STK_POS entry, which is voided
+ * as well.  k is left on that (former STK_POS) entry, whose u.state fields
+ * are untouched.
+ */
+#define STACK_POS_END(k) do {\
+ k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k); \
+ if (IS_TO_VOID_TARGET(k)) {\
+ k->type = STK_VOID;\
+ }\
+ else if (k->type == STK_POS) {\
+ k->type = STK_VOID;\
+ break;\
+ }\
+ }\
+} while(0)
+/* same voiding walk as STACK_POS_END, ending at the nearest STK_STOP_BT entry */
+#define STACK_STOP_BT_END do {\
+ StackType *k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k); \
+ if (IS_TO_VOID_TARGET(k)) {\
+ k->type = STK_VOID;\
+ }\
+ else if (k->type == STK_STOP_BT) {\
+ k->type = STK_VOID;\
+ break;\
+ }\
+ }\
+} while(0)
+/*
+ * Find the nearest STK_NULL_CHECK_START entry with id (id) and set
+ * (isnull) to whether the string position recorded there equals (s).
+ */
+#define STACK_NULL_CHECK(isnull,id,s) do {\
+ StackType* k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k); \
+ if (k->type == STK_NULL_CHECK_START) {\
+ if (k->u.null_check.num == (id)) {\
+ (isnull) = (k->u.null_check.pstr == (s));\
+ break;\
+ }\
+ }\
+ }\
+} while(0)
+/*
+ * Variant of STACK_NULL_CHECK that keeps a nesting level: every
+ * STK_NULL_CHECK_END seen (of any id) raises it, and a matching-id START
+ * seen at a non-zero level lowers it instead of being used.
+ */
+#define STACK_NULL_CHECK_REC(isnull,id,s) do {\
+ int level = 0;\
+ StackType* k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k); \
+ if (k->type == STK_NULL_CHECK_START) {\
+ if (k->u.null_check.num == (id)) {\
+ if (level == 0) {\
+ (isnull) = (k->u.null_check.pstr == (s));\
+ break;\
+ }\
+ else level--;\
+ }\
+ }\
+ else if (k->type == STK_NULL_CHECK_END) {\
+ level++;\
+ }\
+ }\
+} while(0)
+/*
+ * Null check that also looks at the state of capture groups.
+ * If the position recorded at the matching NULL_CHECK_START differs from
+ * (s), (isnull) is 0.  Otherwise every STK_MEM_START entry pushed since
+ * then is examined through the previous start/end values saved in it:
+ *   0  - a saved end is INVALID_STACK_INDEX, or the saved start and end
+ *        positions differ
+ *   -1 - all examined ranges are empty, but one ends at a position other
+ *        than (s)
+ *   1  - otherwise
+ * When the group's bit in reg->bt_mem_end is clear, the saved end value is
+ * cast directly to a pointer instead of being used as a stack index.
+ */
+#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
+ StackType* k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k); \
+ if (k->type == STK_NULL_CHECK_START) {\
+ if (k->u.null_check.num == (id)) {\
+ if (k->u.null_check.pstr != (s)) {\
+ (isnull) = 0;\
+ break;\
+ }\
+ else {\
+ UChar* endp;\
+ (isnull) = 1;\
+ while (k < stk) {\
+ if (k->type == STK_MEM_START) {\
+ if (k->u.mem.end == INVALID_STACK_INDEX) {\
+ (isnull) = 0; break;\
+ }\
+ if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
+ endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
+ else\
+ endp = (UChar* )k->u.mem.end;\
+ if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
+ (isnull) = 0; break;\
+ }\
+ else if (endp != s) {\
+ (isnull) = -1; /* empty, but position changed */ \
+ }\
+ }\
+ k++;\
+ }\
+ break;\
+ }\
+ }\
+ }\
+ }\
+} while(0)
+/*
+ * STACK_NULL_CHECK_MEMST with a nesting level: a STK_NULL_CHECK_END with
+ * the same id raises the level, and a matching START seen at a non-zero
+ * level lowers it instead of being used.
+ */
+#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
+ int level = 0;\
+ StackType* k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k); \
+ if (k->type == STK_NULL_CHECK_START) {\
+ if (k->u.null_check.num == (id)) {\
+ if (level == 0) {\
+ if (k->u.null_check.pstr != (s)) {\
+ (isnull) = 0;\
+ break;\
+ }\
+ else {\
+ UChar* endp;\
+ (isnull) = 1;\
+ while (k < stk) {\
+ if (k->type == STK_MEM_START) {\
+ if (k->u.mem.end == INVALID_STACK_INDEX) {\
+ (isnull) = 0; break;\
+ }\
+ if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
+ endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
+ else\
+ endp = (UChar* )k->u.mem.end;\
+ if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
+ (isnull) = 0; break;\
+ }\
+ else if (endp != s) {\
+ (isnull) = -1; /* empty, but position changed */ \
+ }\
+ }\
+ k++;\
+ }\
+ break;\
+ }\
+ }\
+ else {\
+ level--;\
+ }\
+ }\
+ }\
+ else if (k->type == STK_NULL_CHECK_END) {\
+ if (k->u.null_check.num == (id)) level++;\
+ }\
+ }\
+} while(0)
+/*
+ * Leave k on the nearest STK_REPEAT entry with id (id) that is seen while
+ * the level counter is 0.  Scanning downwards, the counter is raised at
+ * each STK_RETURN and lowered at each STK_CALL_FRAME.
+ */
+#define STACK_GET_REPEAT(id, k) do {\
+ int level = 0;\
+ k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k); \
+ if (k->type == STK_REPEAT) {\
+ if (level == 0) {\
+ if (k->u.repeat.num == (id)) {\
+ break;\
+ }\
+ }\
+ }\
+ else if (k->type == STK_CALL_FRAME) level--;\
+ else if (k->type == STK_RETURN) level++;\
+ }\
+} while (0)
+/*
+ * Fetch into (addr) the return address of the innermost call frame that
+ * has not returned yet: each STK_RETURN seen on the way down cancels one
+ * STK_CALL_FRAME.
+ */
+#define STACK_RETURN(addr) do {\
+ int level = 0;\
+ StackType* k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k); \
+ if (k->type == STK_CALL_FRAME) {\
+ if (level == 0) {\
+ (addr) = k->u.call_frame.ret_addr;\
+ break;\
+ }\
+ else level--;\
+ }\
+ else if (k->type == STK_RETURN)\
+ level++;\
+ }\
+} while(0)
+
+/*
+ * Compare (len) bytes at s1 and s2, jumping to the enclosing `fail` label
+ * on the first difference.  s1, s2 and len are modified: the pointers are
+ * advanced past the compared bytes and len is counted down.
+ */
+#define STRING_CMP(s1,s2,len) do {\
+ while (len-- > 0) {\
+ if (*s1++ != *s2++) goto fail;\
+ }\
+} while(0)
+/* case-insensitive form: uses string_cmp_ic() with the local `encode`;
+ (ps2) is the address of the second pointer */
+#define STRING_CMP_IC(s1,ps2,len) do {\
+ if (string_cmp_ic(encode, s1, ps2, len) == 0) \
+ goto fail; \
+} while(0)
+
+/*
+ * Case-insensitive comparison of the mblen bytes starting at s1 with the
+ * text at *ps2.  Both sides are lower-cased one character at a time with
+ * ONIGENC_MBC_TO_LOWER and the results are compared byte by byte.
+ *
+ * Returns 1 when everything matched, in which case *ps2 is advanced past
+ * the compared text; returns 0 on the first difference, leaving *ps2
+ * unchanged.
+ */
+static int string_cmp_ic(OnigEncoding enc,
+                         UChar* s1, UChar** ps2, int mblen)
+{
+  UChar low1[ONIGENC_MBC_TO_LOWER_MAXLEN];
+  UChar low2[ONIGENC_MBC_TO_LOWER_MAXLEN];
+  UChar *s2 = *ps2;
+  UChar *limit = s1 + mblen;
+  int n1, n2, i;
+
+  while (s1 < limit) {
+    n1 = ONIGENC_MBC_TO_LOWER(enc, s1, low1);
+    n2 = ONIGENC_MBC_TO_LOWER(enc, s2, low2);
+    if (n1 != n2) return 0;
+
+    for (i = 0; i < n1; i++) {
+      if (low1[i] != low2[i]) return 0;
+    }
+
+    s1 += enc_len(enc, *s1);
+    s2 += enc_len(enc, *s2);
+  }
+
+  *ps2 = s2;
+  return 1;
+}
+/* like STRING_CMP, but reports the outcome in (is_fail) (1 on mismatch,
+ else 0) instead of jumping */
+#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
+ is_fail = 0;\
+ while (len-- > 0) {\
+ if (*s1++ != *s2++) {\
+ is_fail = 1; break;\
+ }\
+ }\
+} while(0)
+/* like STRING_CMP_IC, but reports the outcome in (is_fail) instead of jumping */
+#define STRING_CMP_VALUE_IC(s1,ps2,len,is_fail) do {\
+ if (string_cmp_ic(encode, s1, ps2, len) == 0) \
+ is_fail = 1; \
+ else \
+ is_fail = 0; \
+} while(0)
+/* position tests against the locals `str` (subject start) and `end` (subject end) */
+#define ON_STR_BEGIN(s) ((s) == str)
+#define ON_STR_END(s) ((s) == end)
+#define IS_EMPTY_STR (str == end)
+/* jump to `fail` unless at least (n) bytes remain between the local `s`
+ and `end`.  NOTE: expands to a bare `if` statement. */
+#define DATA_ENSURE(n) \
+ if (s + (n) > end) goto fail
+/* expression form of the same test: true when (n) bytes remain */
+#define DATA_ENSURE_CHECK(n) (s + (n) <= end)
+/*
+ * Optional per-opcode statistics, compiled only with
+ * ONIG_DEBUG_STATISTICS.
+ */
+#ifdef ONIG_DEBUG_STATISTICS
+/* USE_TIMEOFDAY is defined unconditionally here, so the times() branch
+ below is never compiled */
+#define USE_TIMEOFDAY
+
+#ifdef USE_TIMEOFDAY
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+static struct timeval ts, te;
+#define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
+#define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
+ (((te).tv_sec - (ts).tv_sec)*1000000))
+#else
+#ifdef HAVE_SYS_TIMES_H
+#include <sys/times.h>
+#endif
+static struct tms ts, te;
+#define GETTIME(t) times(&(t))
+#define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
+#endif
+/* counters, all indexed by opcode value */
+static int OpCounter[256]; /* times each opcode was entered */
+static int OpPrevCounter[256]; /* times each opcode directly preceded OpPrevTarget */
+static unsigned long OpTime[256]; /* accumulated TIMEDIFF per opcode */
+static int OpCurr = OP_FINISH; /* opcode most recently entered */
+static int OpPrevTarget = OP_FAIL; /* opcode whose predecessors are counted */
+static int MaxStackDepth = 0; /* largest stk - stk_base seen by STACK_INC */
+/* called when an opcode starts: update the counters and take the start time */
+#define STAT_OP_IN(opcode) do {\
+ if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
+ OpCurr = opcode;\
+ OpCounter[opcode]++;\
+ GETTIME(ts);\
+} while (0)
+/* called when an opcode ends: add the elapsed time to the current opcode */
+#define STAT_OP_OUT do {\
+ GETTIME(te);\
+ OpTime[OpCurr] += TIMEDIFF(te, ts);\
+} while (0)
+
+#ifdef RUBY_PLATFORM
+/*
+ * :nodoc:
+ *
+ * Ruby-level wrapper registered by onig_statistics_init() as the global
+ * function "onig_stat_print": writes the statistics to stderr and returns
+ * nil.
+ */
+static VALUE onig_stat_print()
+{
+ onig_print_statistics(stderr);
+ return Qnil;
+}
+#endif
+
+/*
+ * Reset all statistics counters.  Under RUBY_PLATFORM this also registers
+ * the global Ruby function "onig_stat_print".
+ */
+extern void onig_statistics_init()
+{
+  int op;
+
+  MaxStackDepth = 0;
+  for (op = 0; op < 256; op++) {
+    OpTime[op]        = 0;
+    OpPrevCounter[op] = 0;
+    OpCounter[op]     = 0;
+  }
+
+#ifdef RUBY_PLATFORM
+  rb_define_global_function("onig_stat_print", onig_stat_print, 0);
+#endif
+}
+
+/*
+ * Write the collected statistics to f: one line per OnigOpInfo entry
+ * (count, "prev" count, accumulated time, opcode name) followed by the
+ * maximum stack depth that was reached.
+ */
+extern void
+onig_print_statistics(FILE* f)
+{
+  int i;
+
+  fprintf(f, " count prev time\n");
+  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
+    /* OpTime[] is unsigned long, so it has to be printed with %lu */
+    fprintf(f, "%8d: %8d: %10lu: %s\n",
+            OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
+  }
+  fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
+}
+/* statistics build: advance the stack top and track the maximum depth */
+#define STACK_INC do {\
+ stk++;\
+ if (stk - stk_base > MaxStackDepth) \
+ MaxStackDepth = stk - stk_base;\
+} while (0)
+
+#else
+#define STACK_INC stk++
+/* without ONIG_DEBUG_STATISTICS the probes expand to nothing */
+#define STAT_OP_IN(opcode)
+#define STAT_OP_OUT
+#endif
+
+/*
+ * Test whether `code` lies in one of the ranges stored at p.
+ *
+ * p holds a range count n (read with GET_CODE_POINT) followed by n
+ * (low, high) OnigCodePoint pairs.  The binary search below relies on the
+ * pairs being sorted in ascending order.
+ *
+ * Returns 1 when code is inside a range, otherwise 0.
+ */
+extern int
+onig_is_in_code_range(UChar* p, OnigCodePoint code)
+{
+  OnigCodePoint n, lo, hi, mid;
+  OnigCodePoint* ranges;
+
+  GET_CODE_POINT(n, p);
+  ranges = ((OnigCodePoint* )p) + 1;   /* skip the count word */
+
+  /* find the first range whose upper bound is not below code */
+  lo = 0;
+  hi = n;
+  while (lo < hi) {
+    mid = (lo + hi) >> 1;
+    if (code > ranges[mid * 2 + 1])
+      lo = mid + 1;
+    else
+      hi = mid;
+  }
+
+  if (lo < n && code >= ranges[lo * 2])
+    return 1;
+
+  return 0;
+}
+
+
+/* matching region of POSIX API
+ *
+ * When IS_POSIX_REGION(msa->options) holds, match_at() treats msa->region
+ * as an array of posix_regmatch_t and stores byte offsets relative to the
+ * start of the subject string in it.
+ */
+typedef int regoff_t;
+
+typedef struct {
+ regoff_t rm_so; /* start offset of the match */
+ regoff_t rm_eo; /* end offset of the match */
+} posix_regmatch_t;
+
+/* match data(str - end) from position (sstart). */
+/* if sstart == str then set sprev to NULL. */
+static int
+match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
+ UChar* sprev, MatchArg* msa)
+{
+ static UChar FinishCode[] = { OP_FINISH };
+
+ int i, n, num_mem, best_len, pop_level;
+ LengthType tlen, tlen2;
+ MemNumType mem;
+ RelAddrType addr;
+ OnigOptionType option = reg->options;
+ OnigEncoding encode = reg->enc;
+ int ignore_case;
+ UChar *s, *q, *sbegin;
+ UChar *p = reg->p;
+ char *alloca_base;
+ StackType *stk_alloc, *stk_base, *stk, *stk_end;
+ StackType *stkp; /* used as any purpose. */
+ StackIndex *repeat_stk;
+ StackIndex *mem_start_stk, *mem_end_stk;
+ n = reg->num_repeat + reg->num_mem * 2;
+
+ STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
+ ignore_case = IS_IGNORECASE(option);
+ pop_level = reg->stack_pop_level;
+ num_mem = reg->num_mem;
+ repeat_stk = (StackIndex* )alloca_base;
+
+ mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat);
+ mem_end_stk = mem_start_stk + num_mem;
+ mem_start_stk--; /* for index start from 1,
+ mem_start_stk[1]..mem_start_stk[num_mem] */
+ mem_end_stk--; /* for index start from 1,
+ mem_end_stk[1]..mem_end_stk[num_mem] */
+ for (i = 1; i <= num_mem; i++) {
+ mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
+ }
+
+#ifdef ONIG_DEBUG_MATCH
+ fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n",
+ (int )str, (int )end, (int )sstart, (int )sprev);
+ fprintf(stderr, "size: %d, start offset: %d\n",
+ (int )(end - str), (int )(sstart - str));
+#endif
+
+ STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */
+ best_len = ONIG_MISMATCH;
+ s = sstart;
+ while (1) {
+#ifdef ONIG_DEBUG_MATCH
+ {
+ UChar *q, *bp, buf[50];
+ int len;
+ fprintf(stderr, "%4d> \"", (int )(s - str));
+ bp = buf;
+ for (i = 0, q = s; i < 7 && q < end; i++) {
+ len = enc_len(encode, *q);
+ while (len-- > 0) *bp++ = *q++;
+ }
+ if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
+ else { xmemcpy(bp, "\"", 1); bp += 1; }
+ *bp = 0;
+ fputs(buf, stderr);
+ for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
+ onig_print_compiled_byte_code(stderr, p, NULL);
+ fprintf(stderr, "\n");
+ }
+#endif
+
+ sbegin = s;
+ switch (*p++) {
+ case OP_END: STAT_OP_IN(OP_END);
+ n = s - sstart;
+ if (n > best_len) {
+ OnigRegion* region = msa->region;
+ best_len = n;
+ if (region) {
+#ifdef USE_POSIX_REGION_OPTION
+ if (IS_POSIX_REGION(msa->options)) {
+ posix_regmatch_t* rmt = (posix_regmatch_t* )region;
+
+ rmt[0].rm_so = sstart - str;
+ rmt[0].rm_eo = s - str;
+ for (i = 1; i <= num_mem; i++) {
+ if (mem_end_stk[i] != INVALID_STACK_INDEX) {
+ if (BIT_STATUS_AT(reg->bt_mem_start, i))
+ rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
+ else
+ rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str;
+
+ rmt[i].rm_eo = (BIT_STATUS_AT(reg->bt_mem_end, i)
+ ? STACK_AT(mem_end_stk[i])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[i])) - str;
+ }
+ else {
+ rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
+ }
+ }
+ }
+ else {
+#endif /* USE_POSIX_REGION_OPTION */
+ region->beg[0] = sstart - str;
+ region->end[0] = s - str;
+ for (i = 1; i <= num_mem; i++) {
+ if (mem_end_stk[i] != INVALID_STACK_INDEX) {
+ if (BIT_STATUS_AT(reg->bt_mem_start, i))
+ region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
+ else
+ region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
+
+ region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
+ ? STACK_AT(mem_end_stk[i])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[i])) - str;
+ }
+ else {
+ region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
+ }
+ }
+
+ if (reg->capture_history != 0) {
+ UChar *pstart, *pend;
+ for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
+ if (BIT_STATUS_AT(reg->capture_history, i) != 0) {
+ stkp = stk_base;
+ do {
+ STACK_GET_MEM_RANGE(stkp, i, pstart, pend);
+ if (stkp < stk) {
+ int r;
+ r = region_list_add_entry(region, i,
+ pstart - str, pend - str);
+ if (r) {
+ STACK_SAVE;
+ return r;
+ }
+ }
+ stkp++;
+ } while (stkp < stk);
+ }
+ }
+ } /* list of captures */
+#ifdef USE_POSIX_REGION_OPTION
+ } /* else IS_POSIX_REGION() */
+#endif
+ } /* if (region) */
+ } /* n > best_len */
+ STAT_OP_OUT;
+
+ if (IS_FIND_CONDITION(option)) {
+ if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
+ best_len = ONIG_MISMATCH;
+ goto fail; /* for retry */
+ }
+ if (IS_FIND_LONGEST(option) && s < end) {
+ goto fail; /* for retry */
+ }
+ }
+ else {
+ /* default behavior: return first-matching result. */
+ goto finish;
+ }
+ break;
+
+ case OP_EXACT1: STAT_OP_IN(OP_EXACT1);
+#if 0
+ DATA_ENSURE(1);
+ if (*p != *s) goto fail;
+ p++; s++;
+#endif
+ if (*p != *s++) goto fail;
+ DATA_ENSURE(0);
+ p++;
+ STAT_OP_OUT;
+ break;
+
+ case OP_EXACT1_IC: STAT_OP_IN(OP_EXACT1_IC);
+ {
+ int len;
+ UChar *q, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
+
+ len = ONIGENC_MBC_TO_LOWER(encode, s, lowbuf);
+ DATA_ENSURE(len);
+ q = lowbuf;
+ s += enc_len(encode, *s);
+ while (len-- > 0) {
+ if (*p != *q) goto fail;
+ p++; q++;
+ }
+ }
+ STAT_OP_OUT;
+ break;
+
+ case OP_EXACT2: STAT_OP_IN(OP_EXACT2);
+ DATA_ENSURE(2);
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ sprev = s;
+ p++; s++;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_EXACT3: STAT_OP_IN(OP_EXACT3);
+ DATA_ENSURE(3);
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ sprev = s;
+ p++; s++;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_EXACT4: STAT_OP_IN(OP_EXACT4);
+ DATA_ENSURE(4);
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ sprev = s;
+ p++; s++;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_EXACT5: STAT_OP_IN(OP_EXACT5);
+ DATA_ENSURE(5);
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ sprev = s;
+ p++; s++;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_EXACTN: STAT_OP_IN(OP_EXACTN);
+ GET_LENGTH_INC(tlen, p);
+ DATA_ENSURE(tlen);
+ while (tlen-- > 0) {
+ if (*p++ != *s++) goto fail;
+ }
+ sprev = s - 1;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_EXACTN_IC: STAT_OP_IN(OP_EXACTN_IC);
+ {
+ int len;
+ UChar *q, *endp, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
+
+ GET_LENGTH_INC(tlen, p);
+ endp = p + tlen;
+
+ while (p < endp) {
+ len = ONIGENC_MBC_TO_LOWER(encode, s, lowbuf);
+ DATA_ENSURE(len);
+ sprev = s;
+ s += enc_len(encode, *s);
+ q = lowbuf;
+ while (len-- > 0) {
+ if (*p != *q) goto fail;
+ p++; q++;
+ }
+ }
+ }
+
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_EXACTMB2N1: STAT_OP_IN(OP_EXACTMB2N1);
+ DATA_ENSURE(2);
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ STAT_OP_OUT;
+ break;
+
+ case OP_EXACTMB2N2: STAT_OP_IN(OP_EXACTMB2N2);
+ DATA_ENSURE(4);
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ sprev = s;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_EXACTMB2N3: STAT_OP_IN(OP_EXACTMB2N3);
+ DATA_ENSURE(6);
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ sprev = s;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_EXACTMB2N: STAT_OP_IN(OP_EXACTMB2N);
+ GET_LENGTH_INC(tlen, p);
+ DATA_ENSURE(tlen * 2);
+ while (tlen-- > 0) {
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ }
+ sprev = s - 2;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_EXACTMB3N: STAT_OP_IN(OP_EXACTMB3N);
+ GET_LENGTH_INC(tlen, p);
+ DATA_ENSURE(tlen * 3);
+ while (tlen-- > 0) {
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ }
+ sprev = s - 3;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_EXACTMBN: STAT_OP_IN(OP_EXACTMBN);
+ GET_LENGTH_INC(tlen, p); /* mb-len */
+ GET_LENGTH_INC(tlen2, p); /* string len */
+ tlen2 *= tlen;
+ DATA_ENSURE(tlen2);
+ while (tlen2-- > 0) {
+ if (*p != *s) goto fail;
+ p++; s++;
+ }
+ sprev = s - tlen;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_CCLASS: STAT_OP_IN(OP_CCLASS);
+ DATA_ENSURE(1);
+ if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
+ p += SIZE_BITSET;
+ s += enc_len(encode, *s); /* OP_CCLASS can match mb-code. \D, \S */
+ STAT_OP_OUT;
+ break;
+
+ case OP_CCLASS_MB: STAT_OP_IN(OP_CCLASS_MB);
+ if (! ONIGENC_IS_MBC_HEAD(encode, *s)) goto fail;
+
+ cclass_mb:
+ GET_LENGTH_INC(tlen, p);
+ {
+ OnigCodePoint code;
+ UChar *ss;
+ int mb_len = enc_len(encode, *s);
+
+ DATA_ENSURE(mb_len);
+ ss = s;
+ s += mb_len;
+ code = ONIGENC_MBC_TO_CODE(encode, ss, s);
+
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+ if (! onig_is_in_code_range(p, code)) goto fail;
+#else
+ q = p;
+ ALIGNMENT_RIGHT(q);
+ if (! onig_is_in_code_range(q, code)) goto fail;
+#endif
+ }
+ p += tlen;
+ STAT_OP_OUT;
+ break;
+
+ case OP_CCLASS_MIX: STAT_OP_IN(OP_CCLASS_MIX);
+ DATA_ENSURE(1);
+ if (ONIGENC_IS_MBC_HEAD(encode, *s)) {
+ p += SIZE_BITSET;
+ goto cclass_mb;
+ }
+ else {
+ if (BITSET_AT(((BitSetRef )p), *s) == 0)
+ goto fail;
+
+ p += SIZE_BITSET;
+ GET_LENGTH_INC(tlen, p);
+ p += tlen;
+ s++;
+ }
+ STAT_OP_OUT;
+ break;
+
+ case OP_CCLASS_NOT: STAT_OP_IN(OP_CCLASS_NOT);
+ DATA_ENSURE(1);
+ if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
+ p += SIZE_BITSET;
+ s += enc_len(encode, *s);
+ STAT_OP_OUT;
+ break;
+
+ case OP_CCLASS_MB_NOT: STAT_OP_IN(OP_CCLASS_MB_NOT);
+ if (! ONIGENC_IS_MBC_HEAD(encode, *s)) {
+ DATA_ENSURE(1);
+ s++;
+ GET_LENGTH_INC(tlen, p);
+ p += tlen;
+ goto cc_mb_not_success;
+ }
+
+ cclass_mb_not:
+ GET_LENGTH_INC(tlen, p);
+ {
+ OnigCodePoint code;
+ UChar *ss;
+ int mb_len = enc_len(encode, *s);
+
+ if (s + mb_len > end) {
+ s = end;
+ p += tlen;
+ goto cc_mb_not_success;
+ }
+
+ ss = s;
+ s += mb_len;
+ code = ONIGENC_MBC_TO_CODE(encode, ss, s);
+
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+ if (onig_is_in_code_range(p, code)) goto fail;
+#else
+ q = p;
+ ALIGNMENT_RIGHT(q);
+ if (onig_is_in_code_range(q, code)) goto fail;
+#endif
+ }
+ p += tlen;
+
+ cc_mb_not_success:
+ STAT_OP_OUT;
+ break;
+
+ case OP_CCLASS_MIX_NOT: STAT_OP_IN(OP_CCLASS_MIX_NOT);
+ DATA_ENSURE(1);
+ if (ONIGENC_IS_MBC_HEAD(encode, *s)) {
+ p += SIZE_BITSET;
+ goto cclass_mb_not;
+ }
+ else {
+ if (BITSET_AT(((BitSetRef )p), *s) != 0)
+ goto fail;
+
+ p += SIZE_BITSET;
+ GET_LENGTH_INC(tlen, p);
+ p += tlen;
+ s++;
+ }
+ STAT_OP_OUT;
+ break;
+
+ case OP_ANYCHAR: STAT_OP_IN(OP_ANYCHAR);
+ n = enc_len(encode, *s);
+ if (n > 1) {
+ DATA_ENSURE(n);
+ s += n;
+ }
+ else {
+ DATA_ENSURE(1);
+ if (ONIG_IS_NEWLINE(*s)) goto fail;
+ s++;
+ }
+ STAT_OP_OUT;
+ break;
+
+ case OP_ANYCHAR_ML: STAT_OP_IN(OP_ANYCHAR_ML);
+ n = enc_len(encode, *s);
+ DATA_ENSURE(n);
+ s += n;
+ STAT_OP_OUT;
+ break;
+
+ case OP_ANYCHAR_STAR: STAT_OP_IN(OP_ANYCHAR_STAR);
+ while (s < end) {
+ STACK_PUSH_ALT(p, s, sprev);
+ n = enc_len(encode, *s);
+ if (n > 1) {
+ DATA_ENSURE(n);
+ sprev = s;
+ s += n;
+ }
+ else {
+ if (ONIG_IS_NEWLINE(*s)) goto fail;
+ sprev = s;
+ s++;
+ }
+ }
+ STAT_OP_OUT;
+ break;
+
+ case OP_ANYCHAR_ML_STAR: STAT_OP_IN(OP_ANYCHAR_ML_STAR);
+ while (s < end) {
+ STACK_PUSH_ALT(p, s, sprev);
+ n = enc_len(encode, *s);
+ if (n > 1) {
+ DATA_ENSURE(n);
+ sprev = s;
+ s += n;
+ }
+ else {
+ sprev = s;
+ s++;
+ }
+ }
+ STAT_OP_OUT;
+ break;
+
+ case OP_ANYCHAR_STAR_PEEK_NEXT: STAT_OP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
+ while (s < end) {
+ if (*p == *s) {
+ STACK_PUSH_ALT(p + 1, s, sprev);
+ }
+ n = enc_len(encode, *s);
+ if (n > 1) {
+ DATA_ENSURE(n);
+ sprev = s;
+ s += n;
+ }
+ else {
+ if (ONIG_IS_NEWLINE(*s)) goto fail;
+ sprev = s;
+ s++;
+ }
+ }
+ p++;
+ STAT_OP_OUT;
+ break;
+
+ case OP_ANYCHAR_ML_STAR_PEEK_NEXT:STAT_OP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
+ while (s < end) {
+ if (*p == *s) {
+ STACK_PUSH_ALT(p + 1, s, sprev);
+ }
+ n = enc_len(encode, *s);
+ if (n >1) {
+ DATA_ENSURE(n);
+ sprev = s;
+ s += n;
+ }
+ else {
+ sprev = s;
+ s++;
+ }
+ }
+ p++;
+ STAT_OP_OUT;
+ break;
+
+ case OP_WORD: STAT_OP_IN(OP_WORD);
+ DATA_ENSURE(1);
+ if (! ONIGENC_IS_MBC_WORD(encode, s, end))
+ goto fail;
+
+ s += enc_len(encode, *s);
+ STAT_OP_OUT;
+ break;
+
+ case OP_NOT_WORD: STAT_OP_IN(OP_NOT_WORD);
+ DATA_ENSURE(1);
+ if (ONIGENC_IS_MBC_WORD(encode, s, end))
+ goto fail;
+
+ s += enc_len(encode, *s);
+ STAT_OP_OUT;
+ break;
+
+ case OP_WORD_BOUND: STAT_OP_IN(OP_WORD_BOUND);
+ if (ON_STR_BEGIN(s)) {
+ DATA_ENSURE(1);
+ if (! ONIGENC_IS_MBC_WORD(encode, s, end))
+ goto fail;
+ }
+ else if (ON_STR_END(s)) {
+ if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
+ goto fail;
+ }
+ else {
+ if (ONIGENC_IS_MBC_WORD(encode, s, end)
+ == ONIGENC_IS_MBC_WORD(encode, sprev, end))
+ goto fail;
+ }
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_NOT_WORD_BOUND: STAT_OP_IN(OP_NOT_WORD_BOUND);
+ if (ON_STR_BEGIN(s)) {
+ if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end))
+ goto fail;
+ }
+ else if (ON_STR_END(s)) {
+ if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
+ goto fail;
+ }
+ else {
+ if (ONIGENC_IS_MBC_WORD(encode, s, end)
+ != ONIGENC_IS_MBC_WORD(encode, sprev, end))
+ goto fail;
+ }
+ STAT_OP_OUT;
+ continue;
+ break;
+
+#ifdef USE_WORD_BEGIN_END
+ case OP_WORD_BEGIN: STAT_OP_IN(OP_WORD_BEGIN);
+ if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end)) {
+ if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
+ STAT_OP_OUT;
+ continue;
+ }
+ }
+ goto fail;
+ break;
+
+ case OP_WORD_END: STAT_OP_IN(OP_WORD_END);
+ if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
+ if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
+ STAT_OP_OUT;
+ continue;
+ }
+ }
+ goto fail;
+ break;
+#endif
+
+ case OP_BEGIN_BUF: STAT_OP_IN(OP_BEGIN_BUF);
+ if (! ON_STR_BEGIN(s)) goto fail;
+
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_END_BUF: STAT_OP_IN(OP_END_BUF);
+ if (! ON_STR_END(s)) goto fail;
+
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_BEGIN_LINE: STAT_OP_IN(OP_BEGIN_LINE);
+ if (ON_STR_BEGIN(s)) {
+ if (IS_NOTBOL(msa->options)) goto fail;
+ STAT_OP_OUT;
+ continue;
+ }
+ else if (ONIG_IS_NEWLINE(*sprev) && !ON_STR_END(s)) {
+ STAT_OP_OUT;
+ continue;
+ }
+ goto fail;
+ break;
+
+ case OP_END_LINE: STAT_OP_IN(OP_END_LINE);
+ if (ON_STR_END(s)) {
+#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
+ if (IS_EMPTY_STR || !ONIG_IS_NEWLINE(*sprev)) {
+#endif
+ if (IS_NOTEOL(msa->options)) goto fail;
+ STAT_OP_OUT;
+ continue;
+#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
+ }
+#endif
+ }
+ else if (ONIG_IS_NEWLINE(*s)) {
+ STAT_OP_OUT;
+ continue;
+ }
+ goto fail;
+ break;
+
+ case OP_SEMI_END_BUF: STAT_OP_IN(OP_SEMI_END_BUF);
+ if (ON_STR_END(s)) {
+#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
+ if (IS_EMPTY_STR || !ONIG_IS_NEWLINE(*sprev)) {
+#endif
+ if (IS_NOTEOL(msa->options)) goto fail; /* Is it needed? */
+ STAT_OP_OUT;
+ continue;
+#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
+ }
+#endif
+ }
+ if (ONIG_IS_NEWLINE(*s) && ON_STR_END(s+1)) {
+ STAT_OP_OUT;
+ continue;
+ }
+ goto fail;
+ break;
+
+ case OP_BEGIN_POSITION: STAT_OP_IN(OP_BEGIN_POSITION);
+ if (s != msa->start)
+ goto fail;
+
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_MEMORY_START_PUSH: STAT_OP_IN(OP_MEMORY_START_PUSH);
+ GET_MEMNUM_INC(mem, p);
+ STACK_PUSH_MEM_START(mem, s);
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_MEMORY_START: STAT_OP_IN(OP_MEMORY_START);
+ GET_MEMNUM_INC(mem, p);
+ mem_start_stk[mem] = (StackIndex )((void* )s);
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_MEMORY_END_PUSH: STAT_OP_IN(OP_MEMORY_END_PUSH);
+ GET_MEMNUM_INC(mem, p);
+ STACK_PUSH_MEM_END(mem, s);
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_MEMORY_END: STAT_OP_IN(OP_MEMORY_END);
+ GET_MEMNUM_INC(mem, p);
+ mem_end_stk[mem] = (StackIndex )((void* )s);
+ STAT_OP_OUT;
+ continue;
+ break;
+
+#ifdef USE_SUBEXP_CALL
+ case OP_MEMORY_END_PUSH_REC: STAT_OP_IN(OP_MEMORY_END_PUSH_REC);
+ GET_MEMNUM_INC(mem, p);
+ STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
+ STACK_PUSH_MEM_END(mem, s);
+ mem_start_stk[mem] = GET_STACK_INDEX(stkp);
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_MEMORY_END_REC: STAT_OP_IN(OP_MEMORY_END_REC);
+ GET_MEMNUM_INC(mem, p);
+ mem_end_stk[mem] = (StackIndex )((void* )s);
+ STACK_GET_MEM_START(mem, stkp);
+
+ if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ mem_start_stk[mem] = GET_STACK_INDEX(stkp);
+ else
+ mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr);
+
+ STACK_PUSH_MEM_END_MARK(mem);
+ STAT_OP_OUT;
+ continue;
+ break;
+#endif
+
+ case OP_BACKREF1: STAT_OP_IN(OP_BACKREF1);
+ mem = 1;
+ goto backref;
+ break;
+
+ case OP_BACKREF2: STAT_OP_IN(OP_BACKREF2);
+ mem = 2;
+ goto backref;
+ break;
+
+ case OP_BACKREF3: STAT_OP_IN(OP_BACKREF3);
+ mem = 3;
+ goto backref;
+ break;
+
+ case OP_BACKREFN: STAT_OP_IN(OP_BACKREFN);
+ GET_MEMNUM_INC(mem, p);
+ backref:
+ {
+ int len;
+ UChar *pstart, *pend;
+
+ /* if you want to remove following line,
+ you should check in parse and compile time. */
+ if (mem > num_mem) goto fail;
+ if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
+ if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
+
+ if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
+ else
+ pstart = (UChar* )((void* )mem_start_stk[mem]);
+
+ pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
+ ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[mem]));
+ n = pend - pstart;
+ DATA_ENSURE(n);
+ sprev = s;
+ STRING_CMP(pstart, s, n);
+ while (sprev + (len = enc_len(encode, *sprev)) < s)
+ sprev += len;
+
+ STAT_OP_OUT;
+ continue;
+ }
+ break;
+
+ case OP_BACKREFN_IC: STAT_OP_IN(OP_BACKREFN_IC);
+ GET_MEMNUM_INC(mem, p);
+ {
+ int len;
+ UChar *pstart, *pend;
+
+ /* if you want to remove following line,
+ you should check in parse and compile time. */
+ if (mem > num_mem) goto fail;
+ if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
+ if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
+
+ if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
+ else
+ pstart = (UChar* )((void* )mem_start_stk[mem]);
+
+ pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
+ ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[mem]));
+ n = pend - pstart;
+ DATA_ENSURE(n);
+ sprev = s;
+ STRING_CMP_IC(pstart, &s, n);
+ while (sprev + (len = enc_len(encode, *sprev)) < s)
+ sprev += len;
+
+ STAT_OP_OUT;
+ continue;
+ }
+ break;
+
+ case OP_BACKREF_MULTI: STAT_OP_IN(OP_BACKREF_MULTI);
+ {
+ int len, is_fail;
+ UChar *pstart, *pend, *swork;
+
+ GET_LENGTH_INC(tlen, p);
+ for (i = 0; i < tlen; i++) {
+ GET_MEMNUM_INC(mem, p);
+
+ if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
+ if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
+
+ if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
+ else
+ pstart = (UChar* )((void* )mem_start_stk[mem]);
+
+ pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
+ ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[mem]));
+ n = pend - pstart;
+ DATA_ENSURE(n);
+ sprev = s;
+ swork = s;
+ STRING_CMP_VALUE(pstart, swork, n, is_fail);
+ if (is_fail) continue;
+ s = swork;
+ while (sprev + (len = enc_len(encode, *sprev)) < s)
+ sprev += len;
+
+ p += (SIZE_MEMNUM * (tlen - i - 1));
+ break; /* success */
+ }
+ if (i == tlen) goto fail;
+ STAT_OP_OUT;
+ continue;
+ }
+ break;
+
+ case OP_BACKREF_MULTI_IC: STAT_OP_IN(OP_BACKREF_MULTI_IC);
+ {
+ int len, is_fail;
+ UChar *pstart, *pend, *swork;
+
+ GET_LENGTH_INC(tlen, p);
+ for (i = 0; i < tlen; i++) {
+ GET_MEMNUM_INC(mem, p);
+
+ if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
+ if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
+
+ if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
+ else
+ pstart = (UChar* )((void* )mem_start_stk[mem]);
+
+ pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
+ ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[mem]));
+ n = pend - pstart;
+ DATA_ENSURE(n);
+ sprev = s;
+ swork = s;
+ STRING_CMP_VALUE_IC(pstart, &swork, n, is_fail);
+ if (is_fail) continue;
+ s = swork;
+ while (sprev + (len = enc_len(encode, *sprev)) < s)
+ sprev += len;
+
+ p += (SIZE_MEMNUM * (tlen - i - 1));
+ break; /* success */
+ }
+ if (i == tlen) goto fail;
+ STAT_OP_OUT;
+ continue;
+ }
+ break;
+
+ case OP_SET_OPTION_PUSH: STAT_OP_IN(OP_SET_OPTION_PUSH);
+ GET_OPTION_INC(option, p);
+ ignore_case = IS_IGNORECASE(option);
+ STACK_PUSH_ALT(p, s, sprev);
+ p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_SET_OPTION: STAT_OP_IN(OP_SET_OPTION);
+ GET_OPTION_INC(option, p);
+ ignore_case = IS_IGNORECASE(option);
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_NULL_CHECK_START: STAT_OP_IN(OP_NULL_CHECK_START);
+ GET_MEMNUM_INC(mem, p); /* mem: null check id */
+ STACK_PUSH_NULL_CHECK_START(mem, s);
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_NULL_CHECK_END: STAT_OP_IN(OP_NULL_CHECK_END);
+ {
+ int isnull;
+
+ GET_MEMNUM_INC(mem, p); /* mem: null check id */
+ STACK_NULL_CHECK(isnull, mem, s);
+ if (isnull) {
+#ifdef ONIG_DEBUG_MATCH
+ fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n",
+ (int )mem, (int )s);
+#endif
+ null_check_found:
+ /* empty loop found, skip next instruction */
+ switch (*p++) {
+ case OP_JUMP:
+ case OP_PUSH:
+ p += SIZE_RELADDR;
+ break;
+ case OP_REPEAT_INC:
+ case OP_REPEAT_INC_NG:
+ p += SIZE_MEMNUM;
+ break;
+ default:
+ goto unexpected_bytecode_error;
+ break;
+ }
+ }
+ }
+ STAT_OP_OUT;
+ continue;
+ break;
+
+#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+ case OP_NULL_CHECK_END_MEMST: STAT_OP_IN(OP_NULL_CHECK_END_MEMST);
+ {
+ int isnull;
+
+ GET_MEMNUM_INC(mem, p); /* mem: null check id */
+ STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
+ if (isnull) {
+#ifdef ONIG_DEBUG_MATCH
+ fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n",
+ (int )mem, (int )s);
+#endif
+ if (isnull == -1) goto fail;
+ goto null_check_found;
+ }
+ }
+ STAT_OP_OUT;
+ continue;
+ break;
+#endif
+
+#ifdef USE_SUBEXP_CALL
+ case OP_NULL_CHECK_END_MEMST_PUSH:
+ STAT_OP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
+ {
+ int isnull;
+
+ GET_MEMNUM_INC(mem, p); /* mem: null check id */
+#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+ STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
+#else
+ STACK_NULL_CHECK_REC(isnull, mem, s);
+#endif
+ if (isnull) {
+#ifdef ONIG_DEBUG_MATCH
+ fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n",
+ (int )mem, (int )s);
+#endif
+ if (isnull == -1) goto fail;
+ goto null_check_found;
+ }
+ else {
+ STACK_PUSH_NULL_CHECK_END(mem);
+ }
+ }
+ STAT_OP_OUT;
+ continue;
+ break;
+#endif
+
+ case OP_JUMP: STAT_OP_IN(OP_JUMP);
+ GET_RELADDR_INC(addr, p);
+ p += addr;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_PUSH: STAT_OP_IN(OP_PUSH);
+ GET_RELADDR_INC(addr, p);
+ STACK_PUSH_ALT(p + addr, s, sprev);
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_POP: STAT_OP_IN(OP_POP);
+ STACK_POP_ONE;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_PUSH_OR_JUMP_EXACT1: STAT_OP_IN(OP_PUSH_OR_JUMP_EXACT1);
+ GET_RELADDR_INC(addr, p);
+ if (*p == *s && DATA_ENSURE_CHECK(1)) {
+ p++;
+ STACK_PUSH_ALT(p + addr, s, sprev);
+ STAT_OP_OUT;
+ continue;
+ }
+ p += (addr + 1);
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_PUSH_IF_PEEK_NEXT: STAT_OP_IN(OP_PUSH_IF_PEEK_NEXT);
+ GET_RELADDR_INC(addr, p);
+ if (*p == *s) {
+ p++;
+ STACK_PUSH_ALT(p + addr, s, sprev);
+ STAT_OP_OUT;
+ continue;
+ }
+ p++;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_REPEAT: STAT_OP_IN(OP_REPEAT);
+ {
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+ GET_RELADDR_INC(addr, p);
+
+ STACK_ENSURE(1);
+ repeat_stk[mem] = GET_STACK_INDEX(stk);
+ STACK_PUSH_REPEAT(mem, p);
+
+ if (reg->repeat_range[mem].lower == 0) {
+ STACK_PUSH_ALT(p + addr, s, sprev);
+ }
+ }
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_REPEAT_NG: STAT_OP_IN(OP_REPEAT_NG);
+ {
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+ GET_RELADDR_INC(addr, p);
+
+ STACK_ENSURE(1);
+ repeat_stk[mem] = GET_STACK_INDEX(stk);
+ STACK_PUSH_REPEAT(mem, p);
+
+ if (reg->repeat_range[mem].lower == 0) {
+ STACK_PUSH_ALT(p, s, sprev);
+ p += addr;
+ }
+ }
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_REPEAT_INC: STAT_OP_IN(OP_REPEAT_INC);
+ {
+ StackIndex si;
+
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+#ifdef USE_SUBEXP_CALL
+ if (reg->num_call > 0) {
+ STACK_GET_REPEAT(mem, stkp);
+ si = GET_STACK_INDEX(stkp);
+ }
+ else {
+ si = repeat_stk[mem];
+ stkp = STACK_AT(si);
+ }
+#else
+ si = repeat_stk[mem];
+ stkp = STACK_AT(si);
+#endif
+ stkp->u.repeat.count++;
+ if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
+ /* end of repeat. Nothing to do. */
+ }
+ else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
+ STACK_PUSH_ALT(p, s, sprev);
+ p = stkp->u.repeat.pcode;
+ }
+ else {
+ p = stkp->u.repeat.pcode;
+ }
+ STACK_PUSH_REPEAT_INC(si);
+ }
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_REPEAT_INC_NG: STAT_OP_IN(OP_REPEAT_INC_NG);
+ {
+ StackIndex si;
+
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+#ifdef USE_SUBEXP_CALL
+ if (reg->num_call > 0) {
+ STACK_GET_REPEAT(mem, stkp);
+ si = GET_STACK_INDEX(stkp);
+ }
+ else {
+ si = repeat_stk[mem];
+ stkp = STACK_AT(si);
+ }
+#else
+ si = repeat_stk[mem];
+ stkp = STACK_AT(si);
+#endif
+ stkp->u.repeat.count++;
+ if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
+ if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
+ UChar* pcode = stkp->u.repeat.pcode;
+
+ STACK_PUSH_REPEAT_INC(si);
+ STACK_PUSH_ALT(pcode, s, sprev);
+ }
+ else {
+ p = stkp->u.repeat.pcode;
+ STACK_PUSH_REPEAT_INC(si);
+ }
+ }
+ else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
+ STACK_PUSH_REPEAT_INC(si);
+ }
+ }
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_PUSH_POS: STAT_OP_IN(OP_PUSH_POS);
+ STACK_PUSH_POS(s, sprev);
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_POP_POS: STAT_OP_IN(OP_POP_POS);
+ {
+ STACK_POS_END(stkp);
+ s = stkp->u.state.pstr;
+ sprev = stkp->u.state.pstr_prev;
+ }
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_PUSH_POS_NOT: STAT_OP_IN(OP_PUSH_POS_NOT);
+ GET_RELADDR_INC(addr, p);
+ STACK_PUSH_POS_NOT(p + addr, s, sprev);
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_FAIL_POS: STAT_OP_IN(OP_FAIL_POS);
+ STACK_POP_TIL_POS_NOT;
+ goto fail;
+ break;
+
+ case OP_PUSH_STOP_BT: STAT_OP_IN(OP_PUSH_STOP_BT);
+ STACK_PUSH_STOP_BT;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_POP_STOP_BT: STAT_OP_IN(OP_POP_STOP_BT);
+ STACK_STOP_BT_END;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_LOOK_BEHIND: STAT_OP_IN(OP_LOOK_BEHIND);
+ GET_LENGTH_INC(tlen, p);
+ s = ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
+ if (IS_NULL(s)) goto fail;
+ sprev = onigenc_get_prev_char_head(encode, str, s);
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_PUSH_LOOK_BEHIND_NOT: STAT_OP_IN(OP_PUSH_LOOK_BEHIND_NOT);
+ GET_RELADDR_INC(addr, p);
+ GET_LENGTH_INC(tlen, p);
+ q = ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
+ if (IS_NULL(q)) {
+ /* too short case -> success. ex. /(?<!XXX)a/.match("a")
+ If you want to change to fail, replace following line. */
+ p += addr;
+ /* goto fail; */
+ }
+ else {
+ STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev);
+ s = q;
+ sprev = onigenc_get_prev_char_head(encode, str, s);
+ }
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_FAIL_LOOK_BEHIND_NOT: STAT_OP_IN(OP_FAIL_LOOK_BEHIND_NOT);
+ STACK_POP_TIL_LOOK_BEHIND_NOT;
+ goto fail;
+ break;
+
+#ifdef USE_SUBEXP_CALL
+ case OP_CALL: STAT_OP_IN(OP_CALL);
+ GET_ABSADDR_INC(addr, p);
+ STACK_PUSH_CALL_FRAME(p);
+ p = reg->p + addr;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_RETURN: STAT_OP_IN(OP_RETURN);
+ STACK_RETURN(p);
+ STACK_PUSH_RETURN;
+ STAT_OP_OUT;
+ continue;
+ break;
+#endif
+
+ case OP_FINISH:
+ goto finish;
+ break;
+
+ fail:
+ STAT_OP_OUT;
+ /* fall */
+ case OP_FAIL: STAT_OP_IN(OP_FAIL);
+ STACK_POP;
+ p = stk->u.state.pcode;
+ s = stk->u.state.pstr;
+ sprev = stk->u.state.pstr_prev;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ default:
+ goto bytecode_error;
+
+ } /* end of switch */
+ sprev = sbegin;
+ } /* end of while(1) */
+
+ finish:
+ STACK_SAVE;
+ return best_len;
+
+#ifdef ONIG_DEBUG
+ stack_error:
+ STACK_SAVE;
+ return ONIGERR_STACK_BUG;
+#endif
+
+ bytecode_error:
+ STACK_SAVE;
+ return ONIGERR_UNDEFINED_BYTECODE;
+
+ unexpected_bytecode_error:
+ STACK_SAVE;
+ return ONIGERR_UNEXPECTED_BYTECODE;
+}
+
+
+/* Forward byte-wise search for [target, target_end) inside text.
+   Candidate positions start at text, advance by one character
+   (enc_len of the lead byte) and stop before the smaller of text_range
+   and (text_end - target length + 1).
+   Returns the first matching position, or NULL when nothing matches. */
+static UChar*
+slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
+            UChar* text, UChar* text_end, UChar* text_range)
+{
+  UChar *cand, *limit, *tp, *sp;
+
+  /* last candidate must leave room for the whole target */
+  limit = text_end - (target_end - target) + 1;
+  if (limit > text_range)
+    limit = text_range;
+
+  for (cand = text; cand < limit; cand += enc_len(enc, *cand)) {
+    if (*cand != *target)
+      continue;
+
+    /* first byte agrees: compare the remaining bytes */
+    sp = cand + 1;
+    for (tp = target + 1; tp < target_end; tp++, sp++) {
+      if (*tp != *sp)
+        break;
+    }
+    if (tp == target_end)
+      return cand;
+  }
+
+  return (UChar* )NULL;
+}
+
+/* Disabled helper: everything between #if 0 and #endif is compiled out.
+   As written it first compares (len - 1) bytes of t against p, then
+   compares the rest of the target [t, tend) against the lower-cased
+   form of each following character of p.
+   Returns 1 when the whole target was consumed, 0 otherwise. */
+#if 0
+static int
+str_trans_match_after_head_byte(OnigEncoding enc,
+ int len, UChar* t, UChar* tend, UChar* p)
+{
+ /* compare the trailing (len - 1) bytes of the current character as-is */
+ while (--len > 0) {
+ if (*t != *p) break;
+ t++; p++;
+ }
+
+ if (len == 0) {
+ int lowlen;
+ UChar *q, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
+
+ while (t < tend) {
+ len = enc_len(enc, *p);
+ lowlen = ONIGENC_MBC_TO_LOWER(enc, p, lowbuf);
+ q = lowbuf;
+ while (lowlen > 0) {
+ if (*t++ != *q++) break;
+ lowlen--;
+ }
+ /* lowlen > 0 here means the inner loop stopped on a mismatch */
+ if (lowlen > 0) break;
+ p += len;
+ }
+ if (t == tend)
+ return 1;
+ }
+
+ return 0;
+}
+#endif
+
+/* Compare the target bytes [t, tend) with the lower-cased form of the
+   characters starting at p.  Each text character is lowered into a
+   scratch buffer via ONIGENC_MBC_TO_LOWER and its bytes are compared
+   against the next target bytes.
+   Returns 1 when every target byte matched, 0 on the first mismatch. */
+static int
+str_lower_case_match(OnigEncoding enc, UChar* t, UChar* tend, UChar* p)
+{
+  int char_len, remain;
+  UChar *lp, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
+
+  for (; t < tend; p += char_len) {
+    char_len = enc_len(enc, *p);
+    remain   = ONIGENC_MBC_TO_LOWER(enc, p, lowbuf);
+
+    for (lp = lowbuf; remain > 0; remain--) {
+      if (*t++ != *lp++)
+        return 0;
+    }
+  }
+
+  return 1;
+}
+
+/* Case-insensitive forward search for [target, target_end) inside text.
+   Each text character is lower-cased with ONIGENC_MBC_TO_LOWER before
+   being compared, so the target is presumably already lower-cased by
+   the caller (not verifiable from this function alone).
+   Candidate positions start at text, advance one character at a time
+   and stop before the smaller of text_range and
+   (text_end - target length + 1).
+   Returns the first matching position, or NULL when nothing matches. */
+static UChar*
+slow_search_ic(OnigEncoding enc,
+               UChar* target, UChar* target_end,
+               UChar* text, UChar* text_end, UChar* text_range)
+{
+  int len, lowlen;
+  UChar *t, *p, *s, *end;
+  UChar lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
+
+  end = text_end - (target_end - target) + 1;
+  if (end > text_range)
+    end = text_range;
+
+  s = text;
+
+  while (s < end) {
+    len = enc_len(enc, *s);
+    lowlen = ONIGENC_MBC_TO_LOWER(enc, s, lowbuf);
+    if (*target == *lowbuf) {
+      /* first lowered byte agrees: check the rest of this character */
+      p = lowbuf + 1;
+      t = target + 1;
+      while (--lowlen > 0) {
+        if (*p != *t) break;
+        p++; t++;  /* was "*t++": the dereferenced value was discarded */
+      }
+      /* lowlen == 0 means the whole first character matched */
+      if (lowlen == 0) {
+        if (str_lower_case_match(enc, t, target_end, s + len))
+          return s;
+      }
+    }
+
+    s += len;
+  }
+
+  return (UChar* )NULL;
+}
+
+/* Backward byte-wise search for [target, target_end).
+   The first candidate is text_start, or (text_end - target length)
+   adjusted left to a character head when that is not beyond text_start.
+   Candidates then move to the previous character head (relative to
+   adjust_text) for as long as they stay at or after text.
+   Returns the matching position, or NULL when nothing matches. */
+static UChar*
+slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
+                     UChar* text, UChar* adjust_text, UChar* text_end, UChar* text_start)
+{
+  UChar *cand, *tp, *sp;
+
+  cand = text_end - (target_end - target);
+  if (cand > text_start)
+    cand = text_start;
+  else
+    cand = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, cand);
+
+  for (; cand >= text;
+       cand = onigenc_get_prev_char_head(enc, adjust_text, cand)) {
+    if (*cand != *target)
+      continue;
+
+    /* first byte agrees: compare the remaining bytes */
+    sp = cand + 1;
+    for (tp = target + 1; tp < target_end; tp++, sp++) {
+      if (*tp != *sp)
+        break;
+    }
+    if (tp == target_end)
+      return cand;
+  }
+
+  return (UChar* )NULL;
+}
+
+/* Case-insensitive backward search for [target, target_end).
+   Each text character is lower-cased with ONIGENC_MBC_TO_LOWER before
+   being compared, so the target is presumably already lower-cased by
+   the caller (not verifiable from this function alone).
+   The first candidate is text_start, or (text_end - target length)
+   adjusted left to a character head; candidates then move to the
+   previous character head while they stay at or after text.
+   Returns the matching position, or NULL when nothing matches. */
+static UChar*
+slow_search_backward_ic(OnigEncoding enc,
+                        UChar* target,UChar* target_end,
+                        UChar* text, UChar* adjust_text,
+                        UChar* text_end, UChar* text_start)
+{
+  int len, lowlen;
+  UChar *t, *p, *s;
+  UChar lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
+
+  s = text_end - (target_end - target);
+  if (s > text_start)
+    s = text_start;
+  else
+    s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
+
+  while (s >= text) {
+    len = enc_len(enc, *s);
+    lowlen = ONIGENC_MBC_TO_LOWER(enc, s, lowbuf);
+    if (*target == *lowbuf) {
+      /* first lowered byte agrees: check the rest of this character */
+      p = lowbuf + 1;
+      t = target + 1;
+      while (--lowlen > 0) {
+        if (*p != *t) break;
+        p++; t++;  /* was "*t++": the dereferenced value was discarded */
+      }
+      /* lowlen == 0 means the whole first character matched */
+      if (lowlen == 0) {
+        if (str_lower_case_match(enc, t, target_end, s + len))
+          return s;
+      }
+    }
+
+    s = onigenc_get_prev_char_head(enc, adjust_text, s);
+  }
+
+  return (UChar* )NULL;
+}
+
+/* Skip-table forward search for [target, target_end) that advances by
+   whole characters (the bm_ prefix presumably stands for Boyer-Moore).
+   The text position under the target's last byte is compared backwards
+   against the target; on a mismatch the shift read from reg->map (or
+   reg->int_map when it is non-NULL) is rounded up to a character
+   boundary by walking enc_len steps.
+   Returns the start of the match, or NULL when nothing matches. */
+static UChar*
+bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end,
+                 UChar* text, UChar* text_end, UChar* text_range)
+{
+  UChar *pos, *limit, *last;
+  UChar *sp, *tp, *base, *walk;
+  int skip;
+
+  limit = text_range + (target_end - target) - 1;
+  if (limit > text_end)
+    limit = text_end;
+
+  last = target_end - 1;
+
+  /* walk whole characters until at least the target length is covered */
+  for (pos = text; (pos - text) < target_end - target; )
+    pos += enc_len(reg->enc, *pos);
+  pos--; /* set to text check tail position. */
+
+  if (IS_NULL(reg->int_map)) {
+    while (pos < limit) {
+      sp = pos;
+      for (tp = last; tp >= target && *sp == *tp; tp--)
+        sp--;
+      if (tp < target) return sp + 1;
+
+      skip = reg->map[*pos];
+      /* advance by characters from just after the mismatch position */
+      base = sp + 1;
+      for (walk = base; (walk - base) < skip; )
+        walk += enc_len(reg->enc, *walk);
+      pos += (walk - base);
+    }
+  }
+  else {
+    while (pos < limit) {
+      sp = pos;
+      for (tp = last; tp >= target && *sp == *tp; tp--)
+        sp--;
+      if (tp < target) return sp + 1;
+
+      skip = reg->int_map[*pos];
+      /* advance by characters from just after the mismatch position */
+      base = sp + 1;
+      for (walk = base; (walk - base) < skip; )
+        walk += enc_len(reg->enc, *walk);
+      pos += (walk - base);
+    }
+  }
+  return (UChar* )NULL;
+}
+
+/* Skip-table forward search for [target, target_end) (the bm_ prefix
+   presumably stands for Boyer-Moore).
+   pos tracks the text byte aligned with the target's last byte; the
+   target is compared backwards from there, and on a mismatch pos is
+   advanced by the table entry for the byte at pos (reg->map, or
+   reg->int_map when it is non-NULL).
+   Returns the start of the match, or NULL when nothing matches. */
+static UChar*
+bm_search(regex_t* reg, UChar* target, UChar* target_end,
+          UChar* text, UChar* text_end, UChar* text_range)
+{
+  UChar *pos, *limit, *last;
+  UChar *sp, *tp;
+
+  limit = text_range + (target_end - target) - 1;
+  if (limit > text_end)
+    limit = text_end;
+
+  last = target_end - 1;
+  pos  = text + (target_end - target) - 1;
+
+  if (IS_NULL(reg->int_map)) {
+    for (; pos < limit; pos += reg->map[*pos]) {
+      sp = pos;
+      for (tp = last; tp >= target && *sp == *tp; tp--)
+        sp--;
+      if (tp < target) return sp + 1;
+    }
+  }
+  else { /* see int_map[] */
+    for (; pos < limit; pos += reg->int_map[*pos]) {
+      sp = pos;
+      for (tp = last; tp >= target && *sp == *tp; tp--)
+        sp--;
+      if (tp < target) return sp + 1;
+    }
+  }
+  return (UChar* )NULL;
+}
+
+/* Build the skip table used by bm_search_backward for the pattern
+   [s, end).  *skip is allocated (ONIG_CHAR_TABLE_SIZE ints) when it is
+   still NULL.  Every entry starts at the pattern length; then, walking
+   i from (length - 1) down to 1, the entry for byte s[i] (or for the
+   first byte of its lower-cased form when ignore_case is set) is set
+   to i, so the smallest such i is the value that remains.
+   Returns 0 on success, ONIGERR_MEMORY when the allocation fails. */
+static int
+set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc,
+                     int ignore_case, int** skip)
+{
+  int idx, pat_len;
+  int *tbl;
+  UChar lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
+
+  tbl = *skip;
+  if (IS_NULL(tbl)) {
+    tbl = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
+    *skip = tbl;
+    if (IS_NULL(tbl)) return ONIGERR_MEMORY;
+  }
+
+  pat_len = end - s;
+  for (idx = 0; idx < ONIG_CHAR_TABLE_SIZE; idx++)
+    tbl[idx] = pat_len;
+
+  /* descending order matters: a later (smaller) index overwrites */
+  for (idx = pat_len - 1; idx > 0; idx--) {
+    if (ignore_case) {
+      ONIGENC_MBC_TO_LOWER(enc, &(s[idx]), lowbuf);
+      tbl[*lowbuf] = idx;
+    }
+    else {
+      tbl[s[idx]] = idx;
+    }
+  }
+  return 0;
+}
+
+/* Backward skip-table search for [target, target_end).
+   The first candidate is text_start, or (text_end - target length)
+   adjusted left to a character head when text_start is not before it.
+   At each candidate the target is compared forwards; on a mismatch the
+   candidate moves back by reg->int_map_backward[first byte] and is
+   re-adjusted to a character head.  The loop stops once the candidate
+   falls before text.
+   Returns the matching position, or NULL when nothing matches. */
+static UChar*
+bm_search_backward(regex_t* reg, UChar* target, UChar* target_end, UChar* text,
+                   UChar* adjust_text, UChar* text_end, UChar* text_start)
+{
+  UChar *cand, *tp, *sp;
+
+  cand = text_end - (target_end - target);
+  if (text_start < cand)
+    cand = text_start;
+  else
+    cand = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, cand);
+
+  while (cand >= text) {
+    sp = cand;
+    for (tp = target; tp < target_end && *sp == *tp; tp++)
+      sp++;
+    if (tp == target_end)
+      return cand;
+
+    cand -= reg->int_map_backward[*cand];
+    cand = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, cand);
+  }
+
+  return (UChar* )NULL;
+}
+
+/* Scan forward from text, one character at a time, and return the
+   first position before text_range whose lead byte has a non-zero
+   entry in map[].  Returns NULL when no such position exists. */
+static UChar*
+map_search(OnigEncoding enc, UChar map[], UChar* text, UChar* text_range)
+{
+  UChar *cur;
+
+  for (cur = text; cur < text_range; cur += enc_len(enc, *cur)) {
+    if (map[*cur])
+      return cur;
+  }
+  return (UChar* )NULL;
+}
+
+/* Scan backward from text_start, stepping to the previous character
+   head (relative to adjust_text), and return the first position at or
+   after text whose lead byte has a non-zero entry in map[].
+   Returns NULL when no such position exists. */
+static UChar*
+map_search_backward(OnigEncoding enc, UChar map[],
+                    UChar* text, UChar* adjust_text, UChar* text_start)
+{
+  UChar *cur;
+
+  for (cur = text_start; cur >= text;
+       cur = onigenc_get_prev_char_head(enc, adjust_text, cur)) {
+    if (map[*cur])
+      return cur;
+  }
+  return (UChar* )NULL;
+}
+
+/* Run the compiled pattern reg against [str, end) at the single
+   position `at` (no searching).
+   When a region is supplied (and, with USE_POSIX_REGION_OPTION, the
+   POSIX-region option is not set) it is first resized to hold
+   reg->num_mem + 1 entries; a non-zero resize result is returned
+   as-is.  Otherwise the value of match_at() is returned. */
+extern int
+onig_match(regex_t* reg, UChar* str, UChar* end, UChar* at, OnigRegion* region,
+           OnigOptionType option)
+{
+  int r = 0;
+  MatchArg msa;
+
+  MATCH_ARG_INIT(msa, option, region, at);
+
+  if (region
+#ifdef USE_POSIX_REGION_OPTION
+      && !IS_POSIX_REGION(option)
+#endif
+      ) {
+    r = onig_region_resize(region, reg->num_mem + 1);
+  }
+
+  if (r == 0) {
+    /* match_at also needs the head of the character preceding `at` */
+    r = match_at(reg, str, end, at,
+                 onigenc_get_prev_char_head(reg->enc, str, at), &msa);
+  }
+
+  MATCH_ARG_FREE(msa);
+  return r;
+}
+
+/* Use the pattern's optimization info (reg->optimize) to find the next
+ place in [s, range) where a match could start.
+ The search begins reg->dmin bytes (rounded up to a character boundary
+ for multi-byte encodings) after s. A hit p is rejected, and the search
+ retried from the following character, when p - reg->dmin < s or when
+ the reg->sub_anchor line-anchor condition does not hold at p.
+ On success: *high = p - reg->dmin; *low (and *low_prev when low_prev is
+ non-NULL) are set only when reg->dmax is not ONIG_INFINITE_DISTANCE.
+ Returns 1 on success, 0 when no candidate was found. */
+static int
+forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
+ UChar* range, UChar** low, UChar** high, UChar** low_prev)
+{
+ UChar *p, *pprev = (UChar* )NULL;
+
+#ifdef ONIG_DEBUG_SEARCH
+ fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n",
+ (int )str, (int )end, (int )s, (int )range);
+#endif
+
+ p = s;
+ /* skip the minimum distance between match start and the optimized part */
+ if (reg->dmin > 0) {
+ if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
+ p += reg->dmin;
+ }
+ else {
+ UChar *q = p + reg->dmin;
+ while (p < q) p += enc_len(reg->enc, *p);
+ }
+ }
+
+ retry:
+ /* no default case: for any other reg->optimize value p is left as is */
+ switch (reg->optimize) {
+ case ONIG_OPTIMIZE_EXACT:
+ p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
+ break;
+ case ONIG_OPTIMIZE_EXACT_IC:
+ p = slow_search_ic(reg->enc, reg->exact, reg->exact_end, p, end, range);
+ break;
+
+ case ONIG_OPTIMIZE_EXACT_BM:
+ p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
+ break;
+
+ case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
+ p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
+ break;
+
+ case ONIG_OPTIMIZE_MAP:
+ p = map_search(reg->enc, reg->map, p, range);
+ break;
+ }
+
+ if (p && p < range) {
+ if (p - reg->dmin < s) {
+ /* retry_gate is also entered by goto from the sub_anchor checks below */
+ retry_gate:
+ pprev = p;
+ p += enc_len(reg->enc, *p);
+ goto retry;
+ }
+
+ if (reg->sub_anchor) {
+ UChar* prev;
+
+ switch (reg->sub_anchor) {
+ case ANCHOR_BEGIN_LINE:
+ /* accept only at string begin or right after a newline */
+ if (!ON_STR_BEGIN(p)) {
+ prev = onigenc_get_prev_char_head(reg->enc,
+ (pprev ? pprev : str), p);
+ if (!ONIG_IS_NEWLINE(*prev))
+ goto retry_gate;
+ }
+ break;
+
+ case ANCHOR_END_LINE:
+ /* at string end: reject when the previous char is a newline;
+ elsewhere: accept only when *p itself is a newline */
+ if (ON_STR_END(p)) {
+ prev = onigenc_get_prev_char_head(reg->enc,
+ (pprev ? pprev : str), p);
+ if (prev && ONIG_IS_NEWLINE(*prev))
+ goto retry_gate;
+ }
+ else if (!ONIG_IS_NEWLINE(*p))
+ goto retry_gate;
+ break;
+ }
+ }
+
+ if (reg->dmax == 0) {
+ *low = p;
+ if (low_prev) {
+ if (*low > s)
+ *low_prev = onigenc_get_prev_char_head(reg->enc, s, p);
+ else
+ *low_prev = onigenc_get_prev_char_head(reg->enc,
+ (pprev ? pprev : str), p);
+ }
+ }
+ else {
+ /* with an infinite max distance *low and *low_prev are left untouched */
+ if (reg->dmax != ONIG_INFINITE_DISTANCE) {
+ *low = p - reg->dmax;
+ if (*low > s) {
+ *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
+ *low, low_prev);
+ if (low_prev && IS_NULL(*low_prev))
+ *low_prev = onigenc_get_prev_char_head(reg->enc,
+ (pprev ? pprev : s), *low);
+ }
+ else {
+ if (low_prev)
+ *low_prev = onigenc_get_prev_char_head(reg->enc,
+ (pprev ? pprev : str), *low);
+ }
+ }
+ }
+ /* no needs to adjust *high, *high is used as range check only */
+ *high = p - reg->dmin;
+
+#ifdef ONIG_DEBUG_SEARCH
+ fprintf(stderr,
+ "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
+ (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
+#endif
+ return 1; /* success */
+ }
+
+ return 0; /* fail */
+}
+
+/* Prototype for set_bm_backward_skip(); the function is defined earlier
+ in this file, so this declaration only repeats its signature. */
+static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc,
+ int ignore_case, int** skip));
+
+/* backward_search_range() compares (s - range) with this value: below it,
+ and while no backward skip table exists yet, the plain
+ slow_search_backward() path is used instead of building the table. */
+#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
+
+/* Backward counterpart of forward_search_range(): use reg->optimize to
+ find, scanning backwards from s down to (range + reg->dmin), a place
+ where a match could start.
+ For the BM optimizations the backward skip table
+ reg->int_map_backward is built on first use, unless the span
+ (s - range) is below BM_BACKWARD_SEARCH_LENGTH_THRESHOLD, in which
+ case slow_search_backward() is used.
+ A hit that violates the reg->sub_anchor line-anchor condition is
+ rejected and the search retried from the previous character.
+ On success *low and *high are written only when reg->dmax is not
+ ONIG_INFINITE_DISTANCE.
+ Returns 1 on success, 0 on failure, or the non-zero result of
+ set_bm_backward_skip() when building the table fails. */
+static int
+backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
+ UChar* range, UChar* adjrange, UChar** low, UChar** high)
+{
+ int r;
+ UChar *p;
+
+ range += reg->dmin;
+ p = s;
+
+ retry:
+ switch (reg->optimize) {
+ case ONIG_OPTIMIZE_EXACT:
+ /* exact_method is also reached by goto from the BM case below */
+ exact_method:
+ p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
+ range, adjrange, end, p);
+ break;
+
+ case ONIG_OPTIMIZE_EXACT_IC:
+ p = slow_search_backward_ic(reg->enc, reg->exact,
+ reg->exact_end, range, adjrange, end, p);
+ break;
+
+ case ONIG_OPTIMIZE_EXACT_BM:
+ case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
+ if (IS_NULL(reg->int_map_backward)) {
+ /* short span: skip building the table and search byte-wise */
+ if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
+ goto exact_method;
+
+ r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc, 0,
+ &(reg->int_map_backward));
+ if (r) return r;
+ }
+ p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
+ end, p);
+ break;
+
+ case ONIG_OPTIMIZE_MAP:
+ p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
+ break;
+ }
+
+ if (p) {
+ if (reg->sub_anchor) {
+ UChar* prev;
+
+ switch (reg->sub_anchor) {
+ case ANCHOR_BEGIN_LINE:
+ /* accept only at string begin or right after a newline */
+ if (!ON_STR_BEGIN(p)) {
+ prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
+ if (!ONIG_IS_NEWLINE(*prev)) {
+ p = prev;
+ goto retry;
+ }
+ }
+ break;
+
+ case ANCHOR_END_LINE:
+ /* at string end: reject when the previous char is a newline;
+ elsewhere: accept only when *p itself is a newline */
+ if (ON_STR_END(p)) {
+ prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
+ if (IS_NULL(prev)) goto fail;
+ if (ONIG_IS_NEWLINE(*prev)) {
+ p = prev;
+ goto retry;
+ }
+ }
+ else if (!ONIG_IS_NEWLINE(*p)) {
+ p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
+ if (IS_NULL(p)) goto fail;
+ goto retry;
+ }
+ break;
+ }
+ }
+
+ /* no needs to adjust *high, *high is used as range check only */
+ /* with an infinite max distance *low and *high are left untouched */
+ if (reg->dmax != ONIG_INFINITE_DISTANCE) {
+ *low = p - reg->dmax;
+ *high = p - reg->dmin;
+ *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
+ }
+
+#ifdef ONIG_DEBUG_SEARCH
+ fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
+ (int )(*low - str), (int )(*high - str));
+#endif
+ return 1; /* success */
+ }
+
+ fail:
+#ifdef ONIG_DEBUG_SEARCH
+ fprintf(stderr, "backward_search_range: fail.\n");
+#endif
+ return 0; /* fail */
+}
+
+
+extern int
+onig_search(regex_t* reg, UChar* str, UChar* end,
+ UChar* start, UChar* range, OnigRegion* region, OnigOptionType option)
+{
+ int r;
+ UChar *s, *prev;
+ MatchArg msa;
+
+ if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
+ reg->state++; /* increment as search counter */
+ if (IS_NOT_NULL(reg->chain)) {
+ onig_chain_reduce(reg);
+ reg->state++;
+ }
+ }
+ else {
+ int n = 0;
+ while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
+ if (++n > THREAD_PASS_LIMIT_COUNT)
+ return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
+ THREAD_PASS;
+ }
+ reg->state++; /* increment as search counter */
+ }
+
+#ifdef ONIG_DEBUG_SEARCH
+ fprintf(stderr, "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
+ (int )str, (int )(end - str), (int )(start - str), (int )(range - str));
+#endif
+
+ if (region
+#ifdef USE_POSIX_REGION_OPTION
+ && !IS_POSIX_REGION(option)
+#endif
+ ) {
+ r = onig_region_resize(region, reg->num_mem + 1);
+ if (r) goto finish_no_msa;
+ }
+
+ if (start > end || start < str) goto mismatch_no_msa;
+
+#define MATCH_AND_RETURN_CHECK \
+ r = match_at(reg, str, end, s, prev, &msa);\
+ if (r != ONIG_MISMATCH) {\
+ if (r >= 0) goto match;\
+ goto finish; /* error */ \
+ }
+
+ /* anchor optimize: resume search range */
+ if (reg->anchor != 0 && str < end) {
+ UChar* semi_end;
+
+ if (reg->anchor & ANCHOR_BEGIN_POSITION) {
+ /* search start-position only */
+ begin_position:
+ if (range > start)
+ range = start + 1;
+ else
+ range = start;
+ }
+ else if (reg->anchor & ANCHOR_BEGIN_BUF) {
+ /* search str-position only */
+ if (range > start) {
+ if (start != str) goto mismatch_no_msa;
+ range = str + 1;
+ }
+ else {
+ if (range <= str) {
+ start = str;
+ range = str;
+ }
+ else
+ goto mismatch_no_msa;
+ }
+ }
+ else if (reg->anchor & ANCHOR_END_BUF) {
+ semi_end = end;
+
+ end_buf:
+ if ((OnigDistance )(semi_end - str) < reg->anchor_dmin)
+ goto mismatch_no_msa;
+
+ if (range > start) {
+ if ((OnigDistance )(semi_end - start) > reg->anchor_dmax) {
+ start = semi_end - reg->anchor_dmax;
+ if (start < end)
+ start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
+ else { /* match with empty at end */
+ start = onigenc_get_prev_char_head(reg->enc, str, end);
+ }
+ }
+ if ((OnigDistance )(semi_end - (range - 1)) < reg->anchor_dmin) {
+ range = semi_end - reg->anchor_dmin + 1;
+ }
+
+ if (start >= range) goto mismatch_no_msa;
+ }
+ else {
+ if ((OnigDistance )(semi_end - range) > reg->anchor_dmax) {
+ range = semi_end - reg->anchor_dmax;
+ }
+ if ((OnigDistance )(semi_end - start) < reg->anchor_dmin) {
+ start = semi_end - reg->anchor_dmin;
+ start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
+ if (range > start) goto mismatch_no_msa;
+ }
+ }
+ }
+ else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
+ if (ONIG_IS_NEWLINE(end[-1])) {
+ semi_end = end - 1;
+ if (semi_end > str && start <= semi_end) {
+ goto end_buf;
+ }
+ }
+ else {
+ semi_end = end;
+ goto end_buf;
+ }
+ }
+ else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_PL)) {
+ goto begin_position;
+ }
+ }
+ else if (str == end) { /* empty string */
+ static UChar* address_for_empty_string = "";
+
+#ifdef ONIG_DEBUG_SEARCH
+ fprintf(stderr, "onig_search: empty string.\n");
+#endif
+
+ if (reg->threshold_len == 0) {
+ s = start = end = str = address_for_empty_string;
+ prev = (UChar* )NULL;
+
+ MATCH_ARG_INIT(msa, option, region, start);
+ MATCH_AND_RETURN_CHECK;
+ goto mismatch;
+ }
+ goto mismatch_no_msa;
+ }
+
+#ifdef ONIG_DEBUG_SEARCH
+ fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
+ (int )(end - str), (int )(start - str), (int )(range - str));
+#endif
+
+ MATCH_ARG_INIT(msa, option, region, start);
+
+ s = start;
+ if (range > start) { /* forward search */
+ if (s > str)
+ prev = onigenc_get_prev_char_head(reg->enc, str, s);
+ else
+ prev = (UChar* )NULL;
+
+ if (reg->optimize != ONIG_OPTIMIZE_NONE) {
+ UChar *sch_range, *low, *high, *low_prev;
+
+ sch_range = range;
+ if (reg->dmax != 0) {
+ if (reg->dmax == ONIG_INFINITE_DISTANCE)
+ sch_range = end;
+ else {
+ sch_range += reg->dmax;
+ if (sch_range > end) sch_range = end;
+ }
+ }
+ if (reg->dmax != ONIG_INFINITE_DISTANCE &&
+ (end - start) >= reg->threshold_len) {
+ do {
+ if (! forward_search_range(reg, str, end, s, sch_range,
+ &low, &high, &low_prev)) goto mismatch;
+ if (s < low) {
+ s = low;
+ prev = low_prev;
+ }
+ while (s <= high) {
+ MATCH_AND_RETURN_CHECK;
+ prev = s;
+ s += enc_len(reg->enc, *s);
+ }
+ if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
+ if (IS_NOT_NULL(prev)) {
+ while (!ONIG_IS_NEWLINE(*prev) && s < range) {
+ prev = s;
+ s += enc_len(reg->enc, *s);
+ }
+ }
+ }
+ } while (s < range);
+ goto mismatch;
+ }
+ else { /* check only. */
+ if ((end - start) < reg->threshold_len ||
+ ! forward_search_range(reg, str, end, s, sch_range,
+ &low, &high, (UChar** )NULL)) goto mismatch;
+ }
+ }
+
+ do {
+ MATCH_AND_RETURN_CHECK;
+ prev = s;
+ s += enc_len(reg->enc, *s);
+ } while (s <= range); /* exec s == range, because empty match with /$/. */
+ }
+ else { /* backward search */
+ if (reg->optimize != ONIG_OPTIMIZE_NONE) {
+ UChar *low, *high, *adjrange, *sch_start;
+
+ adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
+ if (reg->dmax != ONIG_INFINITE_DISTANCE &&
+ (end - range) >= reg->threshold_len) {
+ do {
+ sch_start = s + reg->dmax;
+ if (sch_start > end) sch_start = end;
+ if (backward_search_range(reg, str, end, sch_start, range, adjrange,
+ &low, &high) <= 0)
+ goto mismatch;
+
+ if (s > high)
+ s = high;
+
+ while (s >= low) {
+ prev = onigenc_get_prev_char_head(reg->enc, str, s);
+ MATCH_AND_RETURN_CHECK;
+ s = prev;
+ }
+ } while (s >= range);
+ goto mismatch;
+ }
+ else { /* check only. */
+ if ((end - range) < reg->threshold_len) goto mismatch;
+
+ sch_start = s;
+ if (reg->dmax != 0) {
+ if (reg->dmax == ONIG_INFINITE_DISTANCE)
+ sch_start = end;
+ else {
+ sch_start += reg->dmax;
+ if (sch_start > end) sch_start = end;
+ else
+ sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
+ start, sch_start);
+ }
+ }
+ if (backward_search_range(reg, str, end, sch_start, range, adjrange,
+ &low, &high) <= 0) goto mismatch;
+ }
+ }
+
+ do {
+ prev = onigenc_get_prev_char_head(reg->enc, str, s);
+ MATCH_AND_RETURN_CHECK;
+ s = prev;
+ } while (s >= range);
+ }
+
+ mismatch:
+ r = ONIG_MISMATCH;
+
+ finish:
+ MATCH_ARG_FREE(msa);
+ reg->state--; /* decrement as search counter */
+
+ /* If result is mismatch and no FIND_NOT_EMPTY option,
+ then the region is not setted in match_at(). */
+ if (IS_FIND_NOT_EMPTY(reg->options) && region
+#ifdef USE_POSIX_REGION_OPTION
+ && !IS_POSIX_REGION(option)
+#endif
+ ) {
+ onig_region_clear(region);
+ }
+
+#ifdef ONIG_DEBUG
+ if (r != ONIG_MISMATCH)
+ fprintf(stderr, "onig_search: error %d\n", r);
+#endif
+ return r;
+
+ mismatch_no_msa:
+ r = ONIG_MISMATCH;
+ finish_no_msa:
+ reg->state--; /* decrement as search counter */
+#ifdef ONIG_DEBUG
+ if (r != ONIG_MISMATCH)
+ fprintf(stderr, "onig_search: error %d\n", r);
+#endif
+ return r;
+
+ match:
+ reg->state--; /* decrement as search counter */
+ MATCH_ARG_FREE(msa);
+ return s - str;
+}
+/* Accessor: returns the encoding stored in reg->enc. reg must not be NULL (it is dereferenced unchecked). */
+extern OnigEncoding
+onig_get_encoding(regex_t* reg)
+{
+ return reg->enc;
+}
+/* Accessor: returns the option bits stored in reg->options. reg must not be NULL (it is dereferenced unchecked). */
+extern OnigOptionType
+onig_get_options(regex_t* reg)
+{
+ return reg->options;
+}
+/* Accessor: returns the syntax pointer stored in reg->syntax. reg must not be NULL (it is dereferenced unchecked). */
+extern OnigSyntaxType*
+onig_get_syntax(regex_t* reg)
+{
+ return reg->syntax;
+}
+/* Returns a static "MAJOR.MINOR.TEENY" string built at compile time from the ONIGURUMA_VERSION_* names (assumed to be macros defined outside this file). */
+extern const char*
+onig_version(void)
+{
+#define MSTR_(a) # a
+#define MSTR(a) MSTR_(a) /* extra level: '#' alone stringizes the argument unexpanded, yielding the macro name instead of its value */
+ return (MSTR(ONIGURUMA_VERSION_MAJOR) "."
+ MSTR(ONIGURUMA_VERSION_MINOR) "."
+ MSTR(ONIGURUMA_VERSION_TEENY));
+}
diff --git a/reggnu.c b/reggnu.c
new file mode 100644
index 000000000..9c6a2161c
--- /dev/null
+++ b/reggnu.c
@@ -0,0 +1,256 @@
+/**********************************************************************
+
+ reggnu.c - Oniguruma (regular expression library)
+
+ Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
+
+**********************************************************************/
+#include "regint.h"
+
+#ifndef ONIGGNU_H /* name changes from oniggnu.h to regex.h in ruby. */
+#include "oniggnu.h"
+#endif
+
+#if defined(RUBY_PLATFORM) || defined(RUBY)
+#ifndef ONIG_RUBY_M17N
+#define USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
+#endif
+#endif
+
+#ifndef NULL
+#define NULL ((void* )0)
+#endif
+/* GNU-regex style wrapper: forwards r to onig_region_free() with free-self set to 0. */
+extern void
+re_free_registers(OnigRegion* r)
+{
+ /* 0: don't free self */
+ onig_region_free(r, 0);
+}
+/* Returns startpos moved onto a character head of string (per the adjust helpers of reg->enc), or startpos itself when no adjustment applies. */
+extern int
+re_adjust_startpos(regex_t* reg, const char* string, int size,
+ int startpos, int range)
+{
+ UChar *base = (UChar* )string;
+ UChar *at, *head;
+
+ /* offsets <= 0 or >= size, and encodings whose max char length is 1, are returned untouched */
+ if (startpos <= 0 || ONIGENC_MBC_MAXLEN(reg->enc) == 1 || startpos >= size)
+ return startpos;
+
+ at = base + startpos;
+ if (range > 0) /* positive range: adjust to the right */
+ head = onigenc_get_right_adjust_char_head(reg->enc, base, at);
+ else /* zero or negative range: adjust to the left */
+ head = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, base, at);
+
+ return head - base;
+}
+/* GNU-regex style wrapper: calls onig_match() on str[0..size) at offset pos with ONIG_OPTION_NONE and returns its result unchanged. */
+extern int
+re_match(regex_t* reg, const char* str, int size, int pos,
+ struct re_registers* regs)
+{
+ return onig_match(reg, (UChar* )str, (UChar* )(str + size),
+ (UChar* )(str + pos), regs, ONIG_OPTION_NONE);
+}
+/* GNU-regex style wrapper around onig_search(): start is string + startpos and the range limit is string + startpos + range, so a non-positive range makes onig_search() take its backward branch. */
+extern int
+re_search(regex_t* bufp, const char* string, int size, int startpos, int range,
+ struct re_registers* regs)
+{
+ return onig_search(bufp, (UChar* )string, (UChar* )(string + size),
+ (UChar* )(string + startpos),
+ (UChar* )(string + startpos + range),
+ regs, ONIG_OPTION_NONE);
+}
+/* Compiles pattern[0..size) into reg via onig_compile() and returns its code; on a non-zero code the message is written to ebuf when ebuf is non-NULL. */
+extern int
+re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
+{
+ int r;
+ OnigErrorInfo einfo;
+
+ r = onig_compile(reg, (UChar* )pattern, (UChar* )(pattern + size), &einfo);
+ if (r != 0) {
+ if (IS_NOT_NULL(ebuf)) /* ebuf is optional; its required capacity is not visible here */
+ (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo);
+ }
+
+ return r;
+}
+/* Recompiles reg from pattern[0..size) via onig_recompile(), keeping reg->options and using the default encoding and syntax; returns its code and fills ebuf (if non-NULL) on a non-zero code. */
+extern int
+re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
+{
+ int r;
+ OnigErrorInfo einfo;
+ OnigEncoding enc;
+
+ /* I think encoding and options should be arguments of this function.
+ But this is adapted to present re.c. (2002/11/29)
+ */
+ enc = OnigEncDefaultCharEncoding;
+
+ r = onig_recompile(reg, (UChar* )pattern, (UChar* )(pattern + size),
+ reg->options, enc, OnigDefaultSyntax, &einfo);
+ if (r != 0) {
+ if (IS_NOT_NULL(ebuf)) /* ebuf is optional */
+ (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo);
+ }
+ return r;
+}
+/* GNU-regex style wrapper: forwards reg to onig_free(). */
+extern void
+re_free_pattern(regex_t* reg)
+{
+ onig_free(reg);
+}
+/* Calls onig_alloc_init() on *reg with ONIG_OPTION_DEFAULT, the default encoding and the default syntax; returns its result code. */
+extern int
+re_alloc_pattern(regex_t** reg)
+{
+ return onig_alloc_init(reg, ONIG_OPTION_DEFAULT, OnigEncDefaultCharEncoding,
+ OnigDefaultSyntax);
+}
+/* Forwards table to onigenc_set_default_caseconv_table(); constness is cast away, so whether the table is modified depends on that callee. */
+extern void
+re_set_casetable(const char* table)
+{
+ onigenc_set_default_caseconv_table((UChar* )table);
+}
+
+#ifdef USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY /* 256-entry tables indexed by byte value; entries are presumably the count of bytes following that lead byte - confirm against users of re_mbctab */
+static const unsigned char mbctab_ascii[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+/* EUC table: 0x8E -> 1, 0x8F -> 2, 0xA1..0xFE -> 1, everything else 0 */
+static const unsigned char mbctab_euc[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
+};
+/* Shift_JIS table: 0x81..0x9F and 0xE0..0xFC -> 1, everything else 0 */
+static const unsigned char mbctab_sjis[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
+};
+/* UTF-8 table: 0xC0..0xDF -> 1, 0xE0..0xEF -> 2, 0xF0..0xF7 -> 3, 0xF8..0xFB -> 4, 0xFC..0xFD -> 5, everything else 0 */
+static const unsigned char mbctab_utf8[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 0, 0,
+};
+/* currently selected table: starts as the ASCII table and is re-pointed by re_mbcinit() */
+const unsigned char *re_mbctab = mbctab_ascii;
+#endif
+/* Sets the library default encoding: directly from enc under ONIG_RUBY_M17N, otherwise from an MBCTYPE_* code (unknown codes return without changing anything). */
+extern void
+#ifdef ONIG_RUBY_M17N
+re_mbcinit(OnigEncoding enc)
+#else
+re_mbcinit(int mb_code)
+#endif
+{
+#ifdef ONIG_RUBY_M17N
+/* M17N build: the caller already supplies the encoding object */
+ onigenc_set_default_encoding(enc);
+
+#else
+/* legacy build: translate the MBCTYPE_* code into an encoding object */
+ OnigEncoding enc;
+
+ switch (mb_code) {
+ case MBCTYPE_ASCII:
+ enc = ONIG_ENCODING_ASCII;
+ break;
+ case MBCTYPE_EUC:
+ enc = ONIG_ENCODING_EUC_JP;
+ break;
+ case MBCTYPE_SJIS:
+ enc = ONIG_ENCODING_SJIS;
+ break;
+ case MBCTYPE_UTF8:
+ enc = ONIG_ENCODING_UTF8;
+ break;
+ default:
+ return ; /* unknown code: leave the default encoding (and re_mbctab) untouched */
+ break;
+ }
+
+ onigenc_set_default_encoding(enc);
+#endif
+/* the compatibility macro is only defined when ONIG_RUBY_M17N is not, so mb_code is in scope below */
+#ifdef USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
+ switch (mb_code) { /* keep re_mbctab in step with the encoding chosen above */
+ case MBCTYPE_ASCII:
+ re_mbctab = mbctab_ascii;
+ break;
+ case MBCTYPE_EUC:
+ re_mbctab = mbctab_euc;
+ break;
+ case MBCTYPE_SJIS:
+ re_mbctab = mbctab_sjis;
+ break;
+ case MBCTYPE_UTF8:
+ re_mbctab = mbctab_utf8;
+ break;
+ }
+#endif
+}
diff --git a/regint.h b/regint.h
new file mode 100644
index 000000000..35736b6dc
--- /dev/null
+++ b/regint.h
@@ -0,0 +1,685 @@
+/**********************************************************************
+
+ regint.h - Oniguruma (regular expression library)
+
+ Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
+
+**********************************************************************/
+#ifndef REGINT_H
+#define REGINT_H
+
+/* for debug */
+/* #define ONIG_DEBUG_PARSE_TREE */
+/* #define ONIG_DEBUG_COMPILE */
+/* #define ONIG_DEBUG_SEARCH */
+/* #define ONIG_DEBUG_MATCH */
+/* #define ONIG_DONT_OPTIMIZE */
+
+/* for byte-code statistical data. */
+/* #define ONIG_DEBUG_STATISTICS */
+
+#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \
+ defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_STATISTICS)
+#ifndef ONIG_DEBUG
+#define ONIG_DEBUG
+#endif
+#endif
+
+#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
+ (defined(__ppc__) && defined(__APPLE__)) || \
+ defined(__x86_64) || defined(__x86_64__) || \
+ defined(__mc68020__)
+#define PLATFORM_UNALIGNED_WORD_ACCESS
+#endif
+
+/* config */
+/* spec. config */
+#define USE_NAMED_GROUP
+#define USE_SUBEXP_CALL
+#define USE_FOLD_MATCH /* ess-tsett etc... */
+#define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */
+#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
+#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
+/* internal config */
+#define USE_RECYCLE_NODE
+#define USE_OP_PUSH_OR_JUMP_EXACT
+#define USE_QUALIFIER_PEEK_NEXT
+
+#define INIT_MATCH_STACK_SIZE 160
+#define MATCH_STACK_LIMIT_SIZE 500000
+
+/* interface to external system */
+#ifdef NOT_RUBY /* gived from Makefile */
+#include "config.h"
+#define USE_VARIABLE_META_CHARS
+#define USE_VARIABLE_SYNTAX
+#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
+#define USE_POSIX_REGION_OPTION /* needed for POSIX API support */
+#define THREAD_ATOMIC_START /* depend on thread system */
+#define THREAD_ATOMIC_END /* depend on thread system */
+#define THREAD_PASS /* depend on thread system */
+#define xmalloc malloc
+#define xrealloc realloc
+#define xfree free
+#else
+#include "ruby.h"
+#include "version.h"
+#include "rubysig.h" /* for DEFER_INTS, ENABLE_INTS */
+#define THREAD_ATOMIC_START DEFER_INTS
+#define THREAD_ATOMIC_END ENABLE_INTS
+#define THREAD_PASS rb_thread_schedule()
+#define DEFAULT_WARN_FUNCTION rb_warn
+#define DEFAULT_VERB_WARN_FUNCTION rb_warning
+
+#if defined(RUBY_VERSION_MAJOR)
+#if RUBY_VERSION_MAJOR > 1 || \
+(RUBY_VERSION_MAJOR == 1 && \
+ defined(RUBY_VERSION_MINOR) && RUBY_VERSION_MINOR >= 8)
+#define USE_ST_HASH_TABLE
+#endif
+#endif
+
+#endif /* else NOT_RUBY */
+
+#define THREAD_PASS_LIMIT_COUNT 10
+#define xmemset memset
+#define xmemcpy memcpy
+#define xmemmove memmove
+#if defined(_WIN32) && !defined(__CYGWIN__)
+#define xalloca _alloca
+#ifdef NOT_RUBY
+#define vsnprintf _vsnprintf
+#endif
+#else
+#define xalloca alloca
+#endif
+
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+
+#if defined(HAVE_ALLOCA_H) && !defined(__GNUC__)
+#include <alloca.h>
+#endif
+
+#ifdef HAVE_STRING_H
+# include <string.h>
+#else
+# include <strings.h>
+#endif
+
+#include <ctype.h>
+#include <sys/types.h>
+
+#ifdef ONIG_DEBUG
+# include <stdio.h>
+#endif
+
+#include "regenc.h"
+#include "oniguruma.h"
+/* Replace any MIN/MAX inherited from earlier headers with local versions; each argument may be evaluated twice, so avoid side effects in them. */
+#ifdef MIN
+#undef MIN
+#endif
+#ifdef MAX
+#undef MAX
+#endif
+#define MIN(a,b) (((a)>(b))?(b):(a))
+#define MAX(a,b) (((a)<(b))?(b):(a))
+/* Null-pointer helpers. CHECK_NULL_RETURN* expand to a bare `if` that returns from the calling function, so do not place one directly before an `else`. */
+#define IS_NULL(p) (((void*)(p)) == (void*)0)
+#define IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
+#define CHECK_NULL_RETURN(p) if (IS_NULL(p)) return NULL
+#define CHECK_NULL_RETURN_VAL(p,val) if (IS_NULL(p)) return (val)
+#define NULL_UCHARP ((UChar* )0)
+/* Helpers compiled only when PLATFORM_UNALIGNED_WORD_ACCESS is not defined: alignment padding and byte-wise integer (de)serialization. */
+#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
+#define WORD_ALIGNMENT_SIZE SIZEOF_INT
+/* pad_size = bytes needed to bring addr up to the next WORD_ALIGNMENT_SIZE multiple (0 when already aligned) */
+#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\
+ (pad_size) = WORD_ALIGNMENT_SIZE \
+ - ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\
+ if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\
+} while (0)
+/* round addr up to a multiple of WORD_ALIGNMENT_SIZE (addr is modified in place) */
+#define ALIGNMENT_RIGHT(addr) do {\
+ (addr) += (WORD_ALIGNMENT_SIZE - 1);\
+ (addr) -= ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\
+} while (0)
+
+
+#define B_SHIFT 8
+#define B_MASK 0xff
+/* SERIALIZE_nBYTE_INT: store i at p[0..n-1], most significant byte first; p is not advanced */
+#define SERIALIZE_2BYTE_INT(i,p) do {\
+ *(p) = ((i) >> B_SHIFT) & B_MASK;\
+ *((p)+1) = (i) & B_MASK;\
+} while (0)
+
+#define SERIALIZE_4BYTE_INT(i,p) do {\
+ *(p) = ((i) >> B_SHIFT*3) & B_MASK;\
+ *((p)+1) = ((i) >> B_SHIFT*2) & B_MASK;\
+ *((p)+2) = ((i) >> B_SHIFT ) & B_MASK;\
+ *((p)+3) = (i) & B_MASK;\
+} while (0)
+
+#define SERIALIZE_8BYTE_INT(i,p) do {\
+ *(p) = ((i) >> B_SHIFT*7) & B_MASK;\
+ *((p)+1) = ((i) >> B_SHIFT*6) & B_MASK;\
+ *((p)+2) = ((i) >> B_SHIFT*5) & B_MASK;\
+ *((p)+3) = ((i) >> B_SHIFT*4) & B_MASK;\
+ *((p)+4) = ((i) >> B_SHIFT*3) & B_MASK;\
+ *((p)+5) = ((i) >> B_SHIFT*2) & B_MASK;\
+ *((p)+6) = ((i) >> B_SHIFT ) & B_MASK;\
+ *((p)+7) = (i) & B_MASK;\
+} while (0)
+/* GET_nBYTE_INT_INC: rebuild an integer from p[0..n-1] (most significant byte first), cast it to type, then advance p by n */
+#define GET_2BYTE_INT_INC(type,i,p) do {\
+ (i) = (type )(((unsigned int )(*(p)) << B_SHIFT) | (unsigned int )((p)[1]));\
+ (p) += 2;\
+} while (0)
+
+#define GET_4BYTE_INT_INC(type,i,p) do {\
+ (i) = (type )(((unsigned int )((p)[0]) << B_SHIFT*3) | \
+ ((unsigned int )((p)[1]) << B_SHIFT*2) | \
+ ((unsigned int )((p)[2]) << B_SHIFT ) | \
+ ((unsigned int )((p)[3]) )); \
+ (p) += 4;\
+} while (0)
+/* NOTE(review): the 8-byte reader shifts unsigned long by up to 56 bits, which assumes unsigned long is at least 64 bits wide - confirm for targets that select it */
+#define GET_8BYTE_INT_INC(type,i,p) do {\
+ (i) = (type )(((unsigned long )((p)[0]) << B_SHIFT*7) | \
+ ((unsigned long )((p)[1]) << B_SHIFT*6) | \
+ ((unsigned long )((p)[2]) << B_SHIFT*5) | \
+ ((unsigned long )((p)[3]) << B_SHIFT*4) | \
+ ((unsigned long )((p)[4]) << B_SHIFT*3) | \
+ ((unsigned long )((p)[5]) << B_SHIFT*2) | \
+ ((unsigned long )((p)[6]) << B_SHIFT ) | \
+ ((unsigned long )((p)[7]) )); \
+ (p) += 8;\
+} while (0)
+/* pick the reader/writer pair matching the configured size of short; no pair is defined for other sizes */
+#if SIZEOF_SHORT == 2
+#define GET_SHORT_INC(i,p) GET_2BYTE_INT_INC(short,i,p)
+#define SERIALIZE_SHORT(i,p) SERIALIZE_2BYTE_INT(i,p)
+#elif SIZEOF_SHORT == 4
+#define GET_SHORT_INC(i,p) GET_4BYTE_INT_INC(short,i,p)
+#define SERIALIZE_SHORT(i,p) SERIALIZE_4BYTE_INT(i,p)
+#elif SIZEOF_SHORT == 8
+#define GET_SHORT_INC(i,p) GET_8BYTE_INT_INC(short,i,p)
+#define SERIALIZE_SHORT(i,p) SERIALIZE_8BYTE_INT(i,p)
+#endif
+/* same selection for int / unsigned int */
+#if SIZEOF_INT == 2
+#define GET_INT_INC(i,p) GET_2BYTE_INT_INC(int,i,p)
+#define GET_UINT_INC(i,p) GET_2BYTE_INT_INC(unsigned,i,p)
+#define SERIALIZE_INT(i,p) SERIALIZE_2BYTE_INT(i,p)
+#define SERIALIZE_UINT(i,p) SERIALIZE_2BYTE_INT(i,p)
+#elif SIZEOF_INT == 4
+#define GET_INT_INC(i,p) GET_4BYTE_INT_INC(int,i,p)
+#define GET_UINT_INC(i,p) GET_4BYTE_INT_INC(unsigned,i,p)
+#define SERIALIZE_INT(i,p) SERIALIZE_4BYTE_INT(i,p)
+#define SERIALIZE_UINT(i,p) SERIALIZE_4BYTE_INT(i,p)
+#elif SIZEOF_INT == 8
+#define GET_INT_INC(i,p) GET_8BYTE_INT_INC(int,i,p)
+#define GET_UINT_INC(i,p) GET_8BYTE_INT_INC(unsigned,i,p)
+#define SERIALIZE_INT(i,p) SERIALIZE_8BYTE_INT(i,p)
+#define SERIALIZE_UINT(i,p) SERIALIZE_8BYTE_INT(i,p)
+#endif
+
+#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
+/* Small integer constant groups used by the matcher and the search optimizer. */
+/* stack pop level */
+#define STACK_POP_LEVEL_FREE 0
+#define STACK_POP_LEVEL_MEM_START 1
+#define STACK_POP_LEVEL_ALL 2
+/* The ONIG_OPTIMIZE_* values are what reg->optimize holds; backward_search_range() in regexec.c switches on them and onig_search() skips the optimizer when the value is ONIG_OPTIMIZE_NONE. */
+/* optimize flags */
+#define ONIG_OPTIMIZE_NONE 0
+#define ONIG_OPTIMIZE_EXACT 1 /* Slow Search */
+#define ONIG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */
+#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM (but not simple match) */
+#define ONIG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */
+#define ONIG_OPTIMIZE_MAP 5 /* char map */
+
+/* bit status: one flag per index n in a BitStatusType word; any n >= BIT_STATUS_BITS_NUM shares bit 0 (except in the _SIMPLE setter, which ignores it) */
+typedef unsigned int BitStatusType;
+
+#define BIT_STATUS_BITS_NUM (sizeof(BitStatusType) * 8)
+#define BIT_STATUS_CLEAR(stats) (stats) = 0
+#define BIT_STATUS_ON_ALL(stats) (stats) = ~((BitStatusType )0)
+#define BIT_STATUS_AT(stats,n) \
+ ((n) < BIT_STATUS_BITS_NUM ? ((stats) & ((BitStatusType )1 << (n))) : ((stats) & 1))
+/* the masks are built from an unsigned 1 so that selecting the top bit is not a signed-overflow shift */
+#define BIT_STATUS_ON_AT(stats,n) do {\
+ if ((n) < BIT_STATUS_BITS_NUM)\
+ (stats) |= ((BitStatusType )1 << (n));\
+ else\
+ (stats) |= 1;\
+} while (0)
+
+#define BIT_STATUS_ON_AT_SIMPLE(stats,n) do {\
+ if ((n) < BIT_STATUS_BITS_NUM)\
+ (stats) |= ((BitStatusType )1 << (n));\
+} while (0)
+/* INT_MAX_LIMIT evaluates to 2^(SIZEOF_INT*8 - 1) - 1, computed in unsigned long. */
+
+#define INT_MAX_LIMIT ((1UL << (SIZEOF_INT * 8 - 1)) - 1)
+/* digit helpers: no validation is done; XDIGITVAL treats any non-digit, non-upper code as a lowercase hex letter */
+#define DIGITVAL(code) ((code) - '0')
+#define ODIGITVAL(code) DIGITVAL(code)
+#define XDIGITVAL(enc,code) \
+ (ONIGENC_IS_CODE_DIGIT(enc,code) ? DIGITVAL(code) \
+ : (ONIGENC_IS_CODE_UPPER(enc,code) ? (code) - 'A' + 10 : (code) - 'a' + 10))
+/* option tests: each yields the masked bits (non-zero when set) rather than a normalized 0/1, except IS_POSIXLINE which combines two tests with && */
+#define IS_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE)
+#define IS_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE)
+#define IS_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE)
+#define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND)
+#define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST)
+#define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY)
+#define IS_POSIXLINE(option) (IS_SINGLELINE(option) && IS_MULTILINE(option))
+#define IS_FIND_CONDITION(option) ((option) & \
+ (ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY))
+#define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL)
+#define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL)
+#define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION)
+
+/* OP_SET_OPTION is required for these options.
+#define IS_DYNAMIC_OPTION(option) \
+ (((option) & (ONIG_OPTION_MULTILINE | ONIG_OPTION_IGNORECASE)) != 0)
+*/
+/* ignore-case and multibyte status are included in compiled code. */
+#define IS_DYNAMIC_OPTION(option) 0
+
+
+/* bitset: SINGLE_BYTE_SIZE (256) bits stored in BITSET_SIZE rooms of type Bits; pos selects room pos / BITS_IN_ROOM and bit pos % BITS_IN_ROOM */
+#define BITS_PER_BYTE 8
+#define SINGLE_BYTE_SIZE (1 << BITS_PER_BYTE)
+#define BITS_IN_ROOM (sizeof(Bits) * BITS_PER_BYTE)
+#define BITSET_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM)
+
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+typedef unsigned int Bits;
+#else
+typedef unsigned char Bits;
+#endif
+typedef Bits BitSet[BITSET_SIZE];
+typedef Bits* BitSetRef;
+
+#define SIZE_BITSET sizeof(BitSet)
+
+#define BITSET_CLEAR(bs) do {\
+ int i;\
+ for (i = 0; i < BITSET_SIZE; i++) { (bs)[i] = 0; }\
+} while (0)
+/* pos is parenthesized so expression arguments index correctly; the mask starts from a Bits-typed 1 so the top bit of an unsigned room is not a signed-overflow shift */
+#define BS_ROOM(bs,pos) (bs)[(pos) / BITS_IN_ROOM]
+#define BS_BIT(pos) ((Bits )1 << ((pos) % BITS_IN_ROOM))
+
+#define BITSET_AT(bs, pos) (BS_ROOM(bs,pos) & BS_BIT(pos))
+#define BITSET_SET_BIT(bs, pos) BS_ROOM(bs,pos) |= BS_BIT(pos)
+#define BITSET_CLEAR_BIT(bs, pos) BS_ROOM(bs,pos) &= ~(BS_BIT(pos))
+#define BITSET_INVERT_BIT(bs, pos) BS_ROOM(bs,pos) ^= BS_BIT(pos)
+/* Growable byte buffer: p is the storage, used the number of valid bytes, alloc the capacity. The growing macros `return ONIGERR_MEMORY` from the calling function when xrealloc() yields NULL. */
+/* bytes buffer */
+typedef struct _BBuf {
+ UChar* p;
+ unsigned int used;
+ unsigned int alloc;
+} BBuf;
+
+#define BBUF_INIT(buf,size) onig_bbuf_init((BBuf* )(buf), (size))
+/* NOTE(review): the three growing macros assign xrealloc()'s result straight to (buf)->p, so on failure the old block is no longer reachable through buf */
+#define BBUF_SIZE_INC(buf,inc) do{\
+ (buf)->alloc += (inc);\
+ (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\
+ if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\
+} while (0)
+/* doubles alloc until it reaches low; assumes alloc is non-zero on entry */
+#define BBUF_EXPAND(buf,low) do{\
+ do { (buf)->alloc *= 2; } while ((buf)->alloc < (unsigned int )(low));\
+ (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\
+ if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\
+} while (0)
+
+#define BBUF_ENSURE_SIZE(buf,size) do{\
+ unsigned int new_alloc = (buf)->alloc;\
+ while (new_alloc < (unsigned int )(size)) { new_alloc *= 2; }\
+ if ((buf)->alloc != new_alloc) {\
+ (buf)->p = (UChar* )xrealloc((buf)->p, new_alloc);\
+ if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\
+ (buf)->alloc = new_alloc;\
+ }\
+} while (0)
+/* write n bytes at offset pos, growing the buffer and extending used as needed */
+#define BBUF_WRITE(buf,pos,bytes,n) do{\
+ int used = (pos) + (n);\
+ if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\
+ xmemcpy((buf)->p + (pos), (bytes), (n));\
+ if ((buf)->used < (unsigned int )used) (buf)->used = used;\
+} while (0)
+
+#define BBUF_WRITE1(buf,pos,byte) do{\
+ int used = (pos) + 1;\
+ if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\
+ (buf)->p[(pos)] = (byte);\
+ if ((buf)->used < (unsigned int )used) (buf)->used = used;\
+} while (0)
+
+#define BBUF_ADD(buf,bytes,n) BBUF_WRITE((buf),(buf)->used,(bytes),(n))
+#define BBUF_ADD1(buf,byte) BBUF_WRITE1((buf),(buf)->used,(byte))
+#define BBUF_GET_ADD_ADDRESS(buf) ((buf)->p + (buf)->used)
+#define BBUF_GET_OFFSET_POS(buf) ((buf)->used)
+
+/* from < to */
+#define BBUF_MOVE_RIGHT(buf,from,to,n) do {\
+ if ((unsigned int )((to)+(n)) > (buf)->alloc) BBUF_EXPAND((buf),(to) + (n));\
+ xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
+ if ((unsigned int )((to)+(n)) > (buf)->used) (buf)->used = (to) + (n);\
+} while (0)
+
+/* from > to */
+#define BBUF_MOVE_LEFT(buf,from,to,n) do {\
+ xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
+} while (0)
+
+/* from > to */
+#define BBUF_MOVE_LEFT_REDUCE(buf,from,to) do {\
+ xmemmove((buf)->p + (to), (buf)->p + (from), (buf)->used - (from));\
+ (buf)->used -= ((from) - (to));\
+} while (0)
+/* insert n bytes at pos, shifting the existing tail right; behaves like BBUF_WRITE when pos is at or past used */
+#define BBUF_INSERT(buf,pos,bytes,n) do {\
+ if ((pos) >= (buf)->used) {\
+ BBUF_WRITE(buf,pos,bytes,n);\
+ }\
+ else {\
+ BBUF_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\
+ xmemcpy((buf)->p + (pos), (bytes), (n));\
+ }\
+} while (0)
+
+#define BBUF_GET_BYTE(buf, pos) (buf)->p[(pos)]
+
+/* Anchor bit flags. onig_search() tests reg->anchor against the BEGIN_POSITION, BEGIN_BUF, END_BUF, SEMI_END_BUF and ANYCHAR_STAR* bits; backward_search_range() compares reg->sub_anchor with the *_LINE values. */
+#define ANCHOR_BEGIN_BUF (1<<0)
+#define ANCHOR_BEGIN_LINE (1<<1)
+#define ANCHOR_BEGIN_POSITION (1<<2)
+#define ANCHOR_END_BUF (1<<3)
+#define ANCHOR_SEMI_END_BUF (1<<4)
+#define ANCHOR_END_LINE (1<<5)
+
+#define ANCHOR_WORD_BOUND (1<<6)
+#define ANCHOR_NOT_WORD_BOUND (1<<7)
+#define ANCHOR_WORD_BEGIN (1<<8)
+#define ANCHOR_WORD_END (1<<9)
+#define ANCHOR_PREC_READ (1<<10)
+#define ANCHOR_PREC_READ_NOT (1<<11)
+#define ANCHOR_LOOK_BEHIND (1<<12)
+#define ANCHOR_LOOK_BEHIND_NOT (1<<13)
+
+#define ANCHOR_ANYCHAR_STAR (1<<14) /* ".*" optimize info */
+#define ANCHOR_ANYCHAR_STAR_PL (1<<15) /* ".*" optimize info (posix-line) */
+/* Opcode numbering: OP_FINISH is 0, OP_END is 1, OP_EXACT1 is 2 and every later enumerator takes the next consecutive value, so inserting or reordering entries renumbers all opcodes after that point. */
+/* operation code */
+enum OpCode {
+ OP_FINISH = 0, /* matching process terminator (no more alternative) */
+ OP_END = 1, /* pattern code terminator (success end) */
+
+ OP_EXACT1 = 2, /* single byte, N = 1 */
+ OP_EXACT2, /* single byte, N = 2 */
+ OP_EXACT3, /* single byte, N = 3 */
+ OP_EXACT4, /* single byte, N = 4 */
+ OP_EXACT5, /* single byte, N = 5 */
+ OP_EXACTN, /* single byte */
+ OP_EXACTMB2N1, /* mb-length = 2 N = 1 */
+ OP_EXACTMB2N2, /* mb-length = 2 N = 2 */
+ OP_EXACTMB2N3, /* mb-length = 2 N = 3 */
+ OP_EXACTMB2N, /* mb-length = 2 */
+ OP_EXACTMB3N, /* mb-length = 3 */
+ OP_EXACTMBN, /* other length */
+
+ OP_EXACT1_IC, /* single byte, N = 1, ignore case */
+ OP_EXACTN_IC, /* single byte, ignore case */
+
+ OP_CCLASS,
+ OP_CCLASS_MB,
+ OP_CCLASS_MIX,
+ OP_CCLASS_NOT,
+ OP_CCLASS_MB_NOT,
+ OP_CCLASS_MIX_NOT,
+
+ OP_ANYCHAR, /* "." */
+ OP_ANYCHAR_ML, /* "." multi-line */
+ OP_ANYCHAR_STAR, /* ".*" */
+ OP_ANYCHAR_ML_STAR, /* ".*" multi-line */
+ OP_ANYCHAR_STAR_PEEK_NEXT,
+ OP_ANYCHAR_ML_STAR_PEEK_NEXT,
+
+ OP_WORD,
+ OP_NOT_WORD,
+ OP_WORD_SB,
+ OP_WORD_MB,
+ OP_WORD_BOUND,
+ OP_NOT_WORD_BOUND,
+ OP_WORD_BEGIN,
+ OP_WORD_END,
+
+ OP_BEGIN_BUF,
+ OP_END_BUF,
+ OP_BEGIN_LINE,
+ OP_END_LINE,
+ OP_SEMI_END_BUF,
+ OP_BEGIN_POSITION,
+
+ OP_BACKREF1,
+ OP_BACKREF2,
+ OP_BACKREF3,
+ OP_BACKREFN,
+ OP_BACKREFN_IC,
+ OP_BACKREF_MULTI,
+ OP_BACKREF_MULTI_IC,
+
+ OP_MEMORY_START,
+ OP_MEMORY_START_PUSH, /* push back-tracker to stack */
+ OP_MEMORY_END_PUSH, /* push back-tracker to stack */
+ OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */
+ OP_MEMORY_END,
+ OP_MEMORY_END_REC, /* push marker to stack */
+
+ OP_SET_OPTION_PUSH, /* set option and push recover option */
+ OP_SET_OPTION, /* set option */
+
+ OP_FAIL, /* pop stack and move */
+ OP_JUMP,
+ OP_PUSH,
+ OP_POP,
+ OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
+ OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
+ OP_REPEAT, /* {n,m} */
+ OP_REPEAT_NG, /* {n,m}? (non greedy) */
+ OP_REPEAT_INC,
+ OP_REPEAT_INC_NG, /* non greedy */
+ OP_NULL_CHECK_START, /* null loop checker start */
+ OP_NULL_CHECK_END, /* null loop checker end */
+ OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
+ OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
+
+ OP_PUSH_POS, /* (?=...) start */
+ OP_POP_POS, /* (?=...) end */
+ OP_PUSH_POS_NOT, /* (?!...) start */
+ OP_FAIL_POS, /* (?!...) end */
+ OP_PUSH_STOP_BT, /* (?>...) start */
+ OP_POP_STOP_BT, /* (?>...) end */
+ OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */
+ OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */
+ OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */
+
+ OP_CALL, /* \g<name> */
+ OP_RETURN
+};
+/* Operand kinds, their C types and their encoded sizes. Addresses, lengths and memory numbers are short int, so their range is that of short. */
+/* arguments type */
+#define ARG_SPECIAL -1
+#define ARG_NON 0
+#define ARG_RELADDR 1
+#define ARG_ABSADDR 2
+#define ARG_LENGTH 3
+#define ARG_MEMNUM 4
+#define ARG_OPTION 5
+/* C types used for each operand kind */
+typedef short int RelAddrType;
+typedef short int AbsAddrType;
+typedef short int LengthType;
+typedef short int MemNumType;
+typedef int RepeatNumType;
+/* sizes in bytes; an opcode itself occupies one byte */
+#define SIZE_OPCODE 1
+#define SIZE_RELADDR sizeof(RelAddrType)
+#define SIZE_ABSADDR sizeof(AbsAddrType)
+#define SIZE_LENGTH sizeof(LengthType)
+#define SIZE_MEMNUM sizeof(MemNumType)
+#define SIZE_REPEATNUM sizeof(RepeatNumType)
+#define SIZE_OPTION sizeof(OnigOptionType)
+#define SIZE_CODE_POINT sizeof(OnigCodePoint)
+
+/* Operand readers: GET_xxx_INC(dst, p) loads one operand of the given kind
+   from position p into dst and advances p past it.
+   With PLATFORM_UNALIGNED_WORD_ACCESS the operand is read through a cast
+   pointer, i.e. p need not be aligned for the operand type. */
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+#define GET_RELADDR_INC(addr,p) do{\
+ addr = *((RelAddrType* )(p));\
+ (p) += SIZE_RELADDR;\
+} while(0)
+
+#define GET_ABSADDR_INC(addr,p) do{\
+ addr = *((AbsAddrType* )(p));\
+ (p) += SIZE_ABSADDR;\
+} while(0)
+
+#define GET_LENGTH_INC(len,p) do{\
+ len = *((LengthType* )(p));\
+ (p) += SIZE_LENGTH;\
+} while(0)
+
+#define GET_MEMNUM_INC(num,p) do{\
+ num = *((MemNumType* )(p));\
+ (p) += SIZE_MEMNUM;\
+} while(0)
+
+#define GET_REPEATNUM_INC(num,p) do{\
+ num = *((RepeatNumType* )(p));\
+ (p) += SIZE_REPEATNUM;\
+} while(0)
+
+#define GET_OPTION_INC(option,p) do{\
+ option = *((OnigOptionType* )(p));\
+ (p) += SIZE_OPTION;\
+} while(0)
+#else
+
+/* Otherwise the readers delegate to GET_SHORT_INC / GET_INT_INC /
+   GET_UINT_INC, which are defined outside this section. */
+#define GET_RELADDR_INC(addr,p) GET_SHORT_INC(addr,p)
+#define GET_ABSADDR_INC(addr,p) GET_SHORT_INC(addr,p)
+#define GET_LENGTH_INC(len,p) GET_SHORT_INC(len,p)
+#define GET_MEMNUM_INC(num,p) GET_SHORT_INC(num,p)
+#define GET_REPEATNUM_INC(num,p) GET_INT_INC(num,p)
+#define GET_OPTION_INC(option,p) GET_UINT_INC(option,p)
+
+/* Matching writers; note they exist only in this (#else) configuration. */
+#define SERIALIZE_RELADDR(addr,p) SERIALIZE_SHORT(addr,p)
+#define SERIALIZE_ABSADDR(addr,p) SERIALIZE_SHORT(addr,p)
+#define SERIALIZE_LENGTH(len,p) SERIALIZE_SHORT(len,p)
+#define SERIALIZE_MEMNUM(num,p) SERIALIZE_SHORT(num,p)
+#define SERIALIZE_REPEATNUM(num,p) SERIALIZE_INT(num,p)
+#define SERIALIZE_OPTION(option,p) SERIALIZE_UINT(option,p)
+
+#define SERIALIZE_BUFSIZE SIZEOF_INT
+
+#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
+
+/* code point's address must be aligned address. */
+/* GET_CODE_POINT reads through a cast pointer in every configuration and
+   does not advance p; it is a bare assignment, not a do{}while(0) block. */
+#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p))
+/* Read one byte from p into `byte` and step p forward by one. */
+#define GET_BYTE_INC(byte,p) do{\
+ byte = *(p);\
+ (p)++;\
+} while(0)
+
+
+/* op-code + arg size */
+/* Total encoded length of each instruction: SIZE_OPCODE plus the sizes of
+   its operands. A literal `+ 1` stands for one extra raw byte operand. */
+#define SIZE_OP_ANYCHAR_STAR SIZE_OPCODE
+#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1)
+#define SIZE_OP_JUMP (SIZE_OPCODE + SIZE_RELADDR)
+#define SIZE_OP_PUSH (SIZE_OPCODE + SIZE_RELADDR)
+#define SIZE_OP_POP SIZE_OPCODE
+#define SIZE_OP_PUSH_OR_JUMP_EXACT1 (SIZE_OPCODE + SIZE_RELADDR + 1)
+#define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1)
+#define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_REPEAT_INC_NG (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_PUSH_POS SIZE_OPCODE
+#define SIZE_OP_PUSH_POS_NOT (SIZE_OPCODE + SIZE_RELADDR)
+#define SIZE_OP_POP_POS SIZE_OPCODE
+#define SIZE_OP_FAIL_POS SIZE_OPCODE
+#define SIZE_OP_SET_OPTION (SIZE_OPCODE + SIZE_OPTION)
+#define SIZE_OP_SET_OPTION_PUSH (SIZE_OPCODE + SIZE_OPTION)
+#define SIZE_OP_FAIL SIZE_OPCODE
+#define SIZE_OP_MEMORY_START (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_MEMORY_START_PUSH (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_MEMORY_END_PUSH (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_MEMORY_END_PUSH_REC (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_MEMORY_END (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_MEMORY_END_REC (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_PUSH_STOP_BT SIZE_OPCODE
+#define SIZE_OP_POP_STOP_BT SIZE_OPCODE
+#define SIZE_OP_NULL_CHECK_START (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_NULL_CHECK_END (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_LOOK_BEHIND (SIZE_OPCODE + SIZE_LENGTH)
+#define SIZE_OP_PUSH_LOOK_BEHIND_NOT (SIZE_OPCODE + SIZE_RELADDR + SIZE_LENGTH)
+#define SIZE_OP_FAIL_LOOK_BEHIND_NOT SIZE_OPCODE
+#define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR)
+#define SIZE_OP_RETURN SIZE_OPCODE
+
+
+/* The set of configurable meta characters. Each one is a single UChar,
+   so only one-byte values can be stored. */
+typedef struct {
+ UChar esc;
+ UChar anychar;
+ UChar anytime;
+ UChar zero_or_one_time;
+ UChar one_or_more_time;
+ UChar anychar_anytime;
+} OnigMetaCharTableType;
+
+/* The single global instance of the table (declared here only). */
+extern OnigMetaCharTableType OnigMetaCharTable;
+
+/* Shorthand accessors for the fields of the global table. */
+#define MC_ESC OnigMetaCharTable.esc
+#define MC_ANYCHAR OnigMetaCharTable.anychar
+#define MC_ANYTIME OnigMetaCharTable.anytime
+#define MC_ZERO_OR_ONE_TIME OnigMetaCharTable.zero_or_one_time
+#define MC_ONE_OR_MORE_TIME OnigMetaCharTable.one_or_more_time
+#define MC_ANYCHAR_ANYTIME OnigMetaCharTable.anychar_anytime
+
+
+/* Declarations available only in ONIG_DEBUG builds. */
+#ifdef ONIG_DEBUG
+
+/* One row of the opcode description table: opcode value, printable name
+   and operand kind. */
+typedef struct {
+ short int opcode;
+ char* name;
+ short int arg_type;
+} OnigOpInfoType;
+
+extern OnigOpInfoType OnigOpInfo[];
+
+extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp));
+
+#ifdef ONIG_DEBUG_STATISTICS
+extern void onig_statistics_init P_((void));
+extern void onig_print_statistics P_((FILE* f));
+#endif
+#endif
+
+/* Internal functions shared between the library's source files.
+   P_() / PV_() wrap the parameter lists (PV_ is used for the variadic one). */
+extern char* onig_error_code_to_format P_((int code));
+extern void onig_snprintf_with_pattern PV_((char buf[], int bufsize, OnigEncoding enc, char* pat, char* pat_end, char *fmt, ...));
+extern UChar* onig_strdup P_((UChar* s, UChar* end));
+extern int onig_bbuf_init P_((BBuf* buf, int size));
+extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax));
+extern int onig_compile P_((regex_t* reg, UChar* pattern, UChar* pattern_end, OnigErrorInfo* einfo));
+extern void onig_chain_reduce P_((regex_t* reg));
+extern int onig_is_in_code_range P_((UChar* p, OnigCodePoint code));
+
+#endif /* REGINT_H */
diff --git a/regparse.c b/regparse.c
new file mode 100644
index 000000000..2260df415
--- /dev/null
+++ b/regparse.c
@@ -0,0 +1,4815 @@
+/**********************************************************************
+
+ regparse.c - Oniguruma (regular expression library)
+
+ Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
+
+**********************************************************************/
+#include "regparse.h"
+
+/* Buffer size constant for warning messages (used outside this section). */
+#define WARN_BUFSIZE 256
+
+/* Operator flags shared by the POSIX basic and POSIX extended tables. */
+#define SYN_POSIX_COMMON_OP \
+ ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \
+ ONIG_SYN_OP_DECIMAL_BACKREF | \
+ ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \
+ ONIG_SYN_OP_LINE_ANCHOR | \
+ ONIG_SYN_OP_ESC_CONTROL_CHARS )
+
+/* Operator flags of the GNU regex table; also the base from which the
+   Java, Perl and Ruby tables below are derived. */
+#define SYN_GNU_REGEX_OP \
+ ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \
+ ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \
+ ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \
+ ONIG_SYN_OP_VBAR_ALT | \
+ ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \
+ ONIG_SYN_OP_QMARK_ZERO_ONE | \
+ ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \
+ ONIG_SYN_OP_ESC_W_WORD | \
+ ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \
+ ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \
+ ONIG_SYN_OP_LINE_ANCHOR )
+
+/* Behavior flags of the GNU regex table (reused by Java, Perl and Ruby). */
+#define SYN_GNU_REGEX_BV \
+ ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \
+ ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \
+ ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
+
+/* Predefined syntax tables, compiled in only with USE_VARIABLE_SYNTAX.
+   Each initializer lists, in order: operator flags (op), second operator
+   flag set (op2), behavior flags, and default options. */
+#ifdef USE_VARIABLE_SYNTAX
+/* POSIX basic: groups and intervals are written with a backslash. */
+OnigSyntaxType OnigSyntaxPosixBasic = {
+ ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
+ ONIG_SYN_OP_ESC_BRACE_INTERVAL )
+ , 0
+ , 0
+ , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
+};
+
+/* POSIX extended: unescaped groups, intervals, `+`, `?` and `|`. */
+OnigSyntaxType OnigSyntaxPosixExtended = {
+ ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP |
+ ONIG_SYN_OP_BRACE_INTERVAL |
+ ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT )
+ , 0
+ , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS |
+ ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |
+ ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP |
+ ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
+ , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
+};
+
+/* Emacs. */
+OnigSyntaxType OnigSyntaxEmacs = {
+ ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC |
+ ONIG_SYN_OP_ESC_BRACE_INTERVAL |
+ ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT |
+ ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF |
+ ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF |
+ ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS )
+ , ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
+ , ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
+ , ONIG_OPTION_NONE
+};
+
+/* grep.
+   NOTE(review): this table enables the unescaped ONIG_SYN_OP_BRACE_INTERVAL
+   while groups, alternation, `+` and `?` use their ESC_ (backslash) forms;
+   confirm whether ONIG_SYN_OP_ESC_BRACE_INTERVAL was intended. */
+OnigSyntaxType OnigSyntaxGrep = {
+ ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET |
+ ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
+ ONIG_SYN_OP_ESC_VBAR_ALT |
+ ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF |
+ ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR |
+ ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND |
+ ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF )
+ , 0
+ , ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC )
+ , ONIG_OPTION_NONE
+};
+
+/* GNU regex: exactly the SYN_GNU_REGEX_* masks, no op2 features. */
+OnigSyntaxType OnigSyntaxGnuRegex = {
+ SYN_GNU_REGEX_OP
+ , 0
+ , SYN_GNU_REGEX_BV
+ , ONIG_OPTION_NONE
+};
+
+/* Java: GNU operators plus extras, minus the \< \> word anchors. */
+OnigSyntaxType OnigSyntaxJava = {
+ (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
+ ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL |
+ ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 )
+ & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
+ , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
+ ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
+ ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP |
+ ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 |
+ ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY )
+ , ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
+ , ONIG_OPTION_SINGLELINE
+};
+
+/* Perl: GNU operators plus extras, minus the \< \> word anchors. */
+OnigSyntaxType OnigSyntaxPerl = {
+ (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
+ ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
+ ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
+ ONIG_SYN_OP_ESC_C_CONTROL )
+ & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
+ , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
+ ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
+ ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY )
+ , SYN_GNU_REGEX_BV
+ , ONIG_OPTION_SINGLELINE
+};
+#endif /* USE_VARIABLE_SYNTAX */
+
+/* Ruby syntax table. Unlike the tables above it is always compiled in,
+   and it is the initial value of OnigDefaultSyntax below.
+   Fields in order: op, op2, behavior, default options. */
+OnigSyntaxType OnigSyntaxRuby = {
+ (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
+ ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
+ ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
+ ONIG_SYN_OP_ESC_C_CONTROL )
+ & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
+ , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
+ ONIG_SYN_OP2_OPTION_RUBY |
+ ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
+ ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
+ ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
+ ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
+ ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB )
+ , ( SYN_GNU_REGEX_BV |
+ ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
+ ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
+ ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
+ ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
+ ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
+ ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
+ , ONIG_OPTION_NONE
+};
+
+/* Syntax used by default; replaceable through onig_set_default_syntax()
+   when USE_VARIABLE_SYNTAX is defined. */
+OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY;
+
+#ifdef USE_VARIABLE_SYNTAX
+/* Install `syntax` as the library-wide default syntax.
+   A NULL argument selects ONIG_SYNTAX_RUBY. Always returns 0. */
+extern int
+onig_set_default_syntax(OnigSyntaxType* syntax)
+{
+  OnigDefaultSyntax = IS_NULL(syntax) ? ONIG_SYNTAX_RUBY : syntax;
+  return 0;
+}
+
+/* Copy the whole syntax table `from` into `to` (plain struct assignment). */
+extern void
+onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from)
+{
+ *to = *from;
+}
+
+/* Replace the operator flag set of `syntax` with `op`. */
+extern void
+onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
+{
+ syntax->op = op;
+}
+
+/* Replace the second operator flag set of `syntax` with `op2`. */
+extern void
+onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
+{
+ syntax->op2 = op2;
+}
+
+/* Replace the behavior flag set of `syntax` with `behavior`. */
+extern void
+onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
+{
+ syntax->behavior = behavior;
+}
+
+/* Replace the default options of `syntax` with `options`. */
+extern void
+onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
+{
+ syntax->options = options;
+}
+#endif
+
+/* Global meta character table. Only the escape character is preset
+   (backslash); every other entry starts as 0.
+   The fields of OnigMetaCharTableType are UChar, so the initializers are
+   cast to UChar rather than to the wider OnigCodePoint. */
+OnigMetaCharTableType OnigMetaCharTable = {
+  (UChar )'\\'  /* esc */
+  , (UChar )0   /* anychar '.' */
+  , (UChar )0   /* anytime '*' */
+  , (UChar )0   /* zero or one time '?' */
+  , (UChar )0   /* one or more time '+' */
+  , (UChar )0   /* anychar anytime */
+};
+
+#ifdef USE_VARIABLE_META_CHARS
+/* Set one entry of the global meta character table.
+   what: one of the ONIG_META_CHAR_* selectors.
+   c:    new value; it is stored into a UChar field.
+   Returns 0 on success, ONIGERR_INVALID_ARGUMENT for an unknown selector
+   (in which case the table is left untouched). */
+extern int onig_set_meta_char(unsigned int what, unsigned int c)
+{
+  UChar* slot;
+
+  switch (what) {
+  case ONIG_META_CHAR_ESCAPE:
+    slot = &OnigMetaCharTable.esc;
+    break;
+  case ONIG_META_CHAR_ANYCHAR:
+    slot = &OnigMetaCharTable.anychar;
+    break;
+  case ONIG_META_CHAR_ANYTIME:
+    slot = &OnigMetaCharTable.anytime;
+    break;
+  case ONIG_META_CHAR_ZERO_OR_ONE_TIME:
+    slot = &OnigMetaCharTable.zero_or_one_time;
+    break;
+  case ONIG_META_CHAR_ONE_OR_MORE_TIME:
+    slot = &OnigMetaCharTable.one_or_more_time;
+    break;
+  case ONIG_META_CHAR_ANYCHAR_ANYTIME:
+    slot = &OnigMetaCharTable.anychar_anytime;
+    break;
+  default:
+    return ONIGERR_INVALID_ARGUMENT;
+  }
+
+  *slot = c;
+  return 0;
+}
+#endif /* USE_VARIABLE_META_CHARS */
+
+
+/* Warning callback that ignores its message. */
+extern void onig_null_warn(char* s) { }
+
+/* Current warning callback: DEFAULT_WARN_FUNCTION when the build defines
+   it, otherwise the do-nothing onig_null_warn. */
+#ifdef DEFAULT_WARN_FUNCTION
+static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
+#else
+static OnigWarnFunc onig_warn = onig_null_warn;
+#endif
+
+/* Same scheme for the second ("verb") warning callback. */
+#ifdef DEFAULT_VERB_WARN_FUNCTION
+static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
+#else
+static OnigWarnFunc onig_verb_warn = onig_null_warn;
+#endif
+
+/* Replace the warning callback. `f` is stored as given, without a NULL
+   check. */
+extern void onig_set_warn_func(OnigWarnFunc f)
+{
+ onig_warn = f;
+}
+
+/* Replace the "verb" warning callback. `f` is stored as given, without a
+   NULL check. */
+extern void onig_set_verb_warn_func(OnigWarnFunc f)
+{
+ onig_verb_warn = f;
+}
+
+/* Release a BBuf and, when present, the byte array it owns.
+   A NULL argument is accepted and ignored. */
+static void
+bbuf_free(BBuf* bbuf)
+{
+  if (IS_NULL(bbuf)) return;
+
+  if (IS_NOT_NULL(bbuf->p))
+    xfree(bbuf->p);
+  xfree(bbuf);
+}
+
+/* Create a copy of `from` and store it in *rto.
+   The copy gets the same capacity (`alloc`) and the first `used` bytes.
+   Returns 0 on success, or ONIGERR_MEMORY / the BBUF_INIT error code on
+   failure. On every failure *rto is NULL and nothing stays allocated, so
+   the caller never sees a half-built buffer. */
+static int
+bbuf_clone(BBuf** rto, BBuf* from)
+{
+  int r;
+  BBuf *to;
+
+  *rto = (BBuf* )NULL;
+
+  to = (BBuf* )xmalloc(sizeof(BBuf));
+  CHECK_NULL_RETURN_VAL(to, ONIGERR_MEMORY);
+
+  r = BBUF_INIT(to, from->alloc);
+  if (r != 0) {
+    /* assumes BBUF_INIT leaves no allocation behind when it fails --
+       only the header obtained above has to be released */
+    xfree(to);
+    return r;
+  }
+
+  to->used = from->used;
+  xmemcpy(to->p, from->p, from->used);
+  *rto = to;
+  return 0;
+}
+
+/* Clear the bits `f` in `v` when `negative` is true, set them otherwise.
+   The whole expansion is parenthesized so that it stays one operand when
+   used inside a larger expression. */
+#define ONOFF(v,f,negative) ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
+
+/* Add the code range 0x80 .. (all bits set) to the range buffer *pbuf. */
+#define SET_ALL_MULTI_BYTE_RANGE(pbuf) \
+ add_code_range_to_buf(pbuf, (OnigCodePoint )0x80, ~((OnigCodePoint )0))
+
+/* Unless `code` is single-byte, fill `mbuf` with the full multi-byte range.
+   Caution: assigns to a variable `r` that must exist in the calling
+   function, and returns `r` from that function on failure. */
+#define ADD_ALL_MULTI_BYTE_RANGE(code, mbuf) do {\
+ if (! ONIGENC_IS_SINGLEBYTE(code)) {\
+ r = SET_ALL_MULTI_BYTE_RANGE(&(mbuf));\
+ if (r) return r;\
+ }\
+} while (0)
+
+
+/* Set `empty` to 1 when every word of bit set `bs` is zero, else to 0.
+   Declares its own loop counter `i` inside the block. */
+#define BITSET_IS_EMPTY(bs,empty) do {\
+ int i;\
+ empty = 1;\
+ for (i = 0; i < BITSET_SIZE; i++) {\
+ if ((bs)[i] != 0) {\
+ empty = 0; break;\
+ }\
+ }\
+} while (0)
+
+/* Set the bits from..to (inclusive) in `bs`. Positions at or beyond
+   SINGLE_BYTE_SIZE are ignored; an empty range (from > to) sets nothing. */
+static void
+bitset_set_range(BitSetRef bs, int from, int to)
+{
+  int bit = from;
+
+  while (bit <= to && bit < SINGLE_BYTE_SIZE) {
+    BITSET_SET_BIT(bs, bit);
+    bit++;
+  }
+}
+
+/* Disabled (#if 0), never compiled: would set every bit of `bs`. */
+#if 0
+static void
+bitset_set_all(BitSetRef bs)
+{
+ int i;
+ for (i = 0; i < BITSET_SIZE; i++) {
+ bs[i] = ~((Bits )0);
+ }
+}
+#endif
+
+/* Complement every word of `bs` in place. */
+static void
+bitset_invert(BitSetRef bs)
+{
+  int word = 0;
+
+  while (word < BITSET_SIZE) {
+    bs[word] = ~(bs[word]);
+    word++;
+  }
+}
+
+/* Store the complement of `from` into `to`; `from` is not modified. */
+static void
+bitset_invert_to(BitSetRef from, BitSetRef to)
+{
+  int word = 0;
+
+  while (word < BITSET_SIZE) {
+    to[word] = ~(from[word]);
+    word++;
+  }
+}
+
+/* dest := dest AND bs, word by word. */
+static void
+bitset_and(BitSetRef dest, BitSetRef bs)
+{
+  int word = 0;
+
+  while (word < BITSET_SIZE) {
+    dest[word] &= bs[word];
+    word++;
+  }
+}
+
+/* dest := dest OR bs, word by word. */
+static void
+bitset_or(BitSetRef dest, BitSetRef bs)
+{
+  int word = 0;
+
+  while (word < BITSET_SIZE) {
+    dest[word] |= bs[word];
+    word++;
+  }
+}
+
+/* Copy all BITSET_SIZE words of `bs` into `dest`. */
+static void
+bitset_copy(BitSetRef dest, BitSetRef bs)
+{
+  int word = 0;
+
+  while (word < BITSET_SIZE) {
+    dest[word] = bs[word];
+    word++;
+  }
+}
+
+/* Compare the first n bytes of s1 and s2.
+   Returns 0 when they are equal (or n <= 0). At the first difference the
+   result is (byte of s2) - (byte of s1); note the operand order, which is
+   the reverse of the standard strncmp sign convention. */
+extern int
+onig_strncmp(UChar* s1, UChar* s2, int n)
+{
+  int k;
+
+  for (k = 0; k < n; k++) {
+    int diff = s2[k] - s1[k];
+    if (diff != 0) return diff;
+  }
+  return 0;
+}
+
+/* Copy the bytes [src, end) to dest and append a 0 terminator.
+   When the range is empty nothing is written at all -- not even the
+   terminator. dest must have room for (end - src) + 1 bytes. */
+static void
+k_strcpy(UChar* dest, UChar* src, UChar* end)
+{
+  int nbytes = end - src;
+
+  if (nbytes <= 0) return;
+
+  xmemcpy(dest, src, nbytes);
+  dest[nbytes] = (UChar )0;
+}
+
+/* Return a newly allocated, 0-terminated copy of the bytes [s, end).
+   Returns NULL for an empty range and when allocation fails.
+   The caller owns the result and releases it with xfree(). */
+extern UChar*
+onig_strdup(UChar* s, UChar* end)
+{
+  UChar* copy;
+  int nbytes = end - s;
+
+  if (nbytes <= 0) return NULL;
+
+  copy = (UChar* )xmalloc(nbytes + 1);
+  CHECK_NULL_RETURN(copy);
+  xmemcpy(copy, s, nbytes);
+  copy[nbytes] = (UChar )0;
+  return copy;
+}
+
+/* scan pattern methods */
+/* These macros work on local variables named `p` (current position) and
+   `end` (end of pattern) that must exist in the function using them.
+   PEND_VALUE is what PPEEK yields once p has reached end. */
+#define PEND_VALUE -1
+
+/* PFETCH reads the byte at p into c and advances; it does not test `end`. */
+#define PFETCH(c) do { (c) = *p++; } while (0)
+/* Step back / forward by one byte. */
+#define PUNFETCH p--
+#define PINC p++
+/* Look at the next byte without consuming it. */
+#define PPEEK (p < end ? *p : PEND_VALUE)
+/* 1 when the whole pattern has been consumed, otherwise 0. */
+#define PEND (p < end ? 0 : 1)
+
+
+/* Grow the heap string [dest, dest_end) to capa + 1 bytes and append
+   [src, src_end) to it.
+   dest may be NULL, in which case a fresh block is allocated.
+   Returns the (possibly moved) string, or NULL when allocation fails; in
+   that case the original `dest` block has not been released by this
+   function. */
+static UChar*
+k_strcat_capa(UChar* dest, UChar* dest_end, UChar* src, UChar* src_end,
+              int capa)
+{
+  UChar* r;
+  /* Measure the current length first: after a successful xrealloc the old
+     `dest` pointer is no longer valid and must not be used, not even in
+     pointer arithmetic. */
+  int used = dest_end - dest;
+
+  if (dest)
+    r = (UChar* )xrealloc(dest, capa + 1);
+  else
+    r = (UChar* )xmalloc(capa + 1);
+
+  CHECK_NULL_RETURN(r);
+  k_strcpy(r + used, src, src_end);
+  return r;
+}
+
+/* dest on static area */
+/* Build a new heap string of capa + 1 bytes holding [dest, dest_end)
+   followed by [src, src_end). `dest` itself is only read, never freed.
+   Returns the new string, or NULL when allocation fails. */
+static UChar*
+strcat_capa_from_static(UChar* dest, UChar* dest_end,
+                        UChar* src, UChar* src_end, int capa)
+{
+  int head_len = dest_end - dest;
+  UChar* fresh = (UChar* )xmalloc(capa + 1);
+
+  CHECK_NULL_RETURN(fresh);
+  k_strcpy(fresh, dest, dest_end);
+  k_strcpy(fresh + head_len, src, src_end);
+  return fresh;
+}
+
+#ifdef USE_NAMED_GROUP
+
+/* Initial capacity of NameEntry.back_refs, allocated when a name receives
+   its second group number. */
+#define INIT_NAME_BACKREFS_ALLOC_NUM 8
+
+/* One group name and the group numbers defined under it.
+   With exactly one number it lives in back_ref1 and back_refs is unused;
+   with two or more, all numbers are in the heap array back_refs. */
+typedef struct {
+ UChar* name;
+ int name_len; /* byte length */
+ int back_num; /* number of backrefs */
+ int back_alloc;
+ int back_ref1;
+ int* back_refs;
+} NameEntry;
+
+#ifdef USE_ST_HASH_TABLE
+
+#include <st.h>
+
+/* With USE_ST_HASH_TABLE the name table is an st hash table. */
+typedef st_table NameTable;
+typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */
+
+/* Longest name (in bytes) that name_find() copies into its stack buffer,
+   and the buffer size including the terminator. The second value is
+   derived from the first so that the two cannot drift apart. */
+#define NAMEBUF_SIZE 24
+#define NAMEBUF_SIZE_1 (NAMEBUF_SIZE + 1)
+
+#ifdef ONIG_DEBUG
+/* st_foreach callback: print one name entry as "name: n1, n2, ..." (or
+   "name: -" when it has no group numbers) to the FILE* passed as `arg`.
+   Always returns ST_CONTINUE. */
+static int
+i_print_name_entry(UChar* key, NameEntry* e, void* arg)
+{
+  FILE* out = (FILE* )arg;
+  int k;
+
+  fprintf(out, "%s: ", e->name);
+  switch (e->back_num) {
+  case 0:
+    fputs("-", out);
+    break;
+  case 1:
+    fprintf(out, "%d", e->back_ref1);
+    break;
+  default:
+    for (k = 0; k < e->back_num; k++) {
+      if (k > 0) fprintf(out, ", ");
+      fprintf(out, "%d", e->back_refs[k]);
+    }
+    break;
+  }
+  fputs("\n", out);
+  return ST_CONTINUE;
+}
+
+/* Debug helper: dump the name table of `reg` to `fp`.
+   Prints nothing when the regex has no name table. Always returns 0. */
+extern int
+onig_print_names(FILE* fp, regex_t* reg)
+{
+  NameTable* names = (NameTable* )reg->name_table;
+
+  if (IS_NULL(names)) return 0;
+
+  fprintf(fp, "name table\n");
+  st_foreach(names, i_print_name_entry, (HashDataType )fp);
+  fputs("\n", fp);
+  return 0;
+}
+#endif
+
+/* st_foreach callback: release everything owned by one name entry and
+   ask the table to drop it (ST_DELETE).
+   The entry struct itself is allocated separately by name_add(), so it is
+   released here as well; previously only its members were freed. */
+static int
+i_free_name_entry(UChar* key, NameEntry* e, void* arg)
+{
+  xfree(e->name);  /* == key */
+  if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
+  xfree(e);
+  return ST_DELETE;
+}
+
+/* Remove and release every entry of the regex's name table; the table
+   object itself is kept. Does nothing without a table. Returns 0. */
+static int
+names_clear(regex_t* reg)
+{
+  NameTable* names = (NameTable* )reg->name_table;
+
+  if (IS_NULL(names)) return 0;
+
+  st_foreach(names, i_free_name_entry, 0);
+  return 0;
+}
+
+/* Release all name entries and the name table of `reg`, leaving
+   reg->name_table NULL. Returns 0, or the error from names_clear(). */
+extern int
+onig_names_free(regex_t* reg)
+{
+  NameTable* names;
+  int r = names_clear(reg);
+
+  if (r != 0) return r;
+
+  names = (NameTable* )reg->name_table;
+  if (IS_NOT_NULL(names))
+    st_free_table(names);
+  reg->name_table = (void* )NULL;
+  return 0;
+}
+
+/* Look up the name [name, name_end) in the regex's name table.
+   Returns the entry, or NULL when there is no table, no such name, or
+   the temporary key could not be allocated.
+   Note: the byte at *name_end is read to see whether the name is already
+   0-terminated, so that address must be readable. */
+static NameEntry*
+name_find(regex_t* reg, UChar* name, UChar* name_end)
+{
+  UChar stackbuf[NAMEBUF_SIZE_1];
+  UChar* key;
+  int nbytes;
+  NameEntry* found = (NameEntry* )NULL;
+  NameTable* names = (NameTable* )reg->name_table;
+
+  if (IS_NULL(names)) return found;
+
+  if (*name_end == '\0') {
+    /* already a proper C string: use it as the key directly */
+    key = name;
+  }
+  else {
+    /* dirty, but st.c API claims NULL terminated key. */
+    nbytes = name_end - name;
+    if (nbytes > NAMEBUF_SIZE) {
+      key = onig_strdup(name, name_end);
+      if (IS_NULL(key)) return (NameEntry* )NULL;
+    }
+    else {
+      xmemcpy(stackbuf, name, nbytes);
+      stackbuf[nbytes] = '\0';
+      key = stackbuf;
+    }
+  }
+
+  st_lookup(names, (HashDataType )key, (HashDataType * )&found);
+  /* only a heap copy made above needs releasing */
+  if (key != name && key != stackbuf) xfree(key);
+  return found;
+}
+
+/* Context handed through st_foreach to i_names(): the user callback, the
+   regex and user argument to pass to it, and a slot that receives the
+   first non-zero value the callback returns. */
+typedef struct {
+ int (*func)(UChar*,UChar*,int,int*,regex_t*,void*);
+ regex_t* reg;
+ void* arg;
+ int ret;
+} INamesArg;
+
+/* st_foreach callback used by onig_foreach_name(): call the user function
+   for one name entry. A non-zero result from the user function is saved
+   in arg->ret and stops the iteration (ST_STOP).
+   The end of the name is taken from the stored byte length, as the
+   array-based variant of onig_foreach_name() does, instead of calling
+   strlen() on a UChar*. */
+static int
+i_names(UChar* key, NameEntry* e, INamesArg* arg)
+{
+  int r = (*(arg->func))(e->name, e->name + e->name_len, e->back_num,
+                         (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
+                         arg->reg, arg->arg);
+  if (r != 0) {
+    arg->ret = r;
+    return ST_STOP;
+  }
+  return ST_CONTINUE;
+}
+
+/* Call `func` once per defined group name with: name start, name end,
+   number of group numbers, pointer to those numbers, `reg`, and `arg`.
+   Iteration stops at the first non-zero return from `func`, and that
+   value is returned; otherwise (or without a name table) returns 0. */
+extern int
+onig_foreach_name(regex_t* reg,
+                  int (*func)(UChar*,UChar*,int,int*,regex_t*,void*),
+                  void* arg)
+{
+  INamesArg ctx;
+  NameTable* names = (NameTable* )reg->name_table;
+
+  ctx.ret = 0;
+  if (IS_NULL(names)) return ctx.ret;
+
+  ctx.func = func;
+  ctx.reg  = reg;
+  ctx.arg  = arg;
+  st_foreach(names, i_names, (HashDataType )&ctx);
+  return ctx.ret;
+}
+
+/* Number of distinct group names defined in `reg` (0 without a table). */
+extern int
+onig_number_of_names(regex_t* reg)
+{
+  NameTable* names = (NameTable* )reg->name_table;
+
+  return IS_NOT_NULL(names) ? names->num_entries : 0;
+}
+
+#else /* USE_ST_HASH_TABLE */
+
+/* Initial number of entry slots in the array-based name table. */
+#define INIT_NAMES_ALLOC_NUM 8
+
+/* Name table used without USE_ST_HASH_TABLE: a growable array of entries,
+   searched linearly. `num` slots are in use out of `alloc` allocated. */
+typedef struct {
+ NameEntry* e;
+ int num;
+ int alloc;
+} NameTable;
+
+
+#ifdef ONIG_DEBUG
+/* Debug helper: dump the name table of `reg` to `fp`, one line per name
+   ("name: n1, n2, ..." or "name: -"). Prints nothing when the table is
+   missing or empty. Always returns 0. */
+extern int
+onig_print_names(FILE* fp, regex_t* reg)
+{
+  int row, col;
+  NameEntry* ent;
+  NameTable* names = (NameTable* )reg->name_table;
+
+  if (IS_NULL(names) || names->num <= 0) return 0;
+
+  fprintf(fp, "name table\n");
+  for (row = 0; row < names->num; row++) {
+    ent = &(names->e[row]);
+    fprintf(fp, "%s: ", ent->name);
+    switch (ent->back_num) {
+    case 0:
+      fputs("-", fp);
+      break;
+    case 1:
+      fprintf(fp, "%d", ent->back_ref1);
+      break;
+    default:
+      for (col = 0; col < ent->back_num; col++) {
+        if (col > 0) fprintf(fp, ", ");
+        fprintf(fp, "%d", ent->back_refs[col]);
+      }
+      break;
+    }
+    fputs("\n", fp);
+  }
+  fputs("\n", fp);
+  return 0;
+}
+#endif
+
+/* Release every entry of the array-based name table and the entry array
+   itself; the NameTable struct is kept with num == 0 and e == NULL.
+   Note that `alloc` is left unchanged. Does nothing without a table.
+   Always returns 0. */
+static int
+names_clear(regex_t* reg)
+{
+  int k;
+  NameEntry* ent;
+  NameTable* names = (NameTable* )reg->name_table;
+
+  if (IS_NULL(names)) return 0;
+
+  for (k = 0; k < names->num; k++) {
+    ent = &(names->e[k]);
+    if (IS_NULL(ent->name)) continue;  /* slot never got a name */
+
+    xfree(ent->name);
+    ent->name       = NULL;
+    ent->name_len   = 0;
+    ent->back_num   = 0;
+    ent->back_alloc = 0;
+    if (IS_NOT_NULL(ent->back_refs)) xfree(ent->back_refs);
+    ent->back_refs  = (int* )NULL;
+  }
+
+  if (IS_NOT_NULL(names->e)) {
+    xfree(names->e);
+    names->e = NULL;
+  }
+  names->num = 0;
+  return 0;
+}
+
+/* Release all name entries and the name table of `reg`, leaving
+   reg->name_table NULL. Returns 0, or the error from names_clear(). */
+extern int
+onig_names_free(regex_t* reg)
+{
+  NameTable* names;
+  int r = names_clear(reg);
+
+  if (r != 0) return r;
+
+  names = (NameTable* )reg->name_table;
+  if (IS_NOT_NULL(names))
+    xfree(names);
+  reg->name_table = NULL;
+  return 0;
+}
+
+/* Linear search for the name [name, name_end) in the array-based table.
+   Returns the first entry with the same byte length and bytes, or NULL
+   when there is no table or no match. */
+static NameEntry*
+name_find(regex_t* reg, UChar* name, UChar* name_end)
+{
+  int k, nbytes;
+  NameEntry* ent;
+  NameTable* names = (NameTable* )reg->name_table;
+
+  if (IS_NULL(names)) return (NameEntry* )NULL;
+
+  nbytes = name_end - name;
+  for (k = 0; k < names->num; k++) {
+    ent = &(names->e[k]);
+    if (ent->name_len != nbytes) continue;
+    if (onig_strncmp(name, ent->name, nbytes) == 0)
+      return ent;
+  }
+  return (NameEntry* )NULL;
+}
+
+/* Call `func` once per defined group name, in table order, with: name
+   start, name end, number of group numbers, pointer to those numbers,
+   `reg`, and `arg`. Stops at and returns the first non-zero value `func`
+   returns; otherwise (or without a name table) returns 0. */
+extern int
+onig_foreach_name(regex_t* reg,
+                  int (*func)(UChar*,UChar*,int,int*,regex_t*,void*),
+                  void* arg)
+{
+  int k, r;
+  int* numbers;
+  NameEntry* ent;
+  NameTable* names = (NameTable* )reg->name_table;
+
+  if (IS_NULL(names)) return 0;
+
+  for (k = 0; k < names->num; k++) {
+    ent = &(names->e[k]);
+    /* a single number lives in back_ref1, several in back_refs */
+    numbers = (ent->back_num > 1) ? ent->back_refs : &(ent->back_ref1);
+    r = (*func)(ent->name, ent->name + ent->name_len, ent->back_num,
+                numbers, reg, arg);
+    if (r != 0) return r;
+  }
+  return 0;
+}
+
+/* Number of distinct group names defined in `reg` (0 without a table). */
+extern int
+onig_number_of_names(regex_t* reg)
+{
+  NameTable* names = (NameTable* )reg->name_table;
+
+  return IS_NOT_NULL(names) ? names->num : 0;
+}
+
+#endif /* else USE_ST_HASH_TABLE */
+
+/* Record that group number `backref` is defined under the name
+   [name, name_end), creating the name table and the entry on demand.
+
+   Returns 0 on success, or
+     ONIGERR_EMPTY_GROUP_NAME        for an empty name,
+     ONIGERR_MULTIPLEX_DEFINED_NAME  when the name already has a number and
+                                     the syntax forbids redefinition (the
+                                     name is also stored in `env` as the
+                                     error string),
+     ONIGERR_MEMORY                  when an allocation fails.
+
+   Failure handling: no allocation is leaked, a pointer is only replaced
+   after its (re)allocation succeeded, and the entry's back_num is only
+   incremented once the new number has really been stored. */
+static int
+name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
+{
+  int alloc;
+  int* refs;
+  NameEntry* e;
+  NameTable* t = (NameTable* )reg->name_table;
+
+  if (name_end - name <= 0)
+    return ONIGERR_EMPTY_GROUP_NAME;
+
+  e = name_find(reg, name, name_end);
+  if (IS_NULL(e)) {
+#ifdef USE_ST_HASH_TABLE
+    if (IS_NULL(t)) {
+      t = st_init_strtable();
+      CHECK_NULL_RETURN_VAL(t, ONIGERR_MEMORY);
+      reg->name_table = t;
+    }
+    e = (NameEntry* )xmalloc(sizeof(NameEntry));
+    CHECK_NULL_RETURN_VAL(e, ONIGERR_MEMORY);
+
+    e->name = onig_strdup(name, name_end);
+    if (IS_NULL(e->name)) {
+      xfree(e);  /* not in the table yet, nobody else would free it */
+      return ONIGERR_MEMORY;
+    }
+    /* complete the entry before it becomes visible in the table */
+    e->name_len   = name_end - name;
+    e->back_num   = 0;
+    e->back_alloc = 0;
+    e->back_refs  = (int* )NULL;
+    st_insert(t, (HashDataType )e->name, (HashDataType )e);
+
+#else
+
+    if (IS_NULL(t)) {
+      t = (NameTable* )xmalloc(sizeof(NameTable));
+      CHECK_NULL_RETURN_VAL(t, ONIGERR_MEMORY);
+      t->e     = NULL;
+      t->alloc = 0;
+      t->num   = 0;
+      reg->name_table = t;
+    }
+
+    /* (Re)allocate the entry array when there is none -- a new table, or
+       one emptied by names_clear() -- or when it is full. */
+    if (IS_NULL(t->e) || t->num == t->alloc) {
+      int i;
+      NameEntry* slots;
+
+      if (IS_NULL(t->e)) {
+        alloc = INIT_NAMES_ALLOC_NUM;
+        slots = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
+      }
+      else {
+        alloc = t->alloc * 2;
+        slots = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);
+      }
+      /* on failure t->e is untouched and still owned by the table */
+      CHECK_NULL_RETURN_VAL(slots, ONIGERR_MEMORY);
+      t->e     = slots;
+      t->alloc = alloc;
+
+      for (i = t->num; i < t->alloc; i++) {
+        t->e[i].name       = NULL;
+        t->e[i].name_len   = 0;
+        t->e[i].back_num   = 0;
+        t->e[i].back_alloc = 0;
+        t->e[i].back_refs  = (int* )NULL;
+      }
+    }
+
+    e = &(t->e[t->num]);
+    e->name = onig_strdup(name, name_end);
+    if (IS_NULL(e->name)) return ONIGERR_MEMORY;  /* slot stays unused */
+    e->name_len = name_end - name;
+    t->num++;
+#endif
+  }
+
+  if (e->back_num >= 1 &&
+      ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {
+    onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
+                                   name, name_end);
+    return ONIGERR_MULTIPLEX_DEFINED_NAME;
+  }
+
+  if (e->back_num == 0) {
+    /* first number: kept inline */
+    e->back_ref1 = backref;
+  }
+  else if (e->back_num == 1) {
+    /* second number: switch to the heap array, carrying the first over */
+    alloc = INIT_NAME_BACKREFS_ALLOC_NUM;
+    refs = (int* )xmalloc(sizeof(int) * alloc);
+    CHECK_NULL_RETURN_VAL(refs, ONIGERR_MEMORY);
+    refs[0] = e->back_ref1;
+    refs[1] = backref;
+    e->back_refs  = refs;
+    e->back_alloc = alloc;
+  }
+  else {
+    if (e->back_num >= e->back_alloc) {
+      alloc = e->back_alloc * 2;
+      refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc);
+      CHECK_NULL_RETURN_VAL(refs, ONIGERR_MEMORY);
+      e->back_refs  = refs;
+      e->back_alloc = alloc;
+    }
+    e->back_refs[e->back_num] = backref;
+  }
+  e->back_num++;
+
+  return 0;
+}
+
+/* Find the group numbers defined under the name [name, name_end).
+   Returns how many there are and points *nums at them (at the inline
+   number for one, at the array for several; *nums is left untouched for
+   zero). Returns ONIGERR_UNDEFINED_NAME_REFERENCE for an unknown name.
+   The numbers remain owned by the regex. */
+extern int
+onig_name_to_group_numbers(regex_t* reg, UChar* name, UChar* name_end,
+                           int** nums)
+{
+  NameEntry* ent = name_find(reg, name, name_end);
+
+  if (IS_NULL(ent)) return ONIGERR_UNDEFINED_NAME_REFERENCE;
+
+  if (ent->back_num == 1)
+    *nums = &(ent->back_ref1);
+  else if (ent->back_num != 0)
+    *nums = ent->back_refs;
+
+  return ent->back_num;
+}
+
+/* Resolve a group name to a single group number.
+   With several numbers under the name and a non-NULL `region`, the
+   highest-indexed one whose region->beg is not ONIG_REGION_NOTPOS is
+   chosen; failing that (or without a region) the last defined number.
+   Returns a negative error code for an unknown name and
+   ONIGERR_PARSER_BUG for a name without any number. */
+extern int
+onig_name_to_backref_number(regex_t* reg, UChar* name, UChar* name_end,
+                            OnigRegion *region)
+{
+  int k, count, *nums;
+
+  count = onig_name_to_group_numbers(reg, name, name_end, &nums);
+  if (count < 0) return count;
+  if (count == 0) return ONIGERR_PARSER_BUG;
+  if (count == 1) return nums[0];
+
+  if (IS_NOT_NULL(region)) {
+    k = count;
+    while (--k >= 0) {
+      if (region->beg[nums[k]] != ONIG_REGION_NOTPOS)
+        return nums[k];
+    }
+  }
+  return nums[count - 1];
+}
+
+#else /* USE_NAMED_GROUP */
+
+/* Stubs used when the library is built without USE_NAMED_GROUP: the
+   lookups and the iteration report ONIG_NO_SUPPORT_CONFIG and the name
+   count is 0. None of the arguments is examined. */
+extern int
+onig_name_to_group_numbers(regex_t* reg, UChar* name, UChar* name_end,
+ int** nums)
+{
+ return ONIG_NO_SUPPORT_CONFIG;
+}
+
+extern int
+onig_name_to_backref_number(regex_t* reg, UChar* name, UChar* name_end,
+ OnigRegion* region)
+{
+ return ONIG_NO_SUPPORT_CONFIG;
+}
+
+extern int
+onig_foreach_name(regex_t* reg,
+ int (*func)(UChar*,UChar*,int,int*,regex_t*,void*),
+ void* arg)
+{
+ return ONIG_NO_SUPPORT_CONFIG;
+}
+
+extern int
+onig_number_of_names(regex_t* reg)
+{
+ return 0;
+}
+#endif /* else USE_NAMED_GROUP */
+
+
+#define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16
+
+/* Reset the parse-time bookkeeping in `env`: status bit sets, error
+   string range, call / group counters, and the group-node tables (static
+   slots set to NULL_NODE, no dynamic array). Other members of `env` are
+   left as they are. */
+static void
+scan_env_clear(ScanEnv* env)
+{
+  int slot;
+
+  BIT_STATUS_CLEAR(env->capture_history);
+  BIT_STATUS_CLEAR(env->bt_mem_start);
+  BIT_STATUS_CLEAR(env->bt_mem_end);
+  BIT_STATUS_CLEAR(env->backrefed_mem);
+
+  env->error     = (UChar* )NULL;
+  env->error_end = (UChar* )NULL;
+
+  env->num_call = 0;
+  env->num_mem  = 0;
+#ifdef USE_NAMED_GROUP
+  env->num_named = 0;
+#endif
+
+  env->mem_alloc         = 0;
+  env->mem_nodes_dynamic = (Node** )NULL;
+  slot = 0;
+  while (slot < SCANENV_MEMNODES_SIZE) {
+    env->mem_nodes_static[slot] = NULL_NODE;
+    slot++;
+  }
+}
+
+/* Reserve the next capture group number in `env`.
+   While the number fits in the fixed mem_nodes_static table nothing is
+   allocated; beyond that a heap table is created (seeded with the static
+   slots) or doubled, and its new slots are set to NULL_NODE.
+   Returns the new group number (env->num_mem after the increment), or
+   ONIGERR_MEMORY when the table cannot be allocated; in that case `env`
+   is unchanged. */
+static int
+scan_env_add_mem_entry(ScanEnv* env)
+{
+  int i, need, alloc;
+  Node** p;
+
+  need = env->num_mem + 1;
+  if (need >= SCANENV_MEMNODES_SIZE) {
+    if (env->mem_alloc <= need) {
+      if (IS_NULL(env->mem_nodes_dynamic)) {
+        alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE;
+        p = (Node** )xmalloc(sizeof(Node*) * alloc);
+        /* check before the copy below writes through p */
+        CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
+        xmemcpy(p, env->mem_nodes_static,
+                sizeof(Node*) * SCANENV_MEMNODES_SIZE);
+      }
+      else {
+        alloc = env->mem_alloc * 2;
+        p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc);
+        CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
+      }
+
+      for (i = env->num_mem + 1; i < alloc; i++)
+        p[i] = NULL_NODE;
+
+      env->mem_nodes_dynamic = p;
+      env->mem_alloc = alloc;
+    }
+  }
+
+  env->num_mem++;
+  return env->num_mem;
+}
+
+/* Store `node` as the parse-tree node of group number `num`.
+   Returns 0, or ONIGERR_PARSER_BUG when `num` is greater than the number
+   of groups reserved so far. */
+static int
+scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
+{
+  if (env->num_mem < num)
+    return ONIGERR_PARSER_BUG;
+
+  SCANENV_MEM_NODES(env)[num] = node;
+  return 0;
+}
+
+
+/* Node recycling (USE_RECYCLE_NODE): released nodes are not handed back
+   to the allocator but chained into FreeNodeList, a singly linked list
+   whose link is stored in the released node's own memory; node_new()
+   takes nodes from this list before allocating. */
+#ifdef USE_RECYCLE_NODE
+typedef struct _FreeNode {
+ struct _FreeNode* next;
+} FreeNode;
+
+static FreeNode* FreeNodeList = (FreeNode* )NULL;
+#endif
+
+/* Release `node` together with everything it owns: child nodes are freed
+   recursively, heap buffers of string / character-class / back-reference
+   nodes are freed. NULL is accepted and ignored.
+   With USE_RECYCLE_NODE the node's memory is pushed onto FreeNodeList for
+   reuse instead of being freed; that push is done inside
+   THREAD_ATOMIC_START / THREAD_ATOMIC_END, the same guard that
+   onig_free_node_list() uses for this shared list. */
+extern void
+onig_node_free(Node* node)
+{
+  if (IS_NULL(node)) return ;
+
+  switch (NTYPE(node)) {
+  case N_STRING:
+    /* the text is a separate allocation only when it is not the node's
+       own inline buffer */
+    if (IS_NOT_NULL(NSTRING(node).s) && NSTRING(node).s != NSTRING(node).buf) {
+      xfree(NSTRING(node).s);
+    }
+    break;
+
+  case N_LIST:
+  case N_ALT:
+    onig_node_free(NCONS(node).left);
+    onig_node_free(NCONS(node).right);
+    break;
+
+  case N_CCLASS:
+    if (NCCLASS(node).mbuf)
+      bbuf_free(NCCLASS(node).mbuf);
+    break;
+
+  case N_QUALIFIER:
+    if (NQUALIFIER(node).target)
+      onig_node_free(NQUALIFIER(node).target);
+    break;
+
+  case N_EFFECT:
+    if (NEFFECT(node).target)
+      onig_node_free(NEFFECT(node).target);
+    break;
+
+  case N_BACKREF:
+    if (IS_NOT_NULL(NBACKREF(node).back_dynamic))
+      xfree(NBACKREF(node).back_dynamic);
+    break;
+
+  case N_ANCHOR:
+    if (NANCHOR(node).target)
+      onig_node_free(NANCHOR(node).target);
+    break;
+  }
+
+#ifdef USE_RECYCLE_NODE
+  {
+    FreeNode* n = (FreeNode* )node;
+
+    THREAD_ATOMIC_START;
+    n->next = FreeNodeList;
+    FreeNodeList = n;
+    THREAD_ATOMIC_END;
+  }
+#else
+  xfree(node);
+#endif
+}
+
+#ifdef USE_RECYCLE_NODE
+/* Hand every node parked on FreeNodeList back to the allocator, leaving
+   the list empty. The whole walk runs between THREAD_ATOMIC_START and
+   THREAD_ATOMIC_END. Always returns 0. */
+extern int
+onig_free_node_list()
+{
+  FreeNode* victim;
+
+  THREAD_ATOMIC_START;
+  for (victim = FreeNodeList; victim; victim = FreeNodeList) {
+    FreeNodeList = victim->next;
+    xfree(victim);
+  }
+  THREAD_ATOMIC_END;
+  return 0;
+}
+#endif
+
+/* Obtain memory for one parse-tree node; the contents are uninitialized.
+   With USE_RECYCLE_NODE a node parked on FreeNodeList is reused when
+   available; it is taken off the list inside THREAD_ATOMIC_START /
+   THREAD_ATOMIC_END, the same guard that onig_free_node_list() uses for
+   this shared list. Otherwise the node comes from xmalloc.
+   Returns NULL when allocation fails. */
+static Node*
+node_new()
+{
+  Node* node;
+
+#ifdef USE_RECYCLE_NODE
+  THREAD_ATOMIC_START;
+  node = (Node* )FreeNodeList;
+  if (IS_NOT_NULL(node))
+    FreeNodeList = FreeNodeList->next;
+  THREAD_ATOMIC_END;
+
+  if (IS_NOT_NULL(node)) return node;
+#endif
+
+  node = (Node* )xmalloc(sizeof(Node));
+  return node;
+}
+
+
+/* Put a character class into its empty state: not negated, no multi-byte
+   range buffer, all bits of the bit set cleared. */
+static void
+initialize_cclass(CClassNode* cc)
+{
+  cc->not  = 0;
+  cc->mbuf = NULL;
+  BITSET_CLEAR(cc->bs);
+}
+
+/* Create an empty character-class node (N_CCLASS).
+   Returns NULL when no node could be allocated. */
+static Node*
+node_new_cclass()
+{
+  Node* cc_node = node_new();
+
+  if (IS_NOT_NULL(cc_node)) {
+    cc_node->type = N_CCLASS;
+    initialize_cclass(&(NCCLASS(cc_node)));
+  }
+  return cc_node;
+}
+
+/* Create a character-type node (N_CTYPE) for the given type code.
+   Returns NULL when no node could be allocated. */
+static Node*
+node_new_ctype(int type)
+{
+  Node* ct_node = node_new();
+
+  if (IS_NOT_NULL(ct_node)) {
+    ct_node->type = N_CTYPE;
+    NCTYPE(ct_node).type = type;
+  }
+  return ct_node;
+}
+
+/* Create an any-character node (N_ANYCHAR).
+   Returns NULL when no node could be allocated. */
+static Node*
+node_new_anychar()
+{
+  Node* any_node = node_new();
+
+  if (IS_NOT_NULL(any_node))
+    any_node->type = N_ANYCHAR;
+  return any_node;
+}
+
+/* Create a list (sequence) cell N_LIST with the given left and right
+   children. Returns NULL when no node could be allocated; the children
+   are not touched in that case. */
+static Node*
+node_new_list(Node* left, Node* right)
+{
+  Node* cell = node_new();
+
+  if (IS_NOT_NULL(cell)) {
+    cell->type = N_LIST;
+    NCONS(cell).left  = left;
+    NCONS(cell).right = right;
+  }
+  return cell;
+}
+
+/* Create an alternation cell N_ALT with the given left and right
+   children. Returns NULL when no node could be allocated; the children
+   are not touched in that case. */
+static Node*
+node_new_alt(Node* left, Node* right)
+{
+  Node* cell = node_new();
+
+  if (IS_NOT_NULL(cell)) {
+    cell->type = N_ALT;
+    NCONS(cell).left  = left;
+    NCONS(cell).right = right;
+  }
+  return cell;
+}
+
+/* Create an anchor node (N_ANCHOR) of the given anchor type, with no
+   target and char_len set to -1.
+   Returns NULL when no node could be allocated. */
+extern Node*
+onig_node_new_anchor(int type)
+{
+  Node* anchor = node_new();
+
+  if (IS_NOT_NULL(anchor)) {
+    anchor->type = N_ANCHOR;
+    NANCHOR(anchor).type     = type;
+    NANCHOR(anchor).target   = NULL;
+    NANCHOR(anchor).char_len = -1;
+  }
+  return anchor;
+}
+
+/* Create a back-reference node (N_BACKREF) for the `back_num` group
+   numbers in `backrefs`.
+   by_name != 0 marks it as a reference by name (NST_NAME_REF).
+   NST_RECURSION is set when any referenced number is within the groups
+   reserved so far but has no node registered in `env` yet.
+   Up to NODE_BACKREFS_SIZE numbers are stored inline, more go into a
+   heap array owned by the node.
+   Returns NULL when allocation fails (nothing is leaked). */
+static Node*
+node_new_backref(int back_num, int* backrefs, int by_name, ScanEnv* env)
+{
+  int k;
+  int* dyn;
+  Node* node = node_new();
+
+  CHECK_NULL_RETURN(node);
+  node->type = N_BACKREF;
+  NBACKREF(node).state        = (by_name != 0) ? NST_NAME_REF : 0;
+  NBACKREF(node).back_num     = back_num;
+  NBACKREF(node).back_dynamic = (int* )NULL;
+
+  k = 0;
+  while (k < back_num) {
+    if (backrefs[k] <= env->num_mem &&
+        IS_NULL(SCANENV_MEM_NODES(env)[backrefs[k]])) {
+      NBACKREF(node).state |= NST_RECURSION;   /* /...(\1).../ */
+      break;
+    }
+    k++;
+  }
+
+  if (back_num > NODE_BACKREFS_SIZE) {
+    dyn = (int* )xmalloc(sizeof(int) * back_num);
+    if (IS_NULL(dyn)) {
+      onig_node_free(node);
+      return NULL;
+    }
+    NBACKREF(node).back_dynamic = dyn;
+    for (k = 0; k < back_num; k++)
+      dyn[k] = backrefs[k];
+  }
+  else {
+    for (k = 0; k < back_num; k++)
+      NBACKREF(node).back_static[k] = backrefs[k];
+  }
+  return node;
+}
+
+#ifdef USE_SUBEXP_CALL
+/* Create a subexpression-call node (N_CALL) for the name
+   [name, name_end). The node stores the two pointers as given (the name
+   is not copied); its target is unset and ref_num is
+   CALLNODE_REFNUM_UNDEF.
+   Returns NULL when no node could be allocated. */
+static Node*
+node_new_call(UChar* name, UChar* name_end)
+{
+  Node* call = node_new();
+
+  if (IS_NOT_NULL(call)) {
+    call->type = N_CALL;
+    NCALL(call).name     = name;
+    NCALL(call).name_end = name_end;
+    NCALL(call).target   = NULL_NODE;
+    NCALL(call).ref_num  = CALLNODE_REFNUM_UNDEF;
+    NCALL(call).state    = 0;
+  }
+  return call;
+}
+#endif
+
+/* Create a repeat node (N_QUALIFIER) for lower..upper repetitions.
+   The node starts greedy, without a target, with target_empty_info set
+   to NQ_TARGET_ISNOT_EMPTY, no head_exact / next_head_exact nodes and
+   is_refered cleared; `by_number` is stored as given.
+   Returns NULL when no node could be allocated. */
+static Node*
+node_new_qualifier(int lower, int upper, int by_number)
+{
+  Node* rep = node_new();
+
+  if (IS_NOT_NULL(rep)) {
+    rep->type = N_QUALIFIER;
+    NQUALIFIER(rep).lower             = lower;
+    NQUALIFIER(rep).upper             = upper;
+    NQUALIFIER(rep).by_number         = by_number;
+    NQUALIFIER(rep).greedy            = 1;
+    NQUALIFIER(rep).target            = NULL;
+    NQUALIFIER(rep).target_empty_info = NQ_TARGET_ISNOT_EMPTY;
+    NQUALIFIER(rep).head_exact        = NULL_NODE;
+    NQUALIFIER(rep).next_head_exact   = NULL_NODE;
+    NQUALIFIER(rep).is_refered        = 0;
+  }
+  return rep;
+}
+
+/*
+ * Create an N_EFFECT node of effect kind `type` with every other field
+ * reset (no target, state/regnum/option/opt_count 0, call_addr -1).
+ */
+static Node*
+node_new_effect(int type)
+{
+  Node* effect = node_new();
+
+  CHECK_NULL_RETURN(effect);
+  effect->type = N_EFFECT;
+  NEFFECT(effect).target    = NULL;
+  NEFFECT(effect).call_addr = -1;
+  NEFFECT(effect).opt_count = 0;
+  NEFFECT(effect).option    = 0;
+  NEFFECT(effect).regnum    = 0;
+  NEFFECT(effect).state     = 0;
+  NEFFECT(effect).type      = type;
+  return effect;
+}
+
+/* Exported entry point for node_new_effect(); forwards `type` unchanged. */
+extern Node*
+onig_node_new_effect(int type)
+{
+  Node* effect = node_new_effect(type);
+
+  return effect;
+}
+
+/*
+ * Create an EFFECT_MEMORY node.  `is_named` != 0 sets NST_NAMED_GROUP.
+ * `option` is recorded only when USE_SUBEXP_CALL is defined.
+ */
+static Node*
+node_new_effect_memory(OnigOptionType option, int is_named)
+{
+  Node* mem = node_new_effect(EFFECT_MEMORY);
+
+  CHECK_NULL_RETURN(mem);
+#ifdef USE_SUBEXP_CALL
+  NEFFECT(mem).option = option;
+#endif
+  if (is_named != 0) {
+    SET_EFFECT_STATUS(mem, NST_NAMED_GROUP);
+  }
+  return mem;
+}
+
+/* Create an EFFECT_OPTION node carrying `option`. */
+static Node*
+node_new_option(OnigOptionType option)
+{
+  Node* opt = node_new_effect(EFFECT_OPTION);
+
+  CHECK_NULL_RETURN(opt);
+  NEFFECT(opt).option = option;
+  return opt;
+}
+
+/*
+ * Append the bytes [s, end) to the string node `node`.
+ *
+ * Nothing happens when the range is empty.  While no capacity is recorded
+ * (capa <= 0) and the result stays within NODE_STR_BUF_SIZE - 1 bytes the
+ * data is copied behind the current contents; otherwise the node needs
+ * len + addlen + NODE_STR_MARGIN bytes and is re-allocated when its
+ * recorded capacity is smaller than that.
+ *
+ * Returns 0, or ONIGERR_MEMORY when the enlarged buffer cannot be obtained.
+ */
+extern int
+onig_node_str_cat(Node* node, UChar* s, UChar* end)
+{
+  int addlen = end - s;
+  int len;
+
+  if (addlen <= 0)
+    return 0;
+
+  len = NSTRING(node).end - NSTRING(node).s;
+
+  if (NSTRING(node).capa <= 0 && len + addlen <= NODE_STR_BUF_SIZE - 1) {
+    k_strcpy(NSTRING(node).s + len, s, end);
+  }
+  else {
+    int capa = len + addlen + NODE_STR_MARGIN;
+
+    if (capa <= NSTRING(node).capa) {
+      k_strcpy(NSTRING(node).s + len, s, end);
+    }
+    else {
+      UChar* p;
+
+      /* data still living in the node's own buf needs the "from static"
+         variant; otherwise the existing buffer is extended */
+      if (NSTRING(node).s == NSTRING(node).buf)
+        p = strcat_capa_from_static(NSTRING(node).s, NSTRING(node).end,
+                                    s, end, capa);
+      else
+        p = k_strcat_capa(NSTRING(node).s, NSTRING(node).end, s, end, capa);
+
+      CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
+      NSTRING(node).s    = p;
+      NSTRING(node).capa = capa;
+    }
+  }
+
+  NSTRING(node).end = NSTRING(node).s + len + addlen;
+  return 0;
+}
+
+/* Append the single byte `c` to a string node; see onig_node_str_cat(). */
+static int
+node_str_cat_char(Node* node, UChar c)
+{
+  UChar one = c;
+
+  return onig_node_str_cat(node, &one, &one + 1);
+}
+
+/*
+ * Turn `node` into an empty N_STRING node in place: contents point at the
+ * node's own buf, capa is 0 and the string flag becomes `flag`.
+ */
+extern void
+onig_node_conv_to_str_node(Node* node, int flag)
+{
+  node->type = N_STRING;
+
+  NSTRING(node).s    = NSTRING(node).buf;
+  NSTRING(node).end  = NSTRING(node).buf;
+  NSTRING(node).capa = 0;
+  NSTRING(node).flag = flag;
+}
+
+/*
+ * Create an N_STRING node holding a copy of [s, end).
+ * Returns NULL when the node or its string storage cannot be allocated.
+ */
+static Node*
+node_new_str(UChar* s, UChar* end)
+{
+  Node* str = node_new();
+
+  CHECK_NULL_RETURN(str);
+
+  /* start as an empty string living in the node's own buffer */
+  str->type = N_STRING;
+  NSTRING(str).flag = 0;
+  NSTRING(str).capa = 0;
+  NSTRING(str).end  = NSTRING(str).buf;
+  NSTRING(str).s    = NSTRING(str).buf;
+
+  if (onig_node_str_cat(str, s, end) != 0) {
+    onig_node_free(str);
+    return NULL;
+  }
+  return str;
+}
+
+/*
+ * Create a string node for [s, end) and mark it raw (NSTRING_SET_RAW).
+ * Returns NULL when node_new_str() fails; the raw flag is only applied
+ * to a node that really exists.
+ */
+static Node*
+node_new_str_raw(UChar* s, UChar* end)
+{
+  Node* node = node_new_str(s, end);
+
+  CHECK_NULL_RETURN(node);
+  NSTRING_SET_RAW(node);
+  return node;
+}
+
+/*
+ * Create an empty string node (node_new_str() with an empty range).
+ * Declared with (void) so the definition is a real prototype.
+ */
+static Node*
+node_new_empty(void)
+{
+  return node_new_str(NULL, NULL);
+}
+
+/* Create a string node containing just the byte `c`. */
+static Node*
+node_new_str_char(UChar c)
+{
+  UChar one = c;
+
+  return node_new_str(&one, &one + 1);
+}
+
+/* Create a raw string node containing just the byte `c`. */
+static Node*
+node_new_str_raw_char(UChar c)
+{
+  UChar one = c;
+
+  return node_new_str_raw(&one, &one + 1);
+}
+
+/*
+ * Split the last character off the string `sn`.
+ *
+ * When `sn` holds more than one character, a new string node containing
+ * only the last character is returned and `sn` is shortened accordingly;
+ * the new node inherits the NSTR_RAW flag.  NULL_NODE is returned when the
+ * string cannot be split (empty, or a single character) and also when the
+ * new node cannot be allocated -- in that case `sn` is left untouched so
+ * no data is lost.
+ */
+static Node*
+str_node_split_last_char(StrNode* sn, OnigEncoding enc)
+{
+  UChar *p;
+  Node* n;
+
+  if (sn->end <= sn->s)
+    return NULL_NODE;
+
+  p = onigenc_get_prev_char_head(enc, sn->s, sn->end);
+  if (IS_NULL(p) || p <= sn->s)   /* nothing before the last char */
+    return NULL_NODE;
+
+  n = node_new_str(p, sn->end);
+  if (IS_NULL(n))                 /* allocation failed: keep sn intact */
+    return NULL_NODE;
+
+  if ((sn->flag & NSTR_RAW) != 0)
+    NSTRING_SET_RAW(n);
+  sn->end = p;
+  return n;
+}
+
+/*
+ * Return 1 when the string holds more bytes than its first character
+ * occupies (so something remains after it), 0 otherwise.
+ */
+static int
+str_node_can_be_split(StrNode* sn, OnigEncoding enc)
+{
+  if (sn->end <= sn->s)
+    return 0;
+
+  return (enc_len(enc, *(sn->s)) < sn->end - sn->s) ? 1 : 0;
+}
+
+/*
+ * Scan a run of decimal digits starting at *src.
+ *
+ * On return *src points at the first non-digit (or at `end`).  The value
+ * is returned; it is 0 when no digit was read.  -1 signals that the
+ * number would exceed INT_MAX_LIMIT (in that case *src is not updated).
+ */
+extern int
+onig_scan_unsigned_number(UChar** src, UChar* end, OnigEncoding enc)
+{
+  unsigned int num = 0;
+  unsigned int val;
+  int c;
+  UChar* p = *src;
+
+  while (!PEND) {
+    PFETCH(c);
+    if (! ONIGENC_IS_CODE_DIGIT(enc, c)) {
+      PUNFETCH;
+      break;
+    }
+
+    val = (unsigned int )DIGITVAL(c);
+    if ((INT_MAX_LIMIT - val) / 10UL < num)
+      return -1;  /* overflow */
+
+    num = num * 10 + val;
+  }
+
+  *src = p;
+  return num;
+}
+
+/*
+ * Scan at most `maxlen` hexadecimal digits starting at *src (a negative
+ * `maxlen` never reaches 0 and therefore does not limit the scan).
+ *
+ * On return *src points behind the digits consumed.  Returns the value,
+ * or -1 when it would exceed INT_MAX_LIMIT (*src is then not updated).
+ */
+static int
+scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen,
+                                 OnigEncoding enc)
+{
+  unsigned int num = 0;
+  unsigned int val;
+  int c;
+  UChar* p = *src;
+
+  while (!PEND && maxlen-- != 0) {
+    PFETCH(c);
+    if (! ONIGENC_IS_CODE_XDIGIT(enc, c)) {
+      PUNFETCH;
+      break;
+    }
+
+    val = (unsigned int )XDIGITVAL(enc,c);
+    if ((INT_MAX_LIMIT - val) / 16UL < num)
+      return -1;  /* overflow */
+
+    num = (num << 4) + val;
+  }
+
+  *src = p;
+  return num;
+}
+
+/*
+ * Scan at most `maxlen` octal digits ('0'..'7') starting at *src.
+ *
+ * On return *src points behind the digits consumed.  Returns the value,
+ * or -1 when it would exceed INT_MAX_LIMIT (*src is then not updated).
+ */
+static int
+scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
+                           OnigEncoding enc)
+{
+  unsigned int num = 0;
+  unsigned int val;
+  int c;
+  UChar* p = *src;
+
+  while (!PEND && maxlen-- != 0) {
+    PFETCH(c);
+    if (! (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8')) {
+      PUNFETCH;
+      break;
+    }
+
+    val = ODIGITVAL(c);
+    if ((INT_MAX_LIMIT - val) / 8UL < num)
+      return -1;  /* overflow */
+
+    num = (num << 3) + val;
+  }
+
+  *src = p;
+  return num;
+}
+
+
+#define BBUF_WRITE_CODE_POINT(bbuf,pos,code) /* write SIZE_CODE_POINT bytes at byte offset pos; `code` must be an lvalue because its address is taken */ \
+ BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
+
+/* data format:
+ [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
+ (all data size is OnigCodePoint)
+ */
+/*
+ * Allocate an empty code range buffer and store it in *pbuf.
+ * The buffer starts with room for INIT_MULTI_BYTE_RANGE_SIZE bytes and a
+ * range count of 0 written at offset 0.
+ *
+ * Returns 0 on success.  On failure an error code is returned and *pbuf
+ * is NULL: when BBUF_INIT fails the BBuf struct allocated here is released
+ * again instead of being left behind half-initialised.
+ */
+static int
+new_code_range(BBuf** pbuf)
+{
+#define INIT_MULTI_BYTE_RANGE_SIZE  (SIZE_CODE_POINT * 5)
+  int r;
+  OnigCodePoint n;
+  BBuf* bbuf;
+
+  bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
+  CHECK_NULL_RETURN_VAL(*pbuf, ONIGERR_MEMORY);
+  r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE);
+  if (r) {
+    xfree(bbuf);
+    *pbuf = (BBuf* )NULL;
+    return r;
+  }
+
+  n = 0;
+  BBUF_WRITE_CODE_POINT(bbuf, 0, n);
+  return 0;
+}
+
+static int  /* Insert the range [from, to] into the sorted range list held in
+             * *pbuf, creating the buffer first when *pbuf is NULL.  Existing
+             * ranges that the new one touches are replaced by a single merged
+             * range.  Returns 0, or an error code
+             * (ONIGERR_TOO_MANY_MULTI_BYTE_RANGES, or whatever
+             * new_code_range() reports). */
+add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)
+{
+ int r, inc_n, pos;
+ int low, high, bound, x;
+ OnigCodePoint n, *data;
+ BBuf* bbuf;
+
+ if (from > to) {  /* accept the bounds in either order */
+ n = from; from = to; to = n;
+ }
+
+ if (IS_NULL(*pbuf)) {
+ r = new_code_range(pbuf);
+ if (r) return r;
+ bbuf = *pbuf;
+ n = 0;
+ }
+ else {
+ bbuf = *pbuf;
+ GET_CODE_POINT(n, bbuf->p);  /* n: number of ranges currently stored */
+ }
+ data = (OnigCodePoint* )(bbuf->p);
+ data++;  /* skip the count; data[2*i] / data[2*i+1] are from / to of range i */
+
+ for (low = 0, bound = n; low < bound; ) {  /* low: first range whose `to` is >= from */
+ x = (low + bound) >> 1;
+ if (from > data[x*2 + 1])
+ low = x + 1;
+ else
+ bound = x;
+ }
+
+ for (high = low, bound = n; high < bound; ) {  /* high: first range at/after low whose `from` - 1 is > to */
+ x = (high + bound) >> 1;
+ if (to >= data[x*2] - 1)
+ high = x + 1;
+ else
+ bound = x;
+ }
+
+ inc_n = low + 1 - high;  /* net change of the range count: ranges [low, high) collapse into one */
+ if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
+ return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;
+
+ if (inc_n != 1) {  /* at least one existing range is absorbed: widen to cover it */
+ if (from > data[low*2])
+ from = data[low*2];
+ if (to < data[(high - 1)*2 + 1])
+ to = data[(high - 1)*2 + 1];
+ }
+
+ if (inc_n != 0 && (OnigCodePoint )high < n) {  /* ranges behind the merged one have to move */
+ int from_pos = SIZE_CODE_POINT * (1 + high * 2);
+ int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);
+ int size = (n - high) * 2 * SIZE_CODE_POINT;
+
+ if (inc_n > 0) {
+ BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
+ }
+ else {
+ BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
+ }
+ }
+
+ pos = SIZE_CODE_POINT * (1 + low * 2);  /* byte offset of slot `low` */
+ BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
+ BBUF_WRITE_CODE_POINT(bbuf, pos, from);
+ BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
+ n += inc_n;
+ BBUF_WRITE_CODE_POINT(bbuf, 0, n);  /* store the updated count */
+
+ return 0;
+}
+
+/*
+ * Add [from, to] to the range list in *pbuf, honouring the syntax setting
+ * for reversed bounds: with ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC a range whose
+ * `from` exceeds `to` is silently ignored, otherwise it is reported as
+ * ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS.
+ */
+static int
+add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
+{
+  if (from <= to)
+    return add_code_range_to_buf(pbuf, from, to);
+
+  return IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)
+           ? 0 : ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
+}
+
+static int  /* Build in *pbuf the complement of the range list `bbuf`.
+             * The complement is taken over code points from 0x80 up to the
+             * largest OnigCodePoint value.  A NULL or empty input yields
+             * whatever SET_ALL_MULTI_BYTE_RANGE() produces.
+             * Returns 0 or the error code of the failing helper. */
+not_code_range_buf(BBuf* bbuf, BBuf** pbuf)
+{
+ int r, i, n;
+ OnigCodePoint pre, from, to, *data;
+
+ *pbuf = (BBuf* )NULL;
+ if (IS_NULL(bbuf)) {
+ set_all:
+ return SET_ALL_MULTI_BYTE_RANGE(pbuf);
+ }
+
+ data = (OnigCodePoint* )(bbuf->p);
+ GET_CODE_POINT(n, data);  /* number of ranges */
+ data++;
+ if (n <= 0) goto set_all;
+
+ r = 0;
+ pre = 0x80;  /* lowest code point not yet accounted for */
+ for (i = 0; i < n; i++) {
+ from = data[i*2];
+ to = data[i*2+1];
+ if (pre <= from - 1) {  /* gap in front of this range */
+ r = add_code_range_to_buf(pbuf, pre, from - 1);
+ if (r != 0) return r;
+ }
+ if (to == ~((OnigCodePoint )0)) break;  /* range reaches the maximum: to + 1 would wrap */
+ pre = to + 1;
+ }
+ if (to < ~((OnigCodePoint )0)) {  /* tail behind the last range */
+ r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));
+ }
+ return r;
+}
+
+#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do { /* exchange the two (buffer, not-flag) operand pairs; all four arguments must be lvalues */ \
+ BBuf *tbuf; \
+ int tnot; \
+ tnot = not1; not1 = not2; not2 = tnot; \
+ tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
+} while (0)
+
+static int  /* Compute in *pbuf the union of two range lists, each optionally
+             * negated (not1 / not2 != 0).  A NULL buffer stands for the
+             * empty list.  Returns 0 or the error code of a helper.
+             * NOTE(review): when both operands are non-NULL and both are
+             * negated, neither initial branch below runs and only the ranges
+             * of one operand are added -- presumably callers never pass that
+             * combination; confirm. */
+or_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
+{
+ int r;
+ OnigCodePoint i, n1, *data1;
+ OnigCodePoint from, to;
+
+ *pbuf = (BBuf* )NULL;
+ if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
+ if (not1 != 0 || not2 != 0)  /* a negated empty list is "everything" */
+ return SET_ALL_MULTI_BYTE_RANGE(pbuf);
+ return 0;
+ }
+
+ r = 0;
+ if (IS_NULL(bbuf2))  /* make sure operand 2 is the non-NULL one */
+ SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
+
+ if (IS_NULL(bbuf1)) {
+ if (not1 != 0) {
+ return SET_ALL_MULTI_BYTE_RANGE(pbuf);
+ }
+ else {
+ if (not2 == 0) {
+ return bbuf_clone(pbuf, bbuf2);
+ }
+ else {
+ return not_code_range_buf(bbuf2, pbuf);
+ }
+ }
+ }
+
+ if (not1 != 0)  /* prefer the negated operand, if any, in position 2 */
+ SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
+
+ data1 = (OnigCodePoint* )(bbuf1->p);
+ GET_CODE_POINT(n1, data1);
+ data1++;
+
+ if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
+ r = bbuf_clone(pbuf, bbuf2);
+ }
+ else if (not1 == 0) { /* 1 OR (not 2) */
+ r = not_code_range_buf(bbuf2, pbuf);
+ }
+ if (r != 0) return r;
+
+ for (i = 0; i < n1; i++) {  /* merge every range of operand 1 into the result */
+ from = data1[i*2];
+ to = data1[i*2+1];
+ r = add_code_range_to_buf(pbuf, from, to);
+ if (r != 0) return r;
+ }
+ return 0;
+}
+
+/*
+ * Add to *pbuf the part of [from1, to1] that is NOT covered by any of the
+ * `n` ranges in `data` (pairs from/to, sorted ascending as maintained by
+ * add_code_range_to_buf()).
+ *
+ * The window [from1, to1] is narrowed while walking the list; pieces that
+ * are known to survive are emitted as soon as they are complete.
+ * Returns 0 or the error code of add_code_range_to_buf().
+ */
+static int
+and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,
+                OnigCodePoint* data, int n)
+{
+  int i, r;
+  OnigCodePoint from2, to2;
+
+  for (i = 0; i < n; i++) {
+    from2 = data[i*2];
+    to2   = data[i*2+1];
+    if (from2 < from1) {
+      if (to2 < from1) continue;   /* entirely below the window */
+      else {
+        from1 = to2 + 1;           /* cuts off the left part */
+      }
+    }
+    else if (from2 <= to1) {
+      if (to2 < to1) {
+        /* lies inside: the piece in front of it survives */
+        if (from1 <= from2 - 1) {
+          r = add_code_range_to_buf(pbuf, from1, from2-1);
+          if (r != 0) return r;
+        }
+        from1 = to2 + 1;
+      }
+      else {
+        to1 = from2 - 1;           /* cuts off the right part */
+      }
+    }
+    else {
+      /* from2 > to1: this range and (the list being sorted) every later
+         one lies above the window, so nothing more can be removed.  The
+         window must stay as it is; overwriting from1 here used to drop
+         the surviving range. */
+      break;
+    }
+    if (from1 > to1) break;        /* window is used up */
+  }
+  if (from1 <= to1) {
+    r = add_code_range_to_buf(pbuf, from1, to1);
+    if (r != 0) return r;
+  }
+  return 0;
+}
+
+static int  /* Compute in *pbuf the intersection of two range lists, each
+             * optionally negated (not1 / not2 != 0).  A NULL buffer stands
+             * for the empty list; *pbuf stays NULL when the result is empty.
+             * Returns 0 or the error code of a helper.
+             * NOTE(review): with both operands non-NULL and both negated
+             * nothing is produced here -- presumably callers never pass
+             * that combination; confirm. */
+and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
+{
+ int r;
+ OnigCodePoint i, j, n1, n2, *data1, *data2;
+ OnigCodePoint from, to, from1, to1, from2, to2;
+
+ *pbuf = (BBuf* )NULL;
+ if (IS_NULL(bbuf1)) {
+ if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */
+ return bbuf_clone(pbuf, bbuf2);
+ return 0;
+ }
+ else if (IS_NULL(bbuf2)) {
+ if (not2 != 0)  /* (not empty) AND 1 == 1 */
+ return bbuf_clone(pbuf, bbuf1);
+ return 0;
+ }
+
+ if (not1 != 0)  /* move a negated operand into position 2 */
+ SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
+
+ data1 = (OnigCodePoint* )(bbuf1->p);
+ data2 = (OnigCodePoint* )(bbuf2->p);
+ GET_CODE_POINT(n1, data1);
+ GET_CODE_POINT(n2, data2);
+ data1++;
+ data2++;
+
+ if (not2 == 0 && not1 == 0) { /* 1 AND 2 */
+ for (i = 0; i < n1; i++) {
+ from1 = data1[i*2];
+ to1 = data1[i*2+1];
+ for (j = 0; j < n2; j++) {
+ from2 = data2[j*2];
+ to2 = data2[j*2+1];
+ if (from2 > to1) break;  /* this and the following ranges of 2 start behind range 1 */
+ if (to2 < from1) continue;  /* ends in front of range 1 */
+ from = MAX(from1, from2);
+ to = MIN(to1, to2);
+ r = add_code_range_to_buf(pbuf, from, to);
+ if (r != 0) return r;
+ }
+ }
+ }
+ else if (not1 == 0) { /* 1 AND (not 2) */
+ for (i = 0; i < n1; i++) {
+ from1 = data1[i*2];
+ to1 = data1[i*2+1];
+ r = and_code_range1(pbuf, from1, to1, data2, n2);
+ if (r != 0) return r;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * dest = dest AND cc  (character class intersection, in place).
+ *
+ * The single-byte bitsets are combined first; a negated operand is
+ * inverted into a temporary bitset before the AND, and the result is
+ * inverted back when `dest` itself is negated so that dest keeps its
+ * `not` flag.  For multi-byte encodings the code range lists are combined
+ * the same way and the new list replaces dest->mbuf.
+ *
+ * Returns 0 or an error code.  On error dest->mbuf is left unchanged and
+ * every intermediate range buffer built here is released.
+ */
+static int
+and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
+{
+  int r, not1, not2;
+  BBuf *buf1, *buf2, *pbuf;
+  BitSetRef bsr1, bsr2;
+  BitSet bs1, bs2;
+
+  not1 = dest->not;
+  bsr1 = dest->bs;
+  buf1 = dest->mbuf;
+  not2 = cc->not;
+  bsr2 = cc->bs;
+  buf2 = cc->mbuf;
+
+  if (not1 != 0) {
+    bitset_invert_to(bsr1, bs1);
+    bsr1 = bs1;
+  }
+  if (not2 != 0) {
+    bitset_invert_to(bsr2, bs2);
+    bsr2 = bs2;
+  }
+  bitset_and(bsr1, bsr2);
+  if (bsr1 != dest->bs) {
+    bitset_copy(dest->bs, bsr1);
+    bsr1 = dest->bs;
+  }
+  if (not1 != 0) {
+    bitset_invert(dest->bs);
+  }
+
+  if (ONIGENC_IS_SINGLEBYTE(enc))
+    return 0;
+
+  pbuf = (BBuf* )NULL;
+  if (not1 != 0 && not2 != 0) {
+    /* (not 1) AND (not 2) == not (1 OR 2); dest stays negated */
+    r = or_code_range_buf(buf1, 0, buf2, 0, &pbuf);
+  }
+  else {
+    r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);
+    if (r == 0 && not1 != 0) {
+      /* dest stays negated, so store the complement of the result */
+      BBuf *tbuf;
+
+      r = not_code_range_buf(pbuf, &tbuf);
+      bbuf_free(pbuf);
+      pbuf = tbuf;
+    }
+  }
+  if (r != 0) {
+    /* the helpers may have built part of a list before failing */
+    bbuf_free(pbuf);
+    return r;
+  }
+
+  dest->mbuf = pbuf;
+  bbuf_free(buf1);
+  return r;
+}
+
+/*
+ * dest = dest OR cc  (character class union, in place).
+ *
+ * The single-byte bitsets are combined first; a negated operand is
+ * inverted into a temporary bitset before the OR, and the result is
+ * inverted back when `dest` itself is negated so that dest keeps its
+ * `not` flag.  For multi-byte encodings the code range lists are combined
+ * the same way and the new list replaces dest->mbuf.
+ *
+ * Returns 0 or an error code.  On error dest->mbuf is left unchanged and
+ * every intermediate range buffer built here is released.
+ */
+static int
+or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
+{
+  int r, not1, not2;
+  BBuf *buf1, *buf2, *pbuf;
+  BitSetRef bsr1, bsr2;
+  BitSet bs1, bs2;
+
+  not1 = dest->not;
+  bsr1 = dest->bs;
+  buf1 = dest->mbuf;
+  not2 = cc->not;
+  bsr2 = cc->bs;
+  buf2 = cc->mbuf;
+
+  if (not1 != 0) {
+    bitset_invert_to(bsr1, bs1);
+    bsr1 = bs1;
+  }
+  if (not2 != 0) {
+    bitset_invert_to(bsr2, bs2);
+    bsr2 = bs2;
+  }
+  bitset_or(bsr1, bsr2);
+  if (bsr1 != dest->bs) {
+    bitset_copy(dest->bs, bsr1);
+    bsr1 = dest->bs;
+  }
+  if (not1 != 0) {
+    bitset_invert(dest->bs);
+  }
+
+  if (ONIGENC_IS_SINGLEBYTE(enc))
+    return 0;
+
+  pbuf = (BBuf* )NULL;
+  if (not1 != 0 && not2 != 0) {
+    /* (not 1) OR (not 2) == not (1 AND 2); dest stays negated */
+    r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);
+  }
+  else {
+    r = or_code_range_buf(buf1, not1, buf2, not2, &pbuf);
+    if (r == 0 && not1 != 0) {
+      /* dest stays negated, so store the complement of the result */
+      BBuf *tbuf;
+
+      r = not_code_range_buf(pbuf, &tbuf);
+      bbuf_free(pbuf);
+      pbuf = tbuf;
+    }
+  }
+  if (r != 0) {
+    /* the helpers may have built part of a list before failing */
+    bbuf_free(pbuf);
+    return r;
+  }
+
+  dest->mbuf = pbuf;
+  bbuf_free(buf1);
+  return r;
+}
+
+/*
+ * Map the letter following a backslash to the control character it
+ * denotes (\n \t \r \f \a \b \e, and \v when ONIG_SYN_OP2_ESC_V_VTAB is
+ * on).  Without ONIG_SYN_OP_ESC_CONTROL_CHARS, or for any other letter,
+ * `c` is returned unchanged.
+ */
+static int
+conv_backslash_value(int c, ScanEnv* env)
+{
+  if (! IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS))
+    return c;
+
+  if (c == 'n') return '\n';
+  if (c == 't') return '\t';
+  if (c == 'r') return '\r';
+  if (c == 'f') return '\f';
+  if (c == 'a') return '\007';
+  if (c == 'b') return '\010';
+  if (c == 'e') return '\033';
+  if (c == 'v' && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
+    return '\v';
+
+  return c;
+}
+
+static int  /* Tell whether `node` may not be the target of a repeat operator:
+             * returns 1 for an invalid target, 0 otherwise.  Anchors are
+             * invalid; option groups and alternations are judged by their
+             * contents; every other node kind is accepted. */
+is_invalid_qualifier_target(Node* node)
+{
+ switch (NTYPE(node)) {
+ case N_ANCHOR:
+ return 1;
+ break;
+
+ case N_EFFECT:
+ if (NEFFECT(node).type == EFFECT_OPTION)  /* option group: judge what it wraps */
+ return is_invalid_qualifier_target(NEFFECT(node).target);
+ break;
+
+ case N_LIST: /* ex. (?:\G\A)* */
+ do {
+ if (! is_invalid_qualifier_target(NCONS(node).left)) return 0;
+ } while (IS_NOT_NULL(node = NCONS(node).right));
+ return 0;  /* NOTE(review): reached only when every element is invalid, yet 0 ("valid") is returned, so a list never counts as invalid -- the example above suggests 1 may have been intended; confirm before changing */
+ break;
+
+ case N_ALT: /* ex. (?:abc|\A)* */
+ do {
+ if (is_invalid_qualifier_target(NCONS(node).left)) return 1;  /* one invalid branch is enough */
+ } while (IS_NOT_NULL(node = NCONS(node).right));
+ break;
+
+ default:
+ break;
+ }
+ return 0;
+}
+
+/*
+ * Classify a quantifier as one of the six common forms:
+ *   ?:0, *:1, +:2, ??:3, *?:4, +?:5
+ * Any other lower/upper combination gives -1.
+ */
+static int
+popular_qualifier_num(QualifierNode* qf)
+{
+  int form;
+
+  if (qf->lower == 0) {
+    if (qf->upper == 1)                      form = 0;  /* ? */
+    else if (IS_REPEAT_INFINITE(qf->upper))  form = 1;  /* * */
+    else return -1;
+  }
+  else if (qf->lower == 1 && IS_REPEAT_INFINITE(qf->upper)) {
+    form = 2;                                           /* + */
+  }
+  else {
+    return -1;
+  }
+
+  /* the non-greedy variants follow the greedy ones in the same order */
+  return qf->greedy ? form : form + 3;
+}
+
+/*
+ * Simplify a quantifier applied directly to another quantifier, e.g.
+ * (?:a*)+ .  `pnode` is the outer (parent) quantifier node and `cnode`
+ * the inner (child) one.
+ *
+ * Both quantifiers are classified with popular_qualifier_num() and the
+ * action is looked up in `reduces` (row: child form, column: parent form).
+ * Depending on the action the parent takes over the child's repeat spec or
+ * target and the child node is freed, or both nodes are kept with adjusted
+ * bounds and the child becomes the parent's target.
+ *
+ * When either quantifier is not one of the six popular forms there is no
+ * table entry for the pair; nothing is changed in that case (indexing the
+ * table with -1 would read outside of it).
+ */
+extern void
+onig_reduce_nested_qualifier(Node* pnode, Node* cnode)
+{
+#define NQ_ASIS    0   /* as is     */
+#define NQ_DEL     1   /* delete parent */
+#define NQ_A       2   /* to '*'    */
+#define NQ_AQ      3   /* to '*?'   */
+#define NQ_QQ      4   /* to '??'   */
+#define NQ_P_QQ    5   /* to '+)??' */
+#define NQ_PQ_Q    6   /* to '+?)?' */
+
+  static char reduces[][6] = {
+    {NQ_DEL,  NQ_A,    NQ_A,   NQ_QQ,   NQ_AQ,   NQ_ASIS}, /* '?'  */
+    {NQ_DEL,  NQ_DEL,  NQ_DEL, NQ_P_QQ, NQ_P_QQ, NQ_DEL},  /* '*'  */
+    {NQ_A,    NQ_A,    NQ_DEL, NQ_ASIS, NQ_P_QQ, NQ_DEL},  /* '+'  */
+    {NQ_DEL,  NQ_AQ,   NQ_AQ,  NQ_DEL,  NQ_AQ,   NQ_AQ},   /* '??' */
+    {NQ_DEL,  NQ_DEL,  NQ_DEL, NQ_DEL,  NQ_DEL,  NQ_DEL},  /* '*?' */
+    {NQ_ASIS, NQ_PQ_Q, NQ_DEL, NQ_AQ,   NQ_AQ,   NQ_DEL}   /* '+?' */
+  };
+
+  int pnum, cnum;
+  QualifierNode *p, *c;
+
+  p = &(NQUALIFIER(pnode));
+  c = &(NQUALIFIER(cnode));
+  pnum = popular_qualifier_num(p);
+  cnum = popular_qualifier_num(c);
+  if (pnum < 0 || cnum < 0) return ;  /* no reduction rule for this pair */
+
+  switch(reduces[cnum][pnum]) {
+  case NQ_DEL:
+    *p = *c;
+    break;
+  case NQ_A:
+    p->target = c->target;
+    p->lower  = 0;  p->upper = REPEAT_INFINITE;  p->greedy = 1;
+    break;
+  case NQ_AQ:
+    p->target = c->target;
+    p->lower  = 0;  p->upper = REPEAT_INFINITE;  p->greedy = 0;
+    break;
+  case NQ_QQ:
+    p->target = c->target;
+    p->lower  = 0;  p->upper = 1;  p->greedy = 0;
+    break;
+  case NQ_P_QQ:
+    /* both nodes survive: (child as '+') wrapped in '??' */
+    p->target = cnode;
+    p->lower  = 0;  p->upper = 1;  p->greedy = 0;
+    c->lower  = 1;  c->upper = REPEAT_INFINITE;  c->greedy = 1;
+    return ;
+  case NQ_PQ_Q:
+    /* both nodes survive: (child as '+?') wrapped in '?' */
+    p->target = cnode;
+    p->lower  = 0;  p->upper = 1;  p->greedy = 1;
+    c->lower  = 1;  c->upper = REPEAT_INFINITE;  c->greedy = 0;
+    return ;
+  case NQ_ASIS:
+    p->target = cnode;
+    return ;
+  }
+
+  /* the parent now owns what the child pointed to; drop the child node
+     without freeing that target */
+  c->target = NULL_NODE;
+  onig_node_free(cnode);
+}
+
+
+enum TokenSyms {  /* token kinds produced by fetch_token() / fetch_token_in_cc() */
+ TK_EOT = 0, /* end of token */
+ TK_BYTE = 1,  /* ordinary pattern byte; the type every fetch starts with */
+ TK_RAW_BYTE = 2,  /* byte value given by a numeric or converted escape (e.g. \xHH, octal) */
+ TK_CODE_POINT,  /* code point written as \x{H...} */
+ TK_ANYCHAR,
+ TK_CHAR_TYPE,  /* \w \W \d \D \s \S; the kind is in u.subtype */
+ TK_BACKREF,
+ TK_CALL,
+ TK_ANCHOR,  /* \b \B \A \Z \z \G ...; the kind is in u.anchor / u.subtype */
+ TK_OP_REPEAT,  /* repeat operator; bounds are in u.repeat */
+ TK_INTERVAL,  /* {n,m} interval; bounds are in u.repeat */
+ TK_ANYCHAR_ANYTIME, /* SQL '%' == .* */
+ TK_ALT,
+ TK_SUBEXP_OPEN,
+ TK_SUBEXP_CLOSE,
+ TK_CC_OPEN,
+ TK_QUOTE_OPEN,
+ TK_CHAR_PROPERTY, /* \p{...}, \P{...} */
+ /* in cc */
+ TK_CC_CLOSE,  /* ']' */
+ TK_CC_RANGE,  /* '-' */
+ TK_POSIX_BRACKET_OPEN,  /* "[:" that is followed by a matching ":]" */
+ TK_CC_AND, /* && */
+ TK_CC_CC_OPEN /* [ */
+};
+
+typedef struct {  /* one token as filled in by the fetch_token*() functions */
+ enum TokenSyms type;
+ int escaped;  /* set to 1 by the fetch functions when the token came from a backslash escape */
+ int base; /* is number: 8, 16 (used in [....]) */
+ UChar* backp;  /* position remembered by the fetch functions (set for "[:" and "\{") */
+ union {  /* which member is valid depends on `type` */
+ int c;  /* byte value (TK_BYTE, TK_RAW_BYTE) */
+ OnigCodePoint code;  /* TK_CODE_POINT */
+ int anchor;  /* TK_ANCHOR */
+ int subtype;  /* TK_CHAR_TYPE (CTYPE_*); also used for some anchors */
+ struct {
+ int lower;
+ int upper;
+ int greedy;
+ int possessive;
+ } repeat;  /* TK_OP_REPEAT, TK_INTERVAL */
+ struct {
+ int num;
+ int ref1;
+ int* refs;
+ int by_name;
+ } backref;
+ struct {
+ UChar* name;
+ UChar* name_end;
+ } call;
+ struct {
+ int not;  /* 1 for \P{...}, 0 for \p{...} */
+ } prop;
+ } u;
+} OnigToken;
+
+
+/*
+ * Parse an interval quantifier; *src points just behind the opening '{'.
+ * Accepted forms: {n}, {n,}, {n,m} and, with
+ * ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV, {,m}.  When
+ * ONIG_SYN_OP_ESC_BRACE_INTERVAL is on, the closing brace has to be
+ * written with the escape character in front of it.
+ *
+ * Returns
+ *   0    an interval was read: `tok` becomes TK_INTERVAL with the bounds
+ *        in tok->u.repeat and *src is advanced behind the '}';
+ *   1    the text is not a valid interval but the syntax
+ *        (ONIG_SYN_ALLOW_INVALID_INTERVAL) lets the caller treat '{' as an
+ *        ordinary character; *src is unchanged;
+ *   < 0  an ONIGERR_* code.
+ */
+static int
+fetch_range_qualifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
+{
+  int low, up, syn_allow, non_low = 0;
+  int c;
+  UChar* p = *src;
+
+  syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);
+
+  if (PEND) {
+    if (syn_allow)
+      return 1;  /* "....{" : OK! */
+    else
+      return ONIGERR_END_PATTERN_AT_LEFT_BRACE;  /* "....{" syntax error */
+  }
+
+  if (! syn_allow) {
+    c = PPEEK;
+    if (c == ')' || c == '(' || c == '|') {
+      return ONIGERR_END_PATTERN_AT_LEFT_BRACE;
+    }
+  }
+
+  low = onig_scan_unsigned_number(&p, end, env->enc);
+  if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
+  if (low > ONIG_MAX_REPEAT_NUM)
+    return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
+
+  if (p == *src) { /* can't read low */
+    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {
+      /* allow {,n} as {0,n} */
+      low = 0;
+      non_low = 1;
+    }
+    else
+      goto invalid;
+  }
+
+  if (PEND) goto invalid;
+  PFETCH(c);
+  if (c == ',') {
+    UChar* prev = p;
+    up = onig_scan_unsigned_number(&p, end, env->enc);
+    if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
+    if (up > ONIG_MAX_REPEAT_NUM)
+      return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
+
+    if (p == prev) {
+      if (non_low != 0)
+        goto invalid;
+      up = REPEAT_INFINITE;  /* {n,} : {n,infinite} */
+    }
+  }
+  else {
+    if (non_low != 0)
+      goto invalid;
+
+    PUNFETCH;
+    up = low;  /* {n} : exact n times */
+  }
+
+  if (PEND) goto invalid;
+  PFETCH(c);
+  if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
+    /* the escape char must be followed by one more char ('}'); without
+       the PEND test a pattern ending in the escape char was read past
+       its end */
+    if (c != MC_ESC || PEND) goto invalid;
+    PFETCH(c);
+  }
+  if (c != '}') goto invalid;
+
+  if (!IS_REPEAT_INFINITE(up) && low > up) {
+    return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
+  }
+
+  tok->type = TK_INTERVAL;
+  tok->u.repeat.lower = low;
+  tok->u.repeat.upper = up;
+  *src = p;
+  return 0;
+
+ invalid:
+  if (syn_allow)
+    return 1;  /* OK */
+  else
+    return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;
+}
+
+/* \M-, \C-, \c, or \...
+ *
+ * Decode the escape whose first character (the one after the backslash)
+ * is at *src.  Returns the resulting value (>= 0) and advances *src behind
+ * the escape, or returns a negative ONIGERR_* code, in which case *src is
+ * left unchanged.  Meta and control escapes may nest (e.g. \M-\C-x); the
+ * nested part is decoded by a recursive call. */
+static int
+fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
+{
+ int c;
+ UChar* p = *src;
+
+ if (PEND) return ONIGERR_END_PATTERN_AT_BACKSLASH;
+
+ PFETCH(c);
+ switch (c) {
+ case 'M':
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {
+ if (PEND) return ONIGERR_END_PATTERN_AT_META;
+ PFETCH(c);
+ if (c != '-') return ONIGERR_META_CODE_SYNTAX;
+ if (PEND) return ONIGERR_END_PATTERN_AT_META;
+ PFETCH(c);
+ if (c == MC_ESC) {
+ c = fetch_escaped_value(&p, end, env);
+ if (c < 0) return c;
+ }
+ c = ((c & 0xff) | 0x80);  /* meta: keep the low byte and set bit 7 */
+ }
+ else
+ goto backslash;
+ break;
+
+ case 'C':
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {
+ if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
+ PFETCH(c);
+ if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
+ goto control;  /* \C-x continues exactly like \cx */
+ }
+ else
+ goto backslash;
+
+ case 'c':
+ if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {
+ control:
+ if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
+ PFETCH(c);
+ if (c == MC_ESC) {
+ c = fetch_escaped_value(&p, end, env);
+ if (c < 0) return c;
+ }
+ else if (c == '?')
+ c = 0177;  /* control-? is 0177 */
+ else
+ c &= 0x9f;  /* control: clear bits 5 and 6 */
+ break;
+ }
+ /* fall through */
+
+ default:
+ {
+ backslash:
+ c = conv_backslash_value(c, env);
+ }
+ break;
+ }
+
+ *src = p;
+ return c;
+}
+
+static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
+
+#ifdef USE_NAMED_GROUP
+/*
+ Read a group name that ends with '>'; *src points at its first character.
+
+ ref: 0 -> define name (don't allow number name)
+      1 -> reference name (allow number name)
+
+ On success 0 is returned, *rname_end points at the closing '>' (one past
+ the last name byte) and *src is advanced behind the '>'.
+ On failure an ONIGERR_* code is returned and *src is unchanged; except for
+ an empty name, the offending text is also recorded through
+ onig_scan_env_set_error_string().
+*/
+static int
+fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
+{
+ int r, len, is_num;
+ int c = 0;
+ UChar *name_end;
+ UChar *p = *src;
+
+ name_end = end;
+ r = 0;
+ is_num = 0;
+ if (PEND) {
+ return ONIGERR_EMPTY_GROUP_NAME;
+ }
+ else {
+ PFETCH(c);
+ if (c == '>')
+ return ONIGERR_EMPTY_GROUP_NAME;
+
+ if (ONIGENC_IS_CODE_DIGIT(env->enc, c)) {  /* a leading digit announces a number name */
+ if (ref == 1)
+ is_num = 1;
+ else {
+ r = ONIGERR_INVALID_GROUP_NAME;
+ }
+ }
+ len = enc_len(env->enc, c);
+ while (!PEND && len-- > 1)  /* skip the remaining bytes of a multi-byte first character */
+ PFETCH(c);
+ }
+
+ while (!PEND) {  /* keeps scanning after an error so that name_end covers the whole name */
+ name_end = p;
+ PFETCH(c);
+ if (c == '>' || c == ')') break;
+
+ len = enc_len(env->enc, c);
+ if (is_num == 1) {  /* number name: digits only */
+ if (! ONIGENC_IS_CODE_DIGIT(env->enc, c)) {
+ if (!ONIGENC_IS_CODE_ALPHA(env->enc, c) && c != '_')
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+ else
+ r = ONIGERR_INVALID_GROUP_NAME;
+ }
+ }
+ else {
+ if (len == 1) {  /* single-byte characters must be alphabetic, a digit or '_' */
+ if (!ONIGENC_IS_CODE_ALPHA(env->enc, c) &&
+ !ONIGENC_IS_CODE_DIGIT(env->enc, c) &&
+ c != '_') {
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+ }
+ }
+ }
+
+ while (!PEND && len-- > 1)
+ PFETCH(c);
+ }
+ if (c != '>') {  /* stopped at ')' or at the end of the pattern */
+ r = ONIGERR_INVALID_GROUP_NAME;
+ name_end = end;
+ }
+ else {
+ c = **src;
+ if (ONIGENC_IS_CODE_UPPER(env->enc, c))  /* a name may not begin with an upper case letter */
+ r = ONIGERR_INVALID_GROUP_NAME;
+ }
+
+ if (r == 0) {
+ *rname_end = name_end;
+ *src = p;
+ return 0;
+ }
+ else {
+ onig_scan_env_set_error_string(env, r, *src, name_end);
+ return r;
+ }
+}
+#else
+/*
+ * Variant used without USE_NAMED_GROUP: only group numbers are accepted.
+ *
+ * Reads digits up to the closing '>'.  On success 0 is returned,
+ * *rname_end points at the '>' and *src is advanced behind it.  Any
+ * multi-byte lead byte or non-digit gives
+ * ONIGERR_INVALID_CHAR_IN_GROUP_NAME, a missing '>' gives
+ * ONIGERR_INVALID_GROUP_NAME; the offending text is recorded through
+ * onig_scan_env_set_error_string() and *src is left unchanged.
+ * `ref` is not used by this variant.
+ */
+static int
+fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
+{
+  int r = 0;
+  int c = 0;
+  UChar *name_end = end;
+  UChar *p = *src;
+
+  while (!PEND) {
+    name_end = p;
+    PFETCH(c);
+    if (enc_len(env->enc, c) > 1)
+      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+
+    if (c == '>' || c == ')') break;
+    if (! ONIGENC_IS_CODE_DIGIT(env->enc, c))
+      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+  }
+  if (c != '>') {
+    r = ONIGERR_INVALID_GROUP_NAME;
+    name_end = end;
+  }
+
+  if (r != 0) {
+    onig_scan_env_set_error_string(env, r, *src, name_end);
+    return r;
+  }
+
+  *rname_end = name_end;
+  *src = p;
+  return 0;
+}
+#endif
+
+/*
+ * Warn that the character class contains `c` without an escape.
+ * Nothing is done when warnings are disabled (onig_warn is
+ * onig_null_warn) or unless the syntax has both
+ * ONIG_SYN_WARN_CC_OP_NOT_ESCAPED and ONIG_SYN_BACKSLASH_ESCAPE_IN_CC.
+ */
+static void
+CC_ESC_WARN(ScanEnv* env, UChar *c)
+{
+  char buf[WARN_BUFSIZE];
+
+  if (onig_warn == onig_null_warn) return ;
+  if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) return ;
+  if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) return ;
+
+  onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
+                             env->pattern, env->pattern_end,
+                             "character class has '%s' without escape", c);
+  (*onig_warn)(buf);
+}
+
+/*
+ * Warn that the regular expression contains `c` without an escape.
+ * Nothing is done when warnings are disabled (onig_warn is
+ * onig_null_warn) or the syntax lacks ONIG_SYN_WARN_CC_OP_NOT_ESCAPED.
+ */
+static void
+CCEND_ESC_WARN(ScanEnv* env, UChar* c)
+{
+  char buf[WARN_BUFSIZE];
+
+  if (onig_warn == onig_null_warn) return ;
+  if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) return ;
+
+  onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
+                             env->pattern, env->pattern_end,
+                             "regular expression has '%s' without escape", c);
+  (*onig_warn)(buf);
+}
+
+/*
+ * Search [from, to) for the sequence of `n` code points s[0..n-1].
+ *
+ * Returns a pointer to the start of the first occurrence found and, when
+ * `next` is not NULL, stores the position just behind it in *next.
+ * Returns NULL_UCHARP when there is none.  After a partial match the scan
+ * resumes behind the characters that did match.
+ */
+static UChar*
+find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
+                  UChar **next, OnigEncoding enc)
+{
+  int i;
+  OnigCodePoint x;
+  UChar *q;
+  UChar *p;
+
+  for (p = from; p < to; p = q) {
+    x = ONIGENC_MBC_TO_CODE(enc, p, to);
+    q = p + enc_len(enc, *p);
+    if (x != s[0]) continue;
+
+    /* first code point matched: compare the rest */
+    i = 1;
+    while (i < n && q < to) {
+      x = ONIGENC_MBC_TO_CODE(enc, q, to);
+      if (x != s[i]) break;
+      q += enc_len(enc, *q);
+      i++;
+    }
+    if (i >= n) {
+      if (IS_NOT_NULL(next))
+        *next = q;
+      return p;
+    }
+  }
+  return NULL_UCHARP;
+}
+
+/*
+ * Check whether the sequence of `n` code points s[0..n-1] occurs in
+ * [from, to) before the code point `bad` is met.
+ *
+ * The character following the escape character (MC_ESC) is skipped
+ * without being examined.  Returns 1 when the sequence is found, 0 when
+ * `bad` is met first or the end is reached.
+ */
+static int
+str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
+                         OnigCodePoint bad, OnigEncoding enc)
+{
+  int i;
+  int in_esc = 0;
+  OnigCodePoint x;
+  UChar *q;
+  UChar *p = from;
+
+  while (p < to) {
+    if (in_esc) {
+      /* escaped character: step over it */
+      in_esc = 0;
+      p += enc_len(enc, *p);
+      continue;
+    }
+
+    x = ONIGENC_MBC_TO_CODE(enc, p, to);
+    if (x != s[0]) {
+      if (x == bad) return 0;
+      if (x == MC_ESC) in_esc = 1;
+      p += enc_len(enc, *p);
+      continue;
+    }
+
+    /* first code point matched: compare the rest */
+    q = p + enc_len(enc, *p);
+    for (i = 1; i < n && q < to; i++) {
+      x = ONIGENC_MBC_TO_CODE(enc, q, to);
+      if (x != s[i]) break;
+      q += enc_len(enc, *q);
+    }
+    if (i >= n) return 1;
+
+    p += enc_len(enc, *p);
+  }
+  return 0;
+}
+
+/*
+ * Fetch the next token while scanning the inside of a character class.
+ *
+ * *src is the read position and is advanced behind the token.  The token
+ * is stored in `tok`; its type is also the return value.  TK_EOT is
+ * returned at the end of the pattern, a negative ONIGERR_* code on error.
+ *
+ * Look-ahead with PPEEK is always preceded by a PEND test here (as the
+ * other look-ahead sites in this file do), so that a pattern ending in
+ * "\p", "\P" or "[" is not read past its end.
+ */
+static int
+fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
+{
+  int c, num;
+  OnigSyntaxType* syn = env->syntax;
+  UChar* prev;
+  UChar* p = *src;
+
+  if (PEND) {
+    tok->type = TK_EOT;
+    return tok->type;
+  }
+
+  PFETCH(c);
+  tok->type = TK_BYTE;
+  tok->base = 0;
+  tok->u.c  = c;
+  if (c == ']') {
+    tok->type = TK_CC_CLOSE;
+  }
+  else if (c == '-') {
+    tok->type = TK_CC_RANGE;
+  }
+  else if (c == MC_ESC) {
+    /* without backslash escapes in classes the escape char is a plain byte */
+    if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
+      goto end;
+
+    if (PEND) return ONIGERR_END_PATTERN_AT_BACKSLASH;
+
+    PFETCH(c);
+    tok->escaped = 1;
+    tok->u.c = c;
+    switch (c) {
+    case 'w':
+      tok->type = TK_CHAR_TYPE;
+      tok->u.subtype = CTYPE_WORD;
+      break;
+    case 'W':
+      tok->type = TK_CHAR_TYPE;
+      tok->u.subtype = CTYPE_NOT_WORD;
+      break;
+    case 'd':
+      tok->type = TK_CHAR_TYPE;
+      tok->u.subtype = CTYPE_DIGIT;
+      break;
+    case 'D':
+      tok->type = TK_CHAR_TYPE;
+      tok->u.subtype = CTYPE_NOT_DIGIT;
+      break;
+    case 's':
+      tok->type = TK_CHAR_TYPE;
+      tok->u.subtype = CTYPE_WHITE_SPACE;
+      break;
+    case 'S':
+      tok->type = TK_CHAR_TYPE;
+      tok->u.subtype = CTYPE_NOT_WHITE_SPACE;
+      break;
+
+    case 'p':
+    case 'P':
+      if (!PEND && PPEEK == '{' &&
+          IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY)) {
+        PINC;
+        tok->type = TK_CHAR_PROPERTY;
+        tok->u.prop.not = (c == 'P' ? 1 : 0);
+      }
+      break;
+
+    case 'x':
+      if (PEND) break;
+
+      prev = p;
+      if (PPEEK == '{' && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
+        PINC;
+        num = scan_unsigned_hexadecimal_number(&p, end, 8, env->enc);
+        if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
+        /* a ninth hex digit directly behind eight scanned ones */
+        if (!PEND && ONIGENC_IS_CODE_XDIGIT(env->enc, *p) && p - prev >= 9)
+          return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
+
+        if (p > prev + 1 && !PEND && PPEEK == '}') {
+          PINC;
+          tok->type   = TK_CODE_POINT;
+          tok->base   = 16;
+          tok->u.code = (OnigCodePoint )num;
+        }
+        else {
+          /* can't read nothing or invalid format */
+          p = prev;
+        }
+      }
+      else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
+        num = scan_unsigned_hexadecimal_number(&p, end, 2, env->enc);
+        if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
+        if (p == prev) {  /* can't read nothing. */
+          num = 0; /* but, it's not error */
+        }
+        tok->type = TK_RAW_BYTE;
+        tok->base = 16;
+        tok->u.c  = num;
+      }
+      break;
+
+    case 'u':
+      if (PEND) break;
+
+      prev = p;
+      if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
+        num = scan_unsigned_hexadecimal_number(&p, end, 4, env->enc);
+        if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
+        if (p == prev) {  /* can't read nothing. */
+          num = 0; /* but, it's not error */
+        }
+        tok->type = TK_RAW_BYTE;
+        tok->base = 16;
+        tok->u.c  = num;
+      }
+      break;
+
+    case '0':
+    case '1': case '2': case '3': case '4': case '5': case '6': case '7':
+      if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
+        PUNFETCH;  /* the digit just read belongs to the number */
+        prev = p;
+        num = scan_unsigned_octal_number(&p, end, 3, env->enc);
+        if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
+        if (p == prev) {  /* can't read nothing. */
+          num = 0; /* but, it's not error */
+        }
+        tok->type = TK_RAW_BYTE;
+        tok->base = 8;
+        tok->u.c  = num;
+      }
+      break;
+
+    default:
+      PUNFETCH;
+      num = fetch_escaped_value(&p, end, env);
+      if (num < 0) return num;
+      if (tok->u.c != num) {
+        /* the escape stands for a different value than the letter itself */
+        tok->u.c  = num;
+        tok->type = TK_RAW_BYTE;
+      }
+      break;
+    }
+  }
+  else if (c == '[') {
+    if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) &&
+        !PEND && PPEEK == ':') {
+      OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };
+      tok->backp = p; /* point at '[' is readed */
+      PINC;
+      if (str_exist_check_with_esc(send, 2, p, end, (OnigCodePoint )']',
+                                   env->enc)) {
+        tok->type = TK_POSIX_BRACKET_OPEN;
+      }
+      else {
+        PUNFETCH;
+        goto cc_in_cc;
+      }
+    }
+    else {
+    cc_in_cc:
+      if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {
+        tok->type = TK_CC_CC_OPEN;
+      }
+      else {
+        CC_ESC_WARN(env, (UChar* )"[");
+      }
+    }
+  }
+  else if (c == '&') {
+    if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&
+        !PEND && PPEEK == '&') {
+      PINC;
+      tok->type = TK_CC_AND;
+    }
+  }
+
+ end:
+  *src = p;
+  return tok->type;
+}
+
+static int
+fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
+{
+ int r, c, num;
+ OnigSyntaxType* syn = env->syntax;
+ UChar* prev;
+ UChar* p = *src;
+
+ start:
+ if (PEND) {
+ tok->type = TK_EOT;
+ return tok->type;
+ }
+
+ tok->type = TK_BYTE;
+ tok->base = 0;
+ PFETCH(c);
+ if (c == MC_ESC) {
+ if (PEND) return ONIGERR_END_PATTERN_AT_BACKSLASH;
+
+ PFETCH(c);
+ tok->u.c = c;
+ tok->escaped = 1;
+ switch (c) {
+ case '*':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;
+ tok->type = TK_OP_REPEAT;
+ tok->u.repeat.lower = 0;
+ tok->u.repeat.upper = REPEAT_INFINITE;
+ goto greedy_check;
+ break;
+
+ case '+':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
+ tok->type = TK_OP_REPEAT;
+ tok->u.repeat.lower = 1;
+ tok->u.repeat.upper = REPEAT_INFINITE;
+ goto greedy_check;
+ break;
+
+ case '?':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;
+ tok->type = TK_OP_REPEAT;
+ tok->u.repeat.lower = 0;
+ tok->u.repeat.upper = 1;
+ greedy_check:
+ if (!PEND && PPEEK == '?' &&
+ IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {
+ PFETCH(c);
+ tok->u.repeat.greedy = 0;
+ tok->u.repeat.possessive = 0;
+ }
+ else if (!PEND && PPEEK == '+' &&
+ ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&
+ tok->type != TK_INTERVAL) ||
+ (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&
+ tok->type == TK_INTERVAL))) {
+ PFETCH(c);
+ tok->u.repeat.greedy = 1;
+ tok->u.repeat.possessive = 1;
+ }
+ else {
+ tok->u.repeat.greedy = 1;
+ tok->u.repeat.possessive = 0;
+ }
+ break;
+
+ case '{':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
+ tok->backp = p;
+ r = fetch_range_qualifier(&p, end, tok, env);
+ if (r < 0) return r; /* error */
+ if (r > 0) {
+ /* normal char */
+ }
+ else
+ goto greedy_check;
+ break;
+
+ case '|':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;
+ tok->type = TK_ALT;
+ break;
+
+ case '(':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
+ tok->type = TK_SUBEXP_OPEN;
+ break;
+
+ case ')':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
+ tok->type = TK_SUBEXP_CLOSE;
+ break;
+
+ case 'w':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.subtype = CTYPE_WORD;
+ break;
+
+ case 'W':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.subtype = CTYPE_NOT_WORD;
+ break;
+
+ case 'b':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
+ tok->type = TK_ANCHOR;
+ tok->u.anchor = ANCHOR_WORD_BOUND;
+ break;
+
+ case 'B':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
+ tok->type = TK_ANCHOR;
+ tok->u.anchor = ANCHOR_NOT_WORD_BOUND;
+ break;
+
+#ifdef USE_WORD_BEGIN_END
+ case '<':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
+ tok->type = TK_ANCHOR;
+ tok->u.anchor = ANCHOR_WORD_BEGIN;
+ break;
+
+ case '>':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
+ tok->type = TK_ANCHOR;
+ tok->u.anchor = ANCHOR_WORD_END;
+ break;
+#endif
+
+ case 's':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.subtype = CTYPE_WHITE_SPACE;
+ break;
+
+ case 'S':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.subtype = CTYPE_NOT_WHITE_SPACE;
+ break;
+
+ case 'd':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.subtype = CTYPE_DIGIT;
+ break;
+
+ case 'D':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.subtype = CTYPE_NOT_DIGIT;
+ break;
+
+ case 'A':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
+ begin_buf:
+ tok->type = TK_ANCHOR;
+ tok->u.subtype = ANCHOR_BEGIN_BUF;
+ break;
+
+ case 'Z':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
+ tok->type = TK_ANCHOR;
+ tok->u.subtype = ANCHOR_SEMI_END_BUF;
+ break;
+
+ case 'z':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
+ end_buf:
+ tok->type = TK_ANCHOR;
+ tok->u.subtype = ANCHOR_END_BUF;
+ break;
+
+ case 'G':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;
+ tok->type = TK_ANCHOR;
+ tok->u.subtype = ANCHOR_BEGIN_POSITION;
+ break;
+
+ case '`':
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
+ goto begin_buf;
+ break;
+
+ case '\'':
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
+ goto end_buf;
+ break;
+
+ case 'x':
+ if (PEND) break;
+
+ prev = p;
+ if (PPEEK == '{' && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
+ PINC;
+ num = scan_unsigned_hexadecimal_number(&p, end, 8, env->enc);
+ if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
+ if (!PEND && ONIGENC_IS_CODE_XDIGIT(env->enc, *p) && p - prev >= 9)
+ return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
+
+ if (p > prev + 1 && !PEND && PPEEK == '}') {
+ PINC;
+ tok->type = TK_CODE_POINT;
+ tok->u.code = (OnigCodePoint )num;
+ }
+ else {
+ /* can't read nothing or invalid format */
+ p = prev;
+ }
+ }
+ else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
+ num = scan_unsigned_hexadecimal_number(&p, end, 2, env->enc);
+ if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
+ if (p == prev) { /* can't read nothing. */
+ num = 0; /* but, it's not error */
+ }
+ tok->type = TK_RAW_BYTE;
+ tok->base = 16;
+ tok->u.c = num;
+ }
+ break;
+
+ case 'u':
+ if (PEND) break;
+
+ prev = p;
+ if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
+ num = scan_unsigned_hexadecimal_number(&p, end, 4, env->enc);
+ if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
+ if (p == prev) { /* can't read nothing. */
+ num = 0; /* but, it's not error */
+ }
+ tok->type = TK_RAW_BYTE;
+ tok->base = 16;
+ tok->u.c = num;
+ }
+ break;
+
+ case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ PUNFETCH;
+ prev = p;
+ num = onig_scan_unsigned_number(&p, end, env->enc);
+ if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
+ if (num > ONIG_MAX_BACKREF_NUM) return ONIGERR_TOO_BIG_BACKREF_NUMBER;
+
+ if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&
+ (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */
+ if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
+ if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num]))
+ return ONIGERR_INVALID_BACKREF;
+ }
+
+ tok->type = TK_BACKREF;
+ tok->u.backref.num = 1;
+ tok->u.backref.ref1 = num;
+ tok->u.backref.by_name = 0;
+ break;
+ }
+ else if (c == '8' || c == '9') {
+ /* normal char */
+ p = prev; PINC;
+ break;
+ }
+
+ p = prev;
+ /* fall through */
+ case '0':
+ if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
+ prev = p;
+ num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), env->enc);
+ if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
+ if (p == prev) { /* can't read nothing. */
+ num = 0; /* but, it's not error */
+ }
+ tok->type = TK_RAW_BYTE;
+ tok->base = 8;
+ tok->u.c = num;
+ }
+ else if (c != '0') {
+ PINC;
+ }
+ break;
+
+#ifdef USE_NAMED_GROUP
+ case 'k':
+ if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {
+ PFETCH(c);
+ if (c == '<') {
+ UChar* name_end;
+ int* backs;
+
+ prev = p;
+ r = fetch_name(&p, end, &name_end, env, 1);
+ if (r < 0) return r;
+ num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
+ if (num <= 0) {
+ onig_scan_env_set_error_string(env,
+ ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
+ return ONIGERR_UNDEFINED_NAME_REFERENCE;
+ }
+ if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
+ int i;
+ for (i = 0; i < num; i++) {
+ if (backs[i] > env->num_mem ||
+ IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))
+ return ONIGERR_INVALID_BACKREF;
+ }
+ }
+
+ tok->type = TK_BACKREF;
+ tok->u.backref.by_name = 1;
+ if (num == 1) {
+ tok->u.backref.num = 1;
+ tok->u.backref.ref1 = backs[0];
+ }
+ else {
+ tok->u.backref.num = num;
+ tok->u.backref.refs = backs;
+ }
+ }
+ else
+ PUNFETCH;
+ }
+ break;
+#endif
+
+#ifdef USE_SUBEXP_CALL
+ case 'g':
+ if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
+ PFETCH(c);
+ if (c == '<') {
+ UChar* name_end;
+
+ prev = p;
+ r = fetch_name(&p, end, &name_end, env, 1);
+ if (r < 0) return r;
+
+ tok->type = TK_CALL;
+ tok->u.call.name = prev;
+ tok->u.call.name_end = name_end;
+ }
+ else
+ PUNFETCH;
+ }
+ break;
+#endif
+
+ case 'Q':
+ if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {
+ tok->type = TK_QUOTE_OPEN;
+ }
+ break;
+
+ case 'p':
+ case 'P':
+ if (PPEEK == '{' &&
+ IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY)) {
+ PINC;
+ tok->type = TK_CHAR_PROPERTY;
+ tok->u.prop.not = (c == 'P' ? 1 : 0);
+ }
+ break;
+
+ default:
+ PUNFETCH;
+ num = fetch_escaped_value(&p, end, env);
+ if (num < 0) return num;
+ /* set_raw: */
+ if (tok->u.c != num) {
+ tok->type = TK_RAW_BYTE;
+ tok->u.c = num;
+ }
+ break;
+ }
+ }
+ else {
+ tok->u.c = c;
+ tok->escaped = 0;
+
+#ifdef USE_VARIABLE_META_CHARS
+ if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
+ IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {
+ if (c == MC_ANYCHAR)
+ goto any_char;
+ else if (c == MC_ANYTIME)
+ goto anytime;
+ else if (c == MC_ZERO_OR_ONE_TIME)
+ goto zero_or_one_time;
+ else if (c == MC_ONE_OR_MORE_TIME)
+ goto one_or_more_time;
+ else if (c == MC_ANYCHAR_ANYTIME) {
+ tok->type = TK_ANYCHAR_ANYTIME;
+ goto out;
+ }
+ }
+#endif
+
+ switch (c) {
+ case '.':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
+ any_char:
+ tok->type = TK_ANYCHAR;
+ break;
+
+ case '*':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
+ anytime:
+ tok->type = TK_OP_REPEAT;
+ tok->u.repeat.lower = 0;
+ tok->u.repeat.upper = REPEAT_INFINITE;
+ goto greedy_check;
+ break;
+
+ case '+':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
+ one_or_more_time:
+ tok->type = TK_OP_REPEAT;
+ tok->u.repeat.lower = 1;
+ tok->u.repeat.upper = REPEAT_INFINITE;
+ goto greedy_check;
+ break;
+
+ case '?':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
+ zero_or_one_time:
+ tok->type = TK_OP_REPEAT;
+ tok->u.repeat.lower = 0;
+ tok->u.repeat.upper = 1;
+ goto greedy_check;
+ break;
+
+ case '{':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
+ tok->backp = p;
+ r = fetch_range_qualifier(&p, end, tok, env);
+ if (r < 0) return r; /* error */
+ if (r > 0) {
+ /* normal char */
+ }
+ else
+ goto greedy_check;
+ break;
+
+ case '|':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;
+ tok->type = TK_ALT;
+ break;
+
+ case '(':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
+ tok->type = TK_SUBEXP_OPEN;
+ break;
+
+ case ')':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
+ tok->type = TK_SUBEXP_CLOSE;
+ break;
+
+ case '^':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
+ tok->type = TK_ANCHOR;
+ tok->u.subtype = (IS_SINGLELINE(env->option)
+ ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);
+ break;
+
+ case '$':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
+ tok->type = TK_ANCHOR;
+ tok->u.subtype = (IS_SINGLELINE(env->option)
+ ? ANCHOR_END_BUF : ANCHOR_END_LINE);
+ break;
+
+ case '[':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;
+ tok->type = TK_CC_OPEN;
+ break;
+
+ case ']':
+ if (*src > env->pattern) /* /].../ is allowed. */
+ CCEND_ESC_WARN(env, "]");
+ break;
+
+ case '#':
+ if (IS_EXTEND(env->option)) {
+ while (!PEND) {
+ PFETCH(c);
+ if (ONIG_IS_NEWLINE(c))
+ break;
+ }
+ goto start;
+ break;
+ }
+ break;
+
+ case ' ': case '\t': case '\n': case '\r': case '\f':
+ if (IS_EXTEND(env->option))
+ goto start;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ out:
+ *src = p;
+ return tok->type;
+}
+
+/*
+ * Add character type `ctype` to `cc` using the code range lists that
+ * ONIGENC_GET_CTYPE_CODE_RANGE() reports for `enc`.
+ *
+ *   not == 0 : every listed single-byte code point is set in cc->bs and
+ *              every listed multi-byte range is appended to cc->mbuf.
+ *   not != 0 : the complement is added instead, built from the gaps
+ *              between consecutive ranges (0x00-0x7f for the bitset,
+ *              0x80-0x7fffffff for the multi-byte buffer).
+ *
+ * NOTE(review): the gap walk assumes each list is sorted in ascending
+ * order -- confirm against the encoding tables.
+ *
+ * Returns 0 on success, otherwise the non-zero status of the helper that
+ * failed.
+ */
+static int
+add_ctype_to_cc_by_list(CClassNode* cc, int ctype, int not,
+                        OnigEncoding enc)
+{
+  int i, r, nsb, nmb;
+  OnigCodePointRange *sbr, *mbr;
+  OnigCodePoint j;
+
+  r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &nsb, &nmb, &sbr, &mbr);
+  if (r != 0) return r;
+
+  if (not == 0) {
+    for (i = 0; i < nsb; i++) {
+      for (j = sbr[i].from; j <= sbr[i].to; j++) {
+        BITSET_SET_BIT(cc->bs, j);
+      }
+    }
+    for (i = 0; i < nmb; i++) {
+      r = add_code_range_to_buf(&(cc->mbuf), mbr[i].from, mbr[i].to);
+      if (r != 0) return r;
+    }
+  }
+  else {
+    OnigCodePoint prev = 0;
+
+    /* single-byte part: set everything that lies between the ranges */
+    for (i = 0; i < nsb; i++) {
+      for (j = prev; j < sbr[i].from; j++) {
+        BITSET_SET_BIT(cc->bs, j);
+      }
+      prev = sbr[i].to + 1;
+    }
+    /* Trailing gap runs up to and INCLUDING 0x7f: the multi-byte
+       complement below only starts at 0x80, so stopping at 0x7e would
+       leave 0x7f out of every negated type. */
+    for (j = prev; j < 0x80; j++) {
+      BITSET_SET_BIT(cc->bs, j);
+    }
+
+    /* multi-byte part: same gap walk over 0x80 .. 0x7fffffff */
+    prev = 0x80;
+    for (i = 0; i < nmb; i++) {
+      if (prev < mbr[i].from) {
+        r = add_code_range_to_buf(&(cc->mbuf), prev, mbr[i].from - 1);
+        if (r != 0) return r;
+      }
+      prev = mbr[i].to + 1;
+    }
+    if (prev < 0x7fffffff) {
+      r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff);
+      if (r != 0) return r;
+    }
+  }
+
+  return r;
+}
+
+/*
+ * Add character type `ctype` (its complement when `not` is non-zero) to
+ * the character class `cc`.
+ *
+ * Encodings whose ctype support level is not the single-byte level are
+ * delegated to add_ctype_to_cc_by_list().  For the single-byte level each
+ * of the SINGLE_BYTE_SIZE byte values is tested individually and the whole
+ * multi-byte range is added where the type (or its complement) covers it.
+ *
+ * Returns 0 on success, ONIGERR_PARSER_BUG for an unhandled ctype, or the
+ * status of a failing helper.
+ */
+static int
+add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
+{
+  int c, r;
+  int hit;
+  int negated = (not != 0 ? 1 : 0);
+  OnigEncoding enc = env->enc;
+
+  if (ONIGENC_CTYPE_SUPPORT_LEVEL(enc) != ONIGENC_CTYPE_SUPPORT_LEVEL_SB)
+    return add_ctype_to_cc_by_list(cc, ctype, not, env->enc);
+
+  r = 0;
+  switch (ctype) {
+  case ONIGENC_CTYPE_ALPHA:
+  case ONIGENC_CTYPE_BLANK:
+  case ONIGENC_CTYPE_CNTRL:
+  case ONIGENC_CTYPE_DIGIT:
+  case ONIGENC_CTYPE_LOWER:
+  case ONIGENC_CTYPE_PUNCT:
+  case ONIGENC_CTYPE_SPACE:
+  case ONIGENC_CTYPE_UPPER:
+  case ONIGENC_CTYPE_XDIGIT:
+  case ONIGENC_CTYPE_ASCII:
+  case ONIGENC_CTYPE_ALNUM:
+    /* a byte is selected when its membership differs from `negated` */
+    for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+      hit = (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype) ? 1 : 0);
+      if (hit != negated)
+        BITSET_SET_BIT(cc->bs, c);
+    }
+    /* for these types the multi-byte range is added only when negated */
+    if (negated) {
+      ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
+    }
+    break;
+
+  case ONIGENC_CTYPE_GRAPH:
+  case ONIGENC_CTYPE_PRINT:
+    for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+      hit = (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype) ? 1 : 0);
+      if (hit != negated)
+        BITSET_SET_BIT(cc->bs, c);
+    }
+    /* for these types the multi-byte range is added only when NOT negated */
+    if (! negated) {
+      ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
+    }
+    break;
+
+  case ONIGENC_CTYPE_WORD:
+    if (negated) {
+      /* non-word: skip word bytes and multi-byte head bytes */
+      for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+        if (ONIGENC_IS_CODE_SB_WORD(enc, c)) continue;
+        if (ONIGENC_IS_MBC_HEAD(enc, c)) continue;
+        BITSET_SET_BIT(cc->bs, c);
+      }
+    }
+    else {
+      for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+        if (ONIGENC_IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c);
+      }
+      ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
+    }
+    break;
+
+  default:
+    return ONIGERR_PARSER_BUG;
+  }
+
+  return r;
+}
+
+/*
+ * Translate a parser-level character type (CTYPE_WORD, CTYPE_NOT_DIGIT,
+ * ...) into the encoding-level ONIGENC_CTYPE_* value.
+ *
+ * On success the ONIGENC_CTYPE_* value is returned and *not receives 1 for
+ * the CTYPE_NOT_* forms, 0 otherwise.  For an unknown `pctype`
+ * ONIGERR_PARSER_BUG is returned and *not is left untouched.
+ */
+static int
+parse_ctype_to_enc_ctype(int pctype, int* not)
+{
+  int enc_ctype;
+  int negated;
+
+  switch (pctype) {
+  case CTYPE_WORD:
+  case CTYPE_NOT_WORD:
+    enc_ctype = ONIGENC_CTYPE_WORD;
+    negated   = (pctype == CTYPE_NOT_WORD ? 1 : 0);
+    break;
+
+  case CTYPE_WHITE_SPACE:
+  case CTYPE_NOT_WHITE_SPACE:
+    enc_ctype = ONIGENC_CTYPE_SPACE;
+    negated   = (pctype == CTYPE_NOT_WHITE_SPACE ? 1 : 0);
+    break;
+
+  case CTYPE_DIGIT:
+  case CTYPE_NOT_DIGIT:
+    enc_ctype = ONIGENC_CTYPE_DIGIT;
+    negated   = (pctype == CTYPE_NOT_DIGIT ? 1 : 0);
+    break;
+
+  default:
+    return ONIGERR_PARSER_BUG;
+  }
+
+  *not = negated;
+  return enc_ctype;
+}
+
+/*
+ * One row of a name -> character type lookup table.  The tables in this
+ * file are filled with positional initializers, so the field order must
+ * not change.
+ */
+typedef struct {
+ /* type name; a NULL name terminates the table */
+ UChar *name;
+ /* ONIGENC_CTYPE_* value for the name (-1 in the terminator row) */
+ int ctype;
+ /* number of bytes of `name` that are compared */
+ short int len;
+} PosixBracketEntryType;
+
+/*
+ * Try to read the inside of a POSIX bracket expression -- an optional '^'
+ * followed by a name from the table below and the closing ":]" -- starting
+ * at *src, and add the named character type to `cc`.
+ *
+ * Returns
+ *   0  a bracket expression was consumed; *src points behind ":]"
+ *   1  the text is not a POSIX bracket (no error, *src is not modified)
+ *   ONIGERR_INVALID_POSIX_BRACKET_TYPE  for a known name without ":]", or
+ *      for an unknown "name:]" form
+ *   any non-zero status reported by add_ctype_to_cc()
+ */
+static int
+parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
+{
+#define POSIX_BRACKET_CHECK_LIMIT_LENGTH  20
+#define POSIX_BRACKET_NAME_MAX_LEN         6
+
+  static PosixBracketEntryType PBS[] = {
+    { "alnum",  ONIGENC_CTYPE_ALNUM,  5 },
+    { "alpha",  ONIGENC_CTYPE_ALPHA,  5 },
+    { "blank",  ONIGENC_CTYPE_BLANK,  5 },
+    { "cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
+    { "digit",  ONIGENC_CTYPE_DIGIT,  5 },
+    { "graph",  ONIGENC_CTYPE_GRAPH,  5 },
+    { "lower",  ONIGENC_CTYPE_LOWER,  5 },
+    { "print",  ONIGENC_CTYPE_PRINT,  5 },
+    { "punct",  ONIGENC_CTYPE_PUNCT,  5 },
+    { "space",  ONIGENC_CTYPE_SPACE,  5 },
+    { "upper",  ONIGENC_CTYPE_UPPER,  5 },
+    { "xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
+    { "ascii",  ONIGENC_CTYPE_ASCII,  5 }, /* I don't know origin. Perl? */
+    { (UChar* )NULL, -1, 0 }
+  };
+
+  PosixBracketEntryType *entry;
+  int not, i, c, r;
+  UChar *p = *src;
+
+  /* leading '^' negates the class */
+  not = 0;
+  if (PPEEK == '^') {
+    PINC;
+    not = 1;
+  }
+
+  /* the table is only consulted when enough input remains */
+  if (end - p >= POSIX_BRACKET_NAME_MAX_LEN + 1) {
+    entry = PBS;
+    while (IS_NOT_NULL(entry->name)) {
+      if (onig_strncmp(p, entry->name, entry->len) != 0) {
+        entry++;
+        continue;
+      }
+
+      p += entry->len;
+      if (end - p < 2 || p[0] != ':' || p[1] != ']')
+        return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
+
+      r = add_ctype_to_cc(cc, entry->ctype, not, env);
+      if (r != 0) return r;
+
+      PINC; PINC;   /* step over ":]" */
+      *src = p;
+      return 0;
+    }
+  }
+
+  /* No known name: look a bounded distance ahead for ':' or ']'.  An
+     unknown name that is nevertheless closed by ":]" is an error. */
+  c = 0;
+  for (i = 0; !PEND; ) {
+    c = PPEEK;
+    if (c == ':' || c == ']') break;
+    PINC;
+    if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
+  }
+  if (c == ':' && !PEND) {
+    PINC;
+    if (!PEND) {
+      PFETCH(c);
+      if (c == ']')
+        return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
+    }
+  }
+
+  return 1;   /* 1: is not POSIX bracket, but no error. */
+}
+
+/*
+ * Look up the property name stored in [p, end) and return its
+ * ONIGENC_CTYPE_* value.  The comparison is exact: the length must equal
+ * the table entry's length and the bytes must match.  Unknown names yield
+ * ONIGERR_INVALID_CHAR_PROPERTY_NAME.
+ */
+static int
+property_name_to_ctype(UChar* p, UChar* end)
+{
+  static PosixBracketEntryType PBS[] = {
+    { "Alnum",  ONIGENC_CTYPE_ALNUM,  5 },
+    { "Alpha",  ONIGENC_CTYPE_ALPHA,  5 },
+    { "Blank",  ONIGENC_CTYPE_BLANK,  5 },
+    { "Cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
+    { "Digit",  ONIGENC_CTYPE_DIGIT,  5 },
+    { "Graph",  ONIGENC_CTYPE_GRAPH,  5 },
+    { "Lower",  ONIGENC_CTYPE_LOWER,  5 },
+    { "Print",  ONIGENC_CTYPE_PRINT,  5 },
+    { "Punct",  ONIGENC_CTYPE_PUNCT,  5 },
+    { "Space",  ONIGENC_CTYPE_SPACE,  5 },
+    { "Upper",  ONIGENC_CTYPE_UPPER,  5 },
+    { "XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
+    { "ASCII",  ONIGENC_CTYPE_ASCII,  5 },
+    { (UChar* )NULL, -1, 0 }
+  };
+
+  PosixBracketEntryType *entry = PBS;
+  int name_len = end - p;
+
+  while (IS_NOT_NULL(entry->name)) {
+    if (name_len == entry->len &&
+        onig_strncmp(p, entry->name, entry->len) == 0)
+      return entry->ctype;
+    entry++;
+  }
+
+  return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
+}
+
+/*
+ * Scan a property name that starts at *src and is terminated by '}'.
+ *
+ * On success the ctype returned by property_name_to_ctype() is passed on
+ * and *src is moved behind the '}'.  A negative lookup result is returned
+ * unchanged and *src is not modified.  Meeting '(', ')', '{' or '|' first,
+ * or running out of input, yields ONIGERR_INVALID_CHAR_PROPERTY_NAME.
+ */
+static int
+fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
+{
+  int ctype;
+  UChar *name_end, *p = *src;
+  int c = 0;
+
+  for (;;) {
+    if (PEND) break;
+
+    name_end = p;      /* position of the character fetched next */
+    PFETCH(c);
+    switch (c) {
+    case '}':
+      ctype = property_name_to_ctype(*src, name_end);
+      if (ctype >= 0)
+        *src = p;
+      return ctype;
+
+    case '(':
+    case ')':
+    case '{':
+    case '|':
+      return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
+
+    default:
+      break;
+    }
+  }
+
+  return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
+}
+
+/*
+ * Build a character class node for a character property token.
+ *
+ * The property name is read from *src by fetch_char_property_to_ctype();
+ * the class is filled with the positive type and then flagged as negated
+ * when tok->u.prop.not is set.
+ *
+ * Returns 0 with the new node in *np, or an error code.  When filling the
+ * class fails, the node allocated here is released and *np is reset to
+ * NULL_NODE, so no half-built node is handed to the caller.
+ */
+static int
+parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
+                    ScanEnv* env)
+{
+  int r, ctype;
+  CClassNode* cc;
+
+  ctype = fetch_char_property_to_ctype(src, end, env);
+  if (ctype < 0) return ctype;
+
+  *np = node_new_cclass();
+  CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+  cc = &(NCCLASS(*np));
+  r = add_ctype_to_cc(cc, ctype, 0, env);
+  if (r != 0) {
+    /* do not leak the node that was just allocated */
+    onig_node_free(*np);
+    *np = NULL_NODE;
+    return r;
+  }
+  if (tok->u.prop.not != 0) CCLASS_SET_NOT(cc);
+
+  return 0;
+}
+
+
+/* State of the character class parser (see next_state_val(),
+   next_state_class() and parse_char_class()). */
+enum CCSTATE {
+ /* a value has been read and is held back until the next token is seen */
+ CCS_VALUE,
+ /* a range operator followed a value; the range end is expected next */
+ CCS_RANGE,
+ /* a range has just been completed */
+ CCS_COMPLETE,
+ /* nothing read yet: start of the class, or just after "&&" */
+ CCS_START
+};
+
+/* Kind of the value most recently read inside a character class. */
+enum CCVALTYPE {
+ /* single-byte value; recorded in the bitset of the class */
+ CCV_SB,
+ /* code point; recorded through add_code_range() in the class's mbuf */
+ CCV_CODE_POINT,
+ /* a whole class was read (set by next_state_class()) */
+ CCV_CLASS
+};
+
+/*
+ * State transition performed after a whole character type / class has
+ * been added to `cc`.
+ *
+ * A class is not accepted as the end of a range.  A single value that is
+ * still pending (state CCS_VALUE, type other than CCV_CLASS) is flushed
+ * into `cc` first.  Afterwards the state is CCS_VALUE with type CCV_CLASS.
+ *
+ * Returns 0, ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE, or a negative
+ * status from add_code_range().
+ */
+static int
+next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
+                 enum CCSTATE* state, ScanEnv* env)
+{
+  int r;
+
+  switch (*state) {
+  case CCS_RANGE:
+    return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;
+
+  case CCS_VALUE:
+    /* flush the value that was held back */
+    switch (*type) {
+    case CCV_SB:
+      BITSET_SET_BIT(cc->bs, (int )(*vs));
+      break;
+    case CCV_CODE_POINT:
+      r = add_code_range(&(cc->mbuf), env, *vs, *vs);
+      if (r < 0) return r;
+      break;
+    default:   /* CCV_CLASS: nothing is pending */
+      break;
+    }
+    break;
+
+  default:
+    break;
+  }
+
+  *state = CCS_VALUE;
+  *type  = CCV_CLASS;
+  return 0;
+}
+
+/*
+ * State transition performed when a single value `v` of kind `intype` has
+ * been read inside a character class.
+ *
+ *   CCS_VALUE              the previously pending value *vs is flushed
+ *                          into `cc`; the state stays CCS_VALUE
+ *   CCS_RANGE              the range *vs .. v is added and the state
+ *                          becomes CCS_COMPLETE
+ *   CCS_COMPLETE/CCS_START the state becomes CCS_VALUE
+ *
+ * In every non-error case `v`, `v_israw` and `intype` are stored in *vs,
+ * *vs_israw and *type as the new pending value.
+ *
+ * Returns 0 or an error code (empty range, mismatching code lengths, or a
+ * negative status from add_code_range()).
+ */
+static int
+next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
+               int* vs_israw, int v_israw,
+               enum CCVALTYPE intype, enum CCVALTYPE* type,
+               enum CCSTATE* state, ScanEnv* env)
+{
+  int r;
+
+  if (*state == CCS_VALUE) {
+    /* flush the value that was held back */
+    switch (*type) {
+    case CCV_SB:
+      BITSET_SET_BIT(cc->bs, (int )(*vs));
+      break;
+    case CCV_CODE_POINT:
+      r = add_code_range(&(cc->mbuf), env, *vs, *vs);
+      if (r < 0) return r;
+      break;
+    default:
+      break;
+    }
+  }
+  else if (*state == CCS_RANGE) {
+    if (intype != *type) {
+      /* Only "single byte .. code point" is accepted for mixed ends, and
+         only when the encoding's SB and MB code spaces are continuous. */
+      if (!(intype == CCV_CODE_POINT && *type == CCV_SB &&
+            ONIGENC_IS_CONTINUOUS_SB_MB(env->enc)))
+        return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE;
+
+      bitset_set_range(cc->bs, (int )*vs, 0x7f);
+      r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )0x80, v);
+      if (r < 0) return r;
+    }
+    else if (intype == CCV_SB) {
+      if (*vs > v) {
+        /* empty range: an error unless the syntax tolerates it, in which
+           case nothing is added */
+        if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
+          return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
+      }
+      else {
+        bitset_set_range(cc->bs, (int )*vs, (int )v);
+      }
+    }
+    else {
+      r = add_code_range(&(cc->mbuf), env, *vs, v);
+      if (r < 0) return r;
+    }
+
+    *state = CCS_COMPLETE;
+  }
+  else if (*state == CCS_COMPLETE || *state == CCS_START) {
+    *state = CCS_VALUE;
+  }
+
+  /* the new value becomes the pending one */
+  *vs_israw = v_israw;
+  *vs       = v;
+  *type     = intype;
+  return 0;
+}
+
+/*
+ * Report whether byte `c` occurs at a character start position in
+ * [from, to).  The scan advances by enc_len() of each leading byte.
+ *
+ * When `ignore_escaped` is non-zero, the character that directly follows
+ * an MC_ESC is skipped without being examined.
+ *
+ * Returns 1 if found, 0 otherwise.
+ */
+static int
+char_exist_check(UChar c, UChar* from, UChar* to, int ignore_escaped,
+                 OnigEncoding enc)
+{
+  int after_esc = 0;
+  UChar* p;
+
+  for (p = from; p < to; p += enc_len(enc, *p)) {
+    if (ignore_escaped && after_esc) {
+      /* this character is escaped: skip it */
+      after_esc = 0;
+      continue;
+    }
+
+    if (*p == c) return 1;
+    if (*p == MC_ESC) after_esc = 1;
+  }
+
+  return 0;
+}
+
+/*
+ * Parse a bracketed character class.  On entry *src points just behind the
+ * opening '['; tokens are read with fetch_token_in_cc() until the closing
+ * TK_CC_CLOSE.
+ *
+ * Handled inside the class: a leading '^' (negation), single values and
+ * ranges, raw (octal/hex) multi-byte sequences, POSIX brackets, character
+ * types and properties, nested classes and the "&&" intersection operator.
+ * While a "&&" chain is being parsed, `prev_cc` holds the accumulated
+ * result and `cc` points to the local `work_cc`.
+ *
+ * Returns 0 with the class node in *np and *src advanced past the class.
+ * On failure an error code is returned; every failure after the node has
+ * been allocated goes through the `err` label, which releases the
+ * temporary "&&" buffer and the node.
+ */
+static int
+parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
+                 ScanEnv* env)
+{
+  int r, neg, len, fetched, and_start;
+  OnigCodePoint v, vs;
+  UChar *p;
+  Node* node;
+  CClassNode *cc, *prev_cc;
+  CClassNode work_cc;
+
+  enum CCSTATE state;
+  enum CCVALTYPE val_type, in_type;
+  int val_israw, in_israw;
+
+  prev_cc = (CClassNode* )NULL;
+  *np = NULL_NODE;
+  r = fetch_token_in_cc(tok, src, end, env);
+  if (r == TK_BYTE && tok->u.c == '^' && tok->escaped == 0) {
+    neg = 1;
+    r = fetch_token_in_cc(tok, src, end, env);
+  }
+  else {
+    neg = 0;
+  }
+
+  if (r < 0) return r;
+  if (r == TK_CC_CLOSE) {
+    /* "[]" : only an error when no further ']' follows in the pattern */
+    if (! char_exist_check(']', *src, env->pattern_end, 1, env->enc))
+      return ONIGERR_EMPTY_CHAR_CLASS;
+
+    CC_ESC_WARN(env, "]");
+    r = tok->type = TK_BYTE;  /* allow []...] */
+  }
+
+  *np = node = node_new_cclass();
+  CHECK_NULL_RETURN_VAL(node, ONIGERR_MEMORY);
+  cc = &(NCCLASS(node));
+
+  and_start = 0;
+  state = CCS_START;
+  p = *src;
+  while (r != TK_CC_CLOSE) {
+    fetched = 0;
+    switch (r) {
+    case TK_BYTE:
+      len = enc_len(env->enc, tok->u.c);
+      if (len > 1) {
+        /* head byte of a multi-byte character: re-read it as code point */
+        PUNFETCH;
+        v = ONIGENC_MBC_TO_CODE(env->enc, p, end);
+        p += len;
+        in_type = CCV_CODE_POINT;
+      }
+      else {
+      sb_char:
+        v = (OnigCodePoint )tok->u.c;
+        in_type = CCV_SB;
+      }
+      in_israw = 0;
+      goto val_entry2;
+      break;
+
+    case TK_RAW_BYTE:
+      len = enc_len(env->enc, tok->u.c);
+      if (len > 1 && tok->base != 0) { /* tok->base != 0 : octal or hexadec. */
+        /* collect `len` raw bytes of the same base into one character */
+        UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
+        UChar* bufp = buf;
+        UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
+        int i, base = tok->base;
+
+        if (len > ONIGENC_CODE_TO_MBC_MAXLEN) {
+          bufp = (UChar* )xmalloc(len);
+          if (IS_NULL(bufp)) {
+            r = ONIGERR_MEMORY;
+            goto err;
+          }
+          bufe = bufp + len;
+        }
+        bufp[0] = tok->u.c;
+        for (i = 1; i < len; i++) {
+          r = fetch_token_in_cc(tok, &p, end, env);
+          if (r < 0) goto raw_byte_err;
+          if (r != TK_RAW_BYTE || tok->base != base) break;
+          bufp[i] = tok->u.c;
+        }
+        if (i < len) {
+          r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
+        raw_byte_err:
+          if (bufp != buf) xfree(bufp);
+          goto err;
+        }
+        v = ONIGENC_MBC_TO_CODE(env->enc, bufp, bufe);
+        if (bufp != buf) xfree(bufp);
+        in_type = CCV_CODE_POINT;
+      }
+      else {
+        v = (OnigCodePoint )tok->u.c;
+        in_type = CCV_SB;
+      }
+      in_israw = 1;
+      goto val_entry2;
+      break;
+
+    case TK_CODE_POINT:
+      v = tok->u.code;
+      in_israw = 1;
+    val_entry:
+      len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
+      if (len < 0) {
+        r = len;
+        goto err;
+      }
+      in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
+    val_entry2:
+      r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
+                         &state, env);
+      if (r != 0) goto err;
+      break;
+
+    case TK_POSIX_BRACKET_OPEN:
+      r = parse_posix_bracket(cc, &p, end, env);
+      if (r < 0) goto err;
+      if (r == 1) {  /* is not POSIX bracket */
+        CC_ESC_WARN(env, "[");
+        p = tok->backp;
+        v = (OnigCodePoint )tok->u.c;
+        in_israw = 0;
+        goto val_entry;
+      }
+      goto next_class;
+      break;
+
+    case TK_CHAR_TYPE:
+      {
+        int ctype, not;
+        ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
+        if (ctype < 0) {
+          /* `not` is unset in this case: do not use it */
+          r = ctype;
+          goto err;
+        }
+        r = add_ctype_to_cc(cc, ctype, not, env);
+        if (r != 0) goto err;   /* release node and "&&" buffer */
+      }
+
+    next_class:
+      r = next_state_class(cc, &vs, &val_type, &state, env);
+      if (r != 0) goto err;
+      break;
+
+    case TK_CHAR_PROPERTY:
+      {
+        int ctype;
+
+        ctype = fetch_char_property_to_ctype(&p, end, env);
+        if (ctype < 0) {
+          r = ctype;
+          goto err;
+        }
+        r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);
+        if (r != 0) goto err;
+        goto next_class;
+      }
+      break;
+
+    case TK_CC_RANGE:
+      if (state == CCS_VALUE) {
+        r = fetch_token_in_cc(tok, &p, end, env);
+        if (r < 0) goto err;
+        fetched = 1;
+        if (r == TK_CC_CLOSE) { /* allow [x-] */
+        range_end_val:
+          v = (OnigCodePoint )'-';
+          in_israw = 0;
+          goto val_entry;
+        }
+        else if (r == TK_CC_AND) {
+          CC_ESC_WARN(env, "-");
+          goto range_end_val;
+        }
+        state = CCS_RANGE;
+      }
+      else if (state == CCS_START) {
+        /* [-xa] is allowed */
+        v = (OnigCodePoint )tok->u.c;
+        in_israw = 0;
+
+        r = fetch_token_in_cc(tok, &p, end, env);
+        if (r < 0) goto err;
+        fetched = 1;
+        /* [--x] or [a&&-x] is warned. */
+        if (r == TK_CC_RANGE || and_start != 0)
+          CC_ESC_WARN(env, "-");
+
+        goto val_entry;
+      }
+      else if (state == CCS_RANGE) {
+        CC_ESC_WARN(env, "-");
+        goto sb_char;  /* [!--x] is allowed */
+      }
+      else { /* CCS_COMPLETE */
+        r = fetch_token_in_cc(tok, &p, end, env);
+        if (r < 0) goto err;
+        fetched = 1;
+        if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */
+        else if (r == TK_CC_AND) {
+          CC_ESC_WARN(env, "-");
+          goto range_end_val;
+        }
+
+        if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
+          CC_ESC_WARN(env, "-");
+          goto sb_char;   /* [0-9-a] is allowed as [0-9\-a] */
+        }
+        r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
+        goto err;
+      }
+      break;
+
+    case TK_CC_CC_OPEN: /* [ */
+      {
+        Node *anode;
+        CClassNode* acc;
+
+        /* nested class: parse it recursively and OR it into this one */
+        r = parse_char_class(&anode, tok, &p, end, env);
+        if (r != 0) goto cc_open_err;
+        acc = &(NCCLASS(anode));
+        r = or_cclass(cc, acc, env->enc);
+
+        onig_node_free(anode);
+      cc_open_err:
+        if (r != 0) goto err;
+      }
+      break;
+
+    case TK_CC_AND: /* && */
+      {
+        if (state == CCS_VALUE) {
+          /* flush the pending value of the left operand */
+          r = next_state_val(cc, &vs, 0, &val_israw, 0, CCV_SB,
+                             &val_type, &state, env);
+          if (r != 0) goto err;
+        }
+        /* initialize local variables */
+        and_start = 1;
+        state = CCS_START;
+
+        if (IS_NOT_NULL(prev_cc)) {
+          r = and_cclass(prev_cc, cc, env->enc);
+          if (r != 0) goto err;
+          bbuf_free(cc->mbuf);
+        }
+        else {
+          prev_cc = cc;
+          cc = &work_cc;
+        }
+        initialize_cclass(cc);
+      }
+      break;
+
+    case TK_EOT:
+      r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;
+      goto err;
+      break;
+    default:
+      r = ONIGERR_PARSER_BUG;
+      goto err;
+      break;
+    }
+
+    if (fetched)
+      r = tok->type;
+    else {
+      r = fetch_token_in_cc(tok, &p, end, env);
+      if (r < 0) goto err;
+    }
+  }
+
+  if (state == CCS_VALUE) {
+    /* flush the last pending value */
+    r = next_state_val(cc, &vs, 0, &val_israw, 0, CCV_SB,
+                       &val_type, &state, env);
+    if (r != 0) goto err;
+  }
+
+  if (IS_NOT_NULL(prev_cc)) {
+    /* finish the "&&" chain: fold the last operand into the result */
+    r = and_cclass(prev_cc, cc, env->enc);
+    if (r != 0) goto err;
+    bbuf_free(cc->mbuf);
+    cc = prev_cc;
+  }
+
+  cc->not = neg;
+  if (cc->not != 0 &&
+      IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
+    int is_empty;
+
+    /* a non-empty negated class gets the newline bit set */
+    is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
+    if (is_empty != 0)
+      BITSET_IS_EMPTY(cc->bs, is_empty);
+    if (is_empty == 0)
+      BITSET_SET_BIT(cc->bs, ONIG_NEWLINE);
+  }
+  *src = p;
+  return 0;
+
+ err:
+  /* `cc` differs from the node's class only while it points to work_cc */
+  if (cc != &(NCCLASS(*np)))
+    bbuf_free(cc->mbuf);
+  onig_node_free(*np);
+  return r;
+}
+
+/* Forward declaration: parse_effect() calls parse_subexp() to parse the
+   body of a group. */
+static int parse_subexp(Node** top, OnigToken* tok, int term,
+ UChar** src, UChar* end, ScanEnv* env);
+
+/*
+ * Parse what follows an opening parenthesis: a "(?...)" construct when the
+ * syntax enables it, otherwise a capturing group.  On entry *src points
+ * just behind the '('.
+ *
+ * Return value:
+ *   0    a node with a parsed body is stored in *np
+ *   1    group without an own node: *np is the parsed body
+ *   2    option only, e.g. "(?i)": *np is an option node without body
+ *   3    comment "(?#...)": *np stays NULL
+ *   < 0  error code
+ *
+ * Whenever a node allocated here cannot be completed, it is released and
+ * *np is reset to NULL_NODE before the error is returned.
+ */
+static int
+parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
+             ScanEnv* env)
+{
+  Node *target;
+  OnigOptionType option;
+  int r, c, num;
+  int list_capture;
+  UChar* p = *src;
+
+  *np = NULL;
+  if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
+
+  option = env->option;
+  if (PPEEK == '?' &&
+      IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
+    PINC;
+    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+
+    PFETCH(c);
+    switch (c) {
+    case '#':   /* (?#...) comment */
+      while (1) {
+        if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+        PFETCH(c);
+        if (c == ')') break;
+      }
+      *src = p;
+      return 3; /* 3: comment */
+      break;
+
+    case ':':   /* (?:...) grouping only */
+    group:
+      r = fetch_token(tok, &p, end, env);
+      if (r < 0) return r;
+      r = parse_subexp(np, tok, term, &p, end, env);
+      if (r < 0) return r;
+      *src = p;
+      return 1; /* group */
+      break;
+
+    case '=':   /* (?=...) preceding read */
+      *np = onig_node_new_anchor(ANCHOR_PREC_READ);
+      break;
+    case '!':   /* (?!...) negative preceding read */
+      *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT);
+      break;
+    case '>':   /* (?>...) stop backtrack */
+      *np = node_new_effect(EFFECT_STOP_BACKTRACK);
+      break;
+
+    case '<':   /* look behind (?<=...), (?<!...) */
+      /* "(?<" at the very end of the pattern: nothing left to fetch */
+      if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+      PFETCH(c);
+      if (c == '=')
+        *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND);
+      else if (c == '!')
+        *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT);
+#ifdef USE_NAMED_GROUP
+      else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
+        UChar *name;
+        UChar *name_end;
+
+        PUNFETCH;
+        list_capture = 0;
+
+      named_group:
+        /* also entered from the '@' case below with list_capture == 1 */
+        name = p;
+        r = fetch_name(&p, end, &name_end, env, 0);
+        if (r < 0) return r;
+
+        num = scan_env_add_mem_entry(env);
+        if (num < 0) return num;
+        if (list_capture != 0 && num >= BIT_STATUS_BITS_NUM)
+          return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
+
+        r = name_add(env->reg, name, name_end, num, env);
+        if (r != 0) return r;
+        *np = node_new_effect_memory(env->option, 1);
+        CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+        NEFFECT(*np).regnum = num;
+        if (list_capture != 0)
+          BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
+        env->num_named++;
+      }
+#endif
+      else
+        return ONIGERR_UNDEFINED_GROUP_OPTION;
+      break;
+
+    case '@':
+      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
+#ifdef USE_NAMED_GROUP
+        if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
+          /* peek one character, but never beyond the pattern end */
+          if (!PEND) {
+            PFETCH(c);
+            if (c == '<') {
+              list_capture = 1;
+              goto named_group; /* (?@<name>...) */
+            }
+            PUNFETCH;
+          }
+        }
+#endif
+        *np = node_new_effect_memory(env->option, 0);
+        CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+        num = scan_env_add_mem_entry(env);
+        if (num < 0) {
+          r = num;
+          goto err;
+        }
+        else if (num >= BIT_STATUS_BITS_NUM) {
+          r = ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
+          goto err;
+        }
+        NEFFECT(*np).regnum = num;
+        BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
+      }
+      else {
+        return ONIGERR_UNDEFINED_GROUP_OPTION;
+      }
+      break;
+
+#ifdef USE_POSIXLINE_OPTION
+    case 'p':
+#endif
+    case '-': case 'i': case 'm': case 's': case 'x':
+      {
+        int neg = 0;
+
+        /* read option letters until ')' (option only) or ':' (scoped) */
+        while (1) {
+          switch (c) {
+          case ':':
+          case ')':
+            break;
+
+          case '-':  neg = 1; break;
+          case 'x':  ONOFF(option, ONIG_OPTION_EXTEND,     neg); break;
+          case 'i':  ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break;
+          case 's':
+            if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
+              ONOFF(option, ONIG_OPTION_MULTILINE,  neg);
+            }
+            else
+              return ONIGERR_UNDEFINED_GROUP_OPTION;
+            break;
+
+          case 'm':
+            if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
+              ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
+            }
+            else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
+              ONOFF(option, ONIG_OPTION_MULTILINE,  neg);
+            }
+            else
+              return ONIGERR_UNDEFINED_GROUP_OPTION;
+            break;
+#ifdef USE_POSIXLINE_OPTION
+          case 'p':
+            ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);
+            break;
+#endif
+          default:
+            return ONIGERR_UNDEFINED_GROUP_OPTION;
+          }
+
+          if (c == ')') {
+            *np = node_new_option(option);
+            CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+            *src = p;
+            return 2; /* option only */
+          }
+          else if (c == ':') {
+            OnigOptionType prev = env->option;
+
+            /* the body is parsed with the new options in effect */
+            env->option = option;
+            r = fetch_token(tok, &p, end, env);
+            if (r < 0) return r;
+            r = parse_subexp(&target, tok, term, &p, end, env);
+            env->option = prev;
+            if (r < 0) return r;
+            *np = node_new_option(option);
+            if (IS_NULL(*np)) {
+              /* the parsed body has no owner yet: release it */
+              onig_node_free(target);
+              return ONIGERR_MEMORY;
+            }
+            NEFFECT(*np).target = target;
+            *src = p;
+            return 0;
+          }
+
+          if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+          PFETCH(c);
+        }
+      }
+      break;
+
+    default:
+      return ONIGERR_UNDEFINED_GROUP_OPTION;
+    }
+  }
+  else {
+#ifdef USE_NAMED_GROUP
+    if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP))
+      goto group;
+#endif
+    /* plain "(...)" : capturing group */
+    *np = node_new_effect_memory(env->option, 0);
+    CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+    num = scan_env_add_mem_entry(env);
+    if (num < 0) {
+      r = num;
+      goto err;
+    }
+    NEFFECT(*np).regnum = num;
+  }
+
+  /* common tail: parse the body and attach it to the node built above */
+  CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+  r = fetch_token(tok, &p, end, env);
+  if (r < 0) goto err;
+  r = parse_subexp(&target, tok, term, &p, end, env);
+  if (r < 0) goto err;
+
+  if (NTYPE(*np) == N_ANCHOR)
+    NANCHOR(*np).target = target;
+  else {
+    NEFFECT(*np).target = target;
+    if (NEFFECT(*np).type == EFFECT_MEMORY) {
+      /* Don't move this to previous of parse_subexp() */
+      r = scan_env_set_mem_node(env, NEFFECT(*np).regnum, *np);
+      if (r != 0) goto err;
+    }
+  }
+
+  *src = p;
+  return 0;
+
+ err:
+  /* only reached while *np holds a node allocated by this call */
+  onig_node_free(*np);
+  *np = NULL_NODE;
+  return r;
+}
+
+/*
+ * Attach `target` to the qualifier (repeat) node `qnode`.
+ *
+ * Return value:
+ *   1  the qualifier is exactly {1,1}; nothing is attached
+ *   2  `target` is a string node, `group` is zero and the string could be
+ *      split: only the split-off last character becomes the target
+ *   0  otherwise; `target` was attached, or two nested qualifiers were
+ *      merged by onig_reduce_nested_qualifier()
+ *
+ * With USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR a verbose warning is
+ * issued for some combinations of directly nested repeat operators.
+ */
+static int
+set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
+{
+ QualifierNode* qn;
+
+ qn = &(NQUALIFIER(qnode));
+ if (qn->lower == 1 && qn->upper == 1) {
+ return 1;
+ }
+
+ switch (NTYPE(target)) {
+ case N_STRING:
+ /* a repeat written directly after a string applies to its last
+ character only */
+ if (! group) {
+ StrNode* sn = &(NSTRING(target));
+ if (str_node_can_be_split(sn, env->enc)) {
+ Node* n = str_node_split_last_char(sn, env->enc);
+ if (IS_NOT_NULL(n)) {
+ qn->target = n;
+ return 2;
+ }
+ }
+ }
+ break;
+
+ case N_QUALIFIER:
+ { /* check redundant double repeat. */
+ /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
+ QualifierNode* qnt = &(NQUALIFIER(target));
+
+#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
+ if (qn->by_number == 0 && qnt->by_number == 0 &&
+ IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
+ if (IS_REPEAT_INFINITE(qn->upper)) {
+ if (qn->lower == 0) { /* '*' */
+ redundant:
+ {
+ char buf[WARN_BUFSIZE];
+ /* the message is only built when a warning handler is set */
+ if (onig_verb_warn != onig_null_warn) {
+ onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
+ env->pattern, env->pattern_end,
+ "redundant nested repeat operator");
+ (*onig_verb_warn)(buf);
+ }
+ goto warn_exit;
+ }
+ }
+ else if (qn->lower == 1) { /* '+' */
+ /* (?:a?)+? only allowed. */
+ if (qn->greedy || !(qnt->upper == 1 && qnt->greedy))
+ goto redundant;
+ }
+ }
+ else if (qn->upper == 1 && qn->lower == 0) {
+ if (qn->greedy) { /* '?' */
+ if (!(qnt->lower == 1 && qnt->greedy == 0)) /* not '+?' */
+ goto redundant;
+ }
+ else { /* '??' */
+ /* (?:a+)?? only allowed. (?:a*)?? can be replaced by (?:a+)?? */
+ if (!(qnt->greedy && qnt->lower == 1 &&
+ IS_REPEAT_INFINITE(qnt->upper)))
+ goto redundant;
+ }
+ }
+ }
+
+ warn_exit:
+#endif
+ /* merge the two qualifiers when both are of a "popular" form */
+ if (popular_qualifier_num(qnt) >= 0 && popular_qualifier_num(qn) >= 0) {
+ onig_reduce_nested_qualifier(qnode, target);
+ goto q_exit;
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ qn->target = target;
+ q_exit:
+ return 0;
+}
+
+#ifdef USE_FOLD_MATCH
+/*
+ * Build an alternation (chain of N_ALT nodes) with one raw string node for
+ * each of the info->target_num strings described by `info`.
+ *
+ * info - fold-match description; target_str[i] / target_byte_len[i] give the
+ *        start and byte length of the i-th string
+ * node - out: head of the alternation (NULL_NODE when target_num is 0)
+ *
+ * Returns 0 on success.  On allocation failure returns ONIGERR_MEMORY, frees
+ * everything built so far and leaves *node untouched.
+ */
+static int
+make_alt_node_from_fold_info(OnigEncFoldMatchInfo* info, Node** node)
+{
+  int i;
+  UChar *s, *end;
+  Node *root, **ptail, *snode;
+
+  root  = NULL_NODE;   /* keeps the chain well-formed for the error path */
+  ptail = &root;
+  for (i = 0; i < info->target_num; i++) {
+    s   = info->target_str[i];
+    end = s + info->target_byte_len[i];
+    /* ex.
+       U+00DF match "ss" and "SS", but not match "Ss".
+       So, string nodes must be raw.
+    */
+    snode = node_new_str_raw(s, end);
+    if (IS_NULL(snode)) goto mem_err;
+
+    *ptail = node_new_alt(snode, NULL_NODE);
+    if (IS_NULL(*ptail)) {
+      /* snode is not linked into the chain yet: release it separately */
+      onig_node_free(snode);
+      goto mem_err;
+    }
+    ptail = &(NCONS(*ptail).right);
+  }
+  *ptail = NULL_NODE;
+  *node = root;
+  return 0;
+
+ mem_err:
+  /* root is still local here, so the caller could never free it */
+  if (IS_NOT_NULL(root)) onig_node_free(root);
+  return ONIGERR_MEMORY;
+}
+
+/*
+ * For every code returned by ONIGENC_GET_ALL_FOLD_MATCH_CODE() that is a
+ * member of `cc`, append the fold-match target strings of that code to an
+ * alternation of raw string nodes.  Targets whose first character equals the
+ * code itself are skipped ("ignore single char").
+ *
+ * enc  - encoding that supplies the fold-match tables
+ * cc   - character class being expanded
+ * root - out: head of the alternation, NULL_NODE when nothing was added
+ *
+ * Returns the number of alternatives added (>= 0).  On allocation failure
+ * returns ONIGERR_MEMORY with the partial chain freed and *root reset to
+ * NULL_NODE.
+ */
+static int
+make_fold_alt_node_from_cc(OnigEncoding enc, CClassNode* cc, Node** root)
+{
+  int i, j, flen, len, ncode, n;
+  UChar *s, *end, buf[ONIGENC_CODE_TO_MBC_MAXLEN];
+  OnigCodePoint* codes;
+  Node **ptail, *snode;
+  OnigEncFoldMatchInfo* info;
+
+  *root = NULL_NODE;
+  ptail = root;
+
+  ncode = ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc, &codes);
+  n = 0;
+  for (i = 0; i < ncode; i++) {
+    if (onig_is_code_in_cc(enc, codes[i], cc)) {
+      len  = ONIGENC_CODE_TO_MBC(enc, codes[i], buf);
+      flen = ONIGENC_GET_FOLD_MATCH_INFO(enc, buf, buf + len, &info);
+      if (flen > 0) { /* fold */
+        for (j = 0; j < info->target_num; j++) {
+          s   = info->target_str[j];
+          end = s + info->target_byte_len[j];
+          if (onig_strncmp(s, buf, enc_len(enc, *s)) == 0)
+            continue;  /* ignore single char. */
+
+          snode = node_new_str_raw(s, end);
+          if (IS_NULL(snode)) goto mem_err;
+
+          *ptail = node_new_alt(snode, NULL_NODE);
+          if (IS_NULL(*ptail)) {
+            /* snode is not linked into the chain yet */
+            onig_node_free(snode);
+            goto mem_err;
+          }
+          ptail = &(NCONS(*ptail).right);
+          n++;
+        }
+      }
+    }
+  }
+
+  return n;
+
+ mem_err:
+  /* parse_exp returns on a negative result without looking at *root,
+     so the partial chain has to be released here. */
+  if (IS_NOT_NULL(*root)) onig_node_free(*root);
+  *root = NULL_NODE;
+  return ONIGERR_MEMORY;
+}
+#endif
+
+/*
+ * Parse one expression (an atom plus any repeat operators that follow it).
+ *
+ * np   - out: root of the parsed expression
+ * tok  - current token on entry; holds the first token after the expression
+ *        on return
+ * term - token type that terminates the enclosing sub-expression
+ * src  - in/out: read position in the pattern
+ * end  - end of the pattern
+ * env  - scan environment (option, enc, syntax, num_call are used here)
+ *
+ * Returns the type of the token now held in `tok` (>= 0), or a negative
+ * ONIGERR_* code.
+ */
+static int
+parse_exp(Node** np, OnigToken* tok, int term,
+          UChar** src, UChar* end, ScanEnv* env)
+{
+  int r, len, group = 0;
+  Node* qn;
+  Node** targetp;
+
+ start:
+  *np = NULL;
+  if (tok->type == term)
+    goto end_of_token;
+
+  switch (tok->type) {
+  case TK_ALT:
+  case TK_EOT:
+  end_of_token:
+    *np = node_new_empty();
+    return tok->type;
+    break;
+
+  case TK_SUBEXP_OPEN:
+    r = parse_effect(np, tok, TK_SUBEXP_CLOSE, src, end, env);
+    if (r < 0) return r;
+    if (r == 1) group = 1;
+    else if (r == 2) { /* option only */
+      Node* target;
+      OnigOptionType prev = env->option;
+
+      /* the option applies to the rest of the enclosing sub-expression */
+      env->option = NEFFECT(*np).option;
+      r = fetch_token(tok, src, end, env);
+      if (r < 0) {
+        env->option = prev;  /* do not leave the temporary option behind */
+        return r;
+      }
+      r = parse_subexp(&target, tok, term, src, end, env);
+      env->option = prev;
+      if (r < 0) return r;
+      NEFFECT(*np).target = target;
+      return tok->type;
+    }
+    else if (r == 3) { /* comment */
+      r = fetch_token(tok, src, end, env);
+      if (r < 0) return r;
+      goto start;
+    }
+    break;
+
+  case TK_SUBEXP_CLOSE:
+    if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))
+      return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;
+
+    /* allowed by the syntax: treat the ')' as a literal */
+    if (tok->escaped) goto tk_raw_byte;
+    else goto tk_byte;
+    break;
+
+  case TK_BYTE:
+  tk_byte:
+    {
+      *np = node_new_str_char((UChar )tok->u.c);
+      CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+
+      /* gather consecutive TK_BYTE tokens into a single string node */
+      while (1) {
+        len = enc_len(env->enc, tok->u.c);
+        if (len > 1) {
+          /* copy the remaining bytes of a multi-byte character */
+          r = onig_node_str_cat(*np, *src, *src + len - 1);
+          if (r < 0) return r;
+          *src += (len - 1);
+        }
+
+        r = fetch_token(tok, src, end, env);
+        if (r < 0) return r;
+        if (r != TK_BYTE) break;
+
+        r = node_str_cat_char(*np, (UChar )tok->u.c);
+        if (r < 0) return r;
+      }
+
+    fold_entry:
+#ifdef USE_FOLD_MATCH
+      if (IS_IGNORECASE(env->option) && ONIGENC_IS_FOLD_MATCH(env->enc)) {
+        int flen, ret;
+        Node *root, **ptail, *work, *snode, *anode;
+        UChar *p, *pprev;
+        OnigEncFoldMatchInfo* fold_info;
+        StrNode* sn = &(NSTRING(*np));
+
+        /* rebuild the string as a list: plain pieces stay string nodes,
+           each fold-match position becomes an alternation */
+        ptail = &root;
+        pprev = sn->s;
+        for (p = sn->s; p < sn->end; ) {
+          flen = ONIGENC_GET_FOLD_MATCH_INFO(env->enc, p, sn->end, &fold_info);
+          if (flen > 0) { /* fold */
+            ret = make_alt_node_from_fold_info(fold_info, &anode);
+            if (ret != 0) return ret;
+            work = node_new_list(anode, NULL);
+            CHECK_NULL_RETURN_VAL(work, ONIGERR_MEMORY);
+
+            if (pprev < p) {
+              snode = node_new_str(pprev, p);
+              CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+              *ptail = node_new_list(snode, work);
+              CHECK_NULL_RETURN_VAL(*ptail, ONIGERR_MEMORY);
+            }
+            else {
+              *ptail = work;
+            }
+            ptail = &(NCONS(work).right);
+            p += flen;
+            pprev = p;
+          }
+          else
+            p += enc_len(env->enc, *p);
+        }
+        *ptail = NULL_NODE;
+        if (IS_NOT_NULL(root)) {
+          if (pprev < sn->end) {
+            snode = node_new_str(pprev, sn->end);
+            CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+            *ptail = node_new_list(snode, NULL_NODE);
+            CHECK_NULL_RETURN_VAL(*ptail, ONIGERR_MEMORY);
+          }
+          onig_node_free(*np);
+          *np = root;
+        }
+      }
+#endif
+      /* the next token is already in `tok`: skip the fetch at re_entry */
+      targetp = np;
+      goto repeat;
+    }
+    break;
+
+  case TK_RAW_BYTE:
+  tk_raw_byte:
+    {
+      int expect_len;
+
+      *np = node_new_str_raw_char((UChar )tok->u.c);
+      CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+      expect_len = enc_len(env->enc, tok->u.c);
+      len = 1;
+      while (1) {
+        r = fetch_token(tok, src, end, env);
+        if (r < 0) return r;
+        if (r != TK_RAW_BYTE) {
+#ifndef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
+          if (len >= expect_len) {
+            NSTRING_CLEAR_RAW(*np);
+          }
+#endif
+          goto fold_entry;
+        }
+
+        r = node_str_cat_char(*np, (UChar )tok->u.c);
+        if (r < 0) return r;
+        len++;
+      }
+    }
+    break;
+
+  case TK_CODE_POINT:
+    {
+      UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
+      int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
+      if (num < 0) return num;
+#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
+      *np = node_new_str_raw(buf, buf + num);
+#else
+      *np = node_new_str(buf, buf + num);
+#endif
+      CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+    }
+    break;
+
+  case TK_QUOTE_OPEN:
+    {
+      OnigCodePoint end_op[] = { (OnigCodePoint )MC_ESC, (OnigCodePoint )'E' };
+      UChar *qstart, *qend, *nextp;
+
+      qstart = *src;
+      qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
+      if (IS_NULL(qend)) {
+        /* no closing sequence: the quote runs to the end of the pattern */
+        nextp = qend = end;
+      }
+      *np = node_new_str(qstart, qend);
+      CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+      *src = nextp;
+    }
+    break;
+
+  case TK_CHAR_TYPE:
+    {
+      switch (tok->u.subtype) {
+      case CTYPE_WORD:
+      case CTYPE_NOT_WORD:
+        *np = node_new_ctype(tok->u.subtype);
+        CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+        break;
+
+      case CTYPE_WHITE_SPACE:
+      case CTYPE_NOT_WHITE_SPACE:
+      case CTYPE_DIGIT:
+      case CTYPE_NOT_DIGIT:
+        {
+          CClassNode* cc;
+          int ctype, not;
+
+          ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
+
+          *np = node_new_cclass();
+          CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+          cc = &(NCCLASS(*np));
+          add_ctype_to_cc(cc, ctype, 0, env);
+          if (not != 0) CCLASS_SET_NOT(cc);
+        }
+        break;
+
+      default:
+        return ONIGERR_PARSER_BUG;
+        break;
+      }
+    }
+    break;
+
+  case TK_CHAR_PROPERTY:
+    r = parse_char_property(np, tok, src, end, env);
+    if (r != 0) return r;
+    break;
+
+  case TK_CC_OPEN:
+    r = parse_char_class(np, tok, src, end, env);
+    if (r != 0) return r;
+
+#ifdef USE_FOLD_MATCH
+    if (IS_IGNORECASE(env->option) && ONIGENC_IS_FOLD_MATCH(env->enc)) {
+      int res;
+      Node *alt_root, *work;
+      CClassNode* cc = &(NCCLASS(*np));
+
+      res = make_fold_alt_node_from_cc(env->enc, cc, &alt_root);
+      if (res < 0) return res;
+      if (res > 0) {
+        /* class | fold-match strings */
+        work = node_new_alt(*np, alt_root);
+        if (IS_NULL(work)) {
+          onig_node_free(alt_root);
+          return ONIGERR_MEMORY;
+        }
+        *np = work;
+      }
+    }
+#endif
+    break;
+
+  case TK_ANYCHAR:
+    *np = node_new_anychar();
+    CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+    break;
+
+  case TK_ANYCHAR_ANYTIME:
+    *np = node_new_anychar();
+    CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+    qn = node_new_qualifier(0, REPEAT_INFINITE, 0);
+    CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY);
+    NQUALIFIER(qn).target = *np;
+    *np = qn;
+    break;
+
+  case TK_BACKREF:
+    len = tok->u.backref.num;
+    *np = node_new_backref(len,
+                   (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
+                           tok->u.backref.by_name, env);
+    CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+    break;
+
+#ifdef USE_SUBEXP_CALL
+  case TK_CALL:
+    *np = node_new_call(tok->u.call.name, tok->u.call.name_end);
+    CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+    env->num_call++;
+    break;
+#endif
+
+  case TK_ANCHOR:
+    *np = onig_node_new_anchor(tok->u.anchor);
+    /* checked like every other allocation here: a NULL node would reach
+       is_invalid_qualifier_target() below */
+    CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+    break;
+
+  case TK_OP_REPEAT:
+  case TK_INTERVAL:
+    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {
+      if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))
+        return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;
+      else
+        *np = node_new_empty();
+    }
+    else {
+      /* re-read the operator text as ordinary bytes */
+      *src = tok->backp;
+      goto tk_byte;
+    }
+    break;
+
+  default:
+    return ONIGERR_PARSER_BUG;
+    break;
+  }
+
+  {
+    targetp = np;
+
+  re_entry:
+    r = fetch_token(tok, src, end, env);
+    if (r < 0) return r;
+
+  repeat:
+    if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
+      if (is_invalid_qualifier_target(*targetp))
+        return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
+
+      qn = node_new_qualifier(tok->u.repeat.lower, tok->u.repeat.upper,
+                              (r == TK_INTERVAL ? 1 : 0));
+      CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY);
+      NQUALIFIER(qn).greedy = tok->u.repeat.greedy;
+      r = set_qualifier(qn, *targetp, group, env);
+      if (r < 0) return r;
+
+      if (r == 1) {
+        /* {1,1}: set_qualifier() attached nothing and the target stays as
+           it is, so the fresh qualifier node is unused.  Release it instead
+           of dropping the pointer. */
+        NQUALIFIER(qn).target = NULL_NODE;
+        onig_node_free(qn);
+        goto re_entry;
+      }
+
+      if (tok->u.repeat.possessive != 0) {
+        Node* en;
+        en = node_new_effect(EFFECT_STOP_BACKTRACK);
+        CHECK_NULL_RETURN_VAL(en, ONIGERR_MEMORY);
+        NEFFECT(en).target = qn;
+        qn = en;
+      }
+
+      if (r == 0) {
+        *targetp = qn;
+      }
+      else if (r == 2) { /* split case: /abc+/ */
+        Node *tmp;
+
+        /* list: (string without its last char) -> (repeat of last char) */
+        *targetp = node_new_list(*targetp, NULL);
+        CHECK_NULL_RETURN_VAL(*targetp, ONIGERR_MEMORY);
+        tmp = NCONS(*targetp).right = node_new_list(qn, NULL);
+        CHECK_NULL_RETURN_VAL(tmp, ONIGERR_MEMORY);
+        targetp = &(NCONS(tmp).left);
+      }
+      goto re_entry;
+    }
+  }
+
+  return r;
+}
+
+/*
+ * Parse one branch: a sequence of expressions up to TK_ALT, TK_EOT or `term`.
+ *
+ * top - out: the single expression node, or an N_LIST chain when the branch
+ *       holds more than one expression
+ * tok/term/src/end/env - passed through to parse_exp()
+ *
+ * Returns the token type that ended the branch, or a negative ONIGERR_* code.
+ * On error *top is either NULL or a well-formed partial list.
+ */
+static int
+parse_branch(Node** top, OnigToken* tok, int term,
+             UChar** src, UChar* end, ScanEnv* env)
+{
+  int r;
+  Node *node, **headp;
+
+  *top = NULL;
+  r = parse_exp(&node, tok, term, src, end, env);
+  if (r < 0) return r;
+
+  if (r == TK_EOT || r == term || r == TK_ALT) {
+    *top = node;
+  }
+  else {
+    *top = node_new_list(node, NULL);
+    if (IS_NULL(*top)) {
+      /* node was not linked anywhere: release it before reporting */
+      onig_node_free(node);
+      return ONIGERR_MEMORY;
+    }
+    headp = &(NCONS(*top).right);
+    while (r != TK_EOT && r != term && r != TK_ALT) {
+      r = parse_exp(&node, tok, term, src, end, env);
+      if (r < 0) return r;
+
+      if (NTYPE(node) == N_LIST) {
+        /* splice a returned list in place and continue after its tail */
+        *headp = node;
+        while (IS_NOT_NULL(NCONS(node).right)) node = NCONS(node).right;
+        headp = &(NCONS(node).right);
+      }
+      else {
+        *headp = node_new_list(node, NULL);
+        if (IS_NULL(*headp)) {
+          onig_node_free(node);
+          return ONIGERR_MEMORY;
+        }
+        headp = &(NCONS(*headp).right);
+      }
+    }
+  }
+
+  return r;
+}
+
+/*
+ * Parse a sub-expression: one or more branches separated by TK_ALT.
+ *
+ * top  - out: the single branch, or an N_ALT chain with one entry per branch
+ * term - terminating token: TK_EOT or TK_SUBEXP_CLOSE
+ * tok/src/end/env - passed through to parse_branch() / fetch_token()
+ *
+ * Returns `term` on success, or a negative ONIGERR_* code.  A branch that
+ * could not be linked into *top is freed here; whatever is already linked
+ * stays reachable through *top.
+ */
+static int
+parse_subexp(Node** top, OnigToken* tok, int term,
+             UChar** src, UChar* end, ScanEnv* env)
+{
+  int r;
+  Node *node, **headp;
+
+  *top = NULL;
+  r = parse_branch(&node, tok, term, src, end, env);
+  if (r < 0) {
+    onig_node_free(node);
+    return r;
+  }
+
+  if (r == term) {
+    *top = node;
+  }
+  else if (r == TK_ALT) {
+    *top = node_new_alt(node, NULL);
+    if (IS_NULL(*top)) {
+      onig_node_free(node);
+      return ONIGERR_MEMORY;
+    }
+    headp = &(NCONS(*top).right);
+    while (r == TK_ALT) {
+      r = fetch_token(tok, src, end, env);
+      if (r < 0) return r;
+      r = parse_branch(&node, tok, term, src, end, env);
+      if (r < 0) {
+        /* same handling as for the first branch above */
+        onig_node_free(node);
+        return r;
+      }
+
+      *headp = node_new_alt(node, NULL);
+      if (IS_NULL(*headp)) {
+        onig_node_free(node);
+        return ONIGERR_MEMORY;
+      }
+      headp = &(NCONS(*headp).right);
+    }
+
+    if (tok->type != term)
+      goto err;
+  }
+  else {
+    /* branch ended on an unexpected token; the branch is not linked to *top */
+    onig_node_free(node);
+  err:
+    if (term == TK_SUBEXP_CLOSE)
+      return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
+    else
+      return ONIGERR_PARSER_BUG;
+  }
+
+  return r;
+}
+
+/*
+ * Parse a complete pattern: fetch the first token, then parse a
+ * sub-expression terminated by TK_EOT.
+ *
+ * Returns 0 on success or the negative code reported by fetch_token() /
+ * parse_subexp().
+ */
+static int
+parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
+{
+  OnigToken tok;
+  int status;
+
+  status = fetch_token(&tok, src, end, env);
+  if (status >= 0)
+    status = parse_subexp(top, &tok, TK_EOT, src, end, env);
+
+  /* non-negative results are token types; callers only want 0 */
+  return (status < 0 ? status : 0);
+}
+
+/*
+ * Entry point of the parser: reset `env`, seed it from `reg` and the pattern
+ * bounds, and parse [pattern, end) into a tree stored in *root.
+ *
+ * reg->num_mem is updated from env->num_mem whether or not parsing succeeds.
+ * Returns the result of parse_regexp().
+ */
+extern int
+onig_parse_make_tree(Node** root, UChar* pattern, UChar* end, regex_t* reg,
+                     ScanEnv* env)
+{
+  UChar* cursor = pattern;
+  int status;
+
+#ifdef USE_NAMED_GROUP
+  names_clear(reg);
+#endif
+
+  scan_env_clear(env);
+  env->reg         = reg;
+  env->pattern     = pattern;
+  env->pattern_end = end;
+  env->enc         = reg->enc;
+  env->syntax      = reg->syntax;
+  env->option      = reg->options;
+
+  *root = NULL;
+  status = parse_regexp(root, &cursor, end, env);
+  reg->num_mem = env->num_mem;
+  return status;
+}
+
+/*
+ * Record the pattern fragment [arg, arg_end) in env->error / env->error_end.
+ * `ecode` is accepted but not used by this function.
+ */
+extern void
+onig_scan_env_set_error_string(ScanEnv* env, int ecode,
+                               UChar* arg, UChar* arg_end)
+{
+  env->error_end = arg_end;
+  env->error     = arg;
+}
diff --git a/regparse.h b/regparse.h
new file mode 100644
index 000000000..b2726becb
--- /dev/null
+++ b/regparse.h
@@ -0,0 +1,277 @@
+/**********************************************************************
+
+ regparse.h - Oniguruma (regular expression library)
+
+ Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
+
+**********************************************************************/
+#ifndef REGPARSE_H
+#define REGPARSE_H
+
+#include "regint.h"
+
+/* node type: one bit per node kind; the value is kept in Node.type and read
+ with NTYPE(). Being single bits, several kinds can be tested with one mask,
+ as IS_NODE_TYPE_SIMPLE does below. */
+#define N_STRING (1<< 0)
+#define N_CCLASS (1<< 1)
+#define N_CTYPE (1<< 2)
+#define N_ANYCHAR (1<< 3)
+#define N_BACKREF (1<< 4)
+#define N_QUALIFIER (1<< 5)
+#define N_EFFECT (1<< 6)
+#define N_ANCHOR (1<< 7)
+#define N_LIST (1<< 8)
+#define N_ALT (1<< 9)
+#define N_CALL (1<<10)
+/* non-zero when `type` is N_STRING, N_CCLASS, N_CTYPE, N_ANYCHAR or N_BACKREF */
+#define IS_NODE_TYPE_SIMPLE(type) \
+ (((type) & (N_STRING | N_CCLASS | N_CTYPE | N_ANYCHAR | N_BACKREF)) != 0)
+/* accessors: NTYPE() reads Node.type, the others select a member of Node.u */
+#define NTYPE(node) ((node)->type)
+#define NCONS(node) ((node)->u.cons)
+#define NSTRING(node) ((node)->u.str)
+#define NCCLASS(node) ((node)->u.cclass)
+#define NCTYPE(node) ((node)->u.ctype)
+#define NQUALIFIER(node) ((node)->u.qualifier)
+#define NANCHOR(node) ((node)->u.anchor)
+#define NBACKREF(node) ((node)->u.backref)
+#define NEFFECT(node) ((node)->u.effect)
+#define NCALL(node) ((node)->u.call)
+
+/* character-type selectors (one bit each) */
+#define CTYPE_WORD              (1<<0)
+#define CTYPE_NOT_WORD          (1<<1)
+#define CTYPE_WHITE_SPACE       (1<<2)
+#define CTYPE_NOT_WHITE_SPACE   (1<<3)
+#define CTYPE_DIGIT             (1<<4)
+#define CTYPE_NOT_DIGIT         (1<<5)
+
+
+/* combined anchor masks; the ANCHOR_* bits themselves are defined elsewhere */
+#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_PL)
+#define ANCHOR_END_BUF_MASK      (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
+
+/* values of EffectNode.type */
+#define EFFECT_MEMORY           (1<<0)
+#define EFFECT_OPTION           (1<<1)
+#define EFFECT_STOP_BACKTRACK   (1<<2)
+
+/* upper bound of a repeat without limit; parenthesized so the macro can be
+   used safely inside any expression */
+#define REPEAT_INFINITE         (-1)
+#define IS_REPEAT_INFINITE(n)   ((n) == REPEAT_INFINITE)
+
+#define NODE_STR_MARGIN         16
+#define NODE_STR_BUF_SIZE       24  /* sizeof(CClassNode) - sizeof(int)*4 */
+#define NODE_BACKREFS_SIZE       7
+
+/* bits of StrNode.flag */
+#define NSTR_RAW                (1<<0) /* by backslashed number */
+#define NSTR_CASE_AMBIG         (1<<1)
+
+/* The flag setters below are wrapped in parentheses so each one expands to a
+   single expression; existing statement uses `MACRO(x);` are unaffected. */
+#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s)
+#define NSTRING_SET_RAW(node)          ((node)->u.str.flag |= NSTR_RAW)
+#define NSTRING_CLEAR_RAW(node)        ((node)->u.str.flag &= ~NSTR_RAW)
+#define NSTRING_SET_CASE_AMBIG(node)   ((node)->u.str.flag |= NSTR_CASE_AMBIG)
+#define NSTRING_IS_RAW(node)          (((node)->u.str.flag & NSTR_RAW) != 0)
+#define NSTRING_IS_CASE_AMBIG(node)  \
+    (((node)->u.str.flag & NSTR_CASE_AMBIG) != 0)
+
+/* Select the reference array of a BackrefNode: back_dynamic when it is set,
+   otherwise the embedded back_static.
+   NOTE(review): the replacement text ends in ';', so this can only be used as
+   the tail of a statement.  Left unchanged because callers outside this
+   header may rely on it. */
+#define BACKREFS_P(br) \
+  (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static);
+
+/* mark a character class as negated */
+#define CCLASS_SET_NOT(cc)      ((cc)->not = 1)
+
+/* NOTE(review): presumably the values of QualifierNode.target_empty_info;
+   confirm against the code that assigns that field */
+#define NQ_TARGET_ISNOT_EMPTY     0
+#define NQ_TARGET_IS_EMPTY        1
+#define NQ_TARGET_IS_EMPTY_MEM    2
+#define NQ_TARGET_IS_EMPTY_REC    3
+
+/* Payload of a string node (Node.u.str, reached through NSTRING()). */
+typedef struct {
+ UChar* s; /* first byte of the string */
+ UChar* end; /* end of the string; NSTRING_LEN() is end - s */
+ unsigned int flag; /* NSTR_* bits */
+ int capa; /* (allocated size - 1) or 0: use buf[] */
+ UChar buf[NODE_STR_BUF_SIZE]; /* embedded storage, see capa */
+} StrNode;
+/* Payload of a character-class node (Node.u.cclass, reached through NCCLASS()). */
+typedef struct {
+ int not; /* set to 1 by CCLASS_SET_NOT() */
+ BitSet bs;
+ BBuf* mbuf; /* multi-byte info or NULL */
+} CClassNode;
+/* Payload of a repeat node (Node.u.qualifier, reached through NQUALIFIER()). */
+typedef struct {
+ struct _Node* target; /* node being repeated */
+ int lower; /* minimum repeat count */
+ int upper; /* maximum repeat count, or REPEAT_INFINITE */
+ int greedy; /* non-zero for a greedy repeat */
+ int by_number; /* {n,m} */
+ int target_empty_info; /* NOTE(review): presumably one of NQ_TARGET_* - confirm */
+ struct _Node* head_exact;
+ struct _Node* next_head_exact;
+ int is_refered; /* include called node. don't eliminate even if {0} */
+} QualifierNode;
+
+/* status bits: flags kept in the `state` member of a node payload; the
+ macros below test or modify them on EffectNode, CallNode and BackrefNode */
+#define NST_MIN_FIXED (1<<0)
+#define NST_MAX_FIXED (1<<1)
+#define NST_CLEN_FIXED (1<<2)
+#define NST_MARK1 (1<<3)
+#define NST_MARK2 (1<<4)
+#define NST_MEM_BACKREFED (1<<5)
+#define NST_SIMPLE_REPEAT (1<<6) /* for stop backtrack optimization */
+
+#define NST_RECURSION (1<<7)
+#define NST_CALLED (1<<8)
+#define NST_ADDR_FIXED (1<<9)
+#define NST_NAMED_GROUP (1<<10)
+#define NST_NAME_REF (1<<11)
+/* these two take a Node* and update node->u.effect.state */
+#define SET_EFFECT_STATUS(node,f) (node)->u.effect.state |= (f)
+#define CLEAR_EFFECT_STATUS(node,f) (node)->u.effect.state &= ~(f)
+/* the IS_EFFECT_* tests take a pointer to the payload (en->state), not a Node* */
+#define IS_EFFECT_CALLED(en) (((en)->state & NST_CALLED) != 0)
+#define IS_EFFECT_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0)
+#define IS_EFFECT_RECURSION(en) (((en)->state & NST_RECURSION) != 0)
+#define IS_EFFECT_MARK1(en) (((en)->state & NST_MARK1) != 0)
+#define IS_EFFECT_MARK2(en) (((en)->state & NST_MARK2) != 0)
+#define IS_EFFECT_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0)
+#define IS_EFFECT_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0)
+#define IS_EFFECT_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0)
+#define IS_EFFECT_SIMPLE_REPEAT(en) (((en)->state & NST_SIMPLE_REPEAT) != 0)
+#define IS_EFFECT_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0)
+/* SET_CALL_RECURSION takes a Node*; the IS_* tests take a payload pointer */
+#define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION
+#define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0)
+#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0)
+#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0)
+/* Payload of an effect node (Node.u.effect, reached through NEFFECT()). */
+typedef struct {
+ int state; /* NST_* bits */
+ int type; /* one of EFFECT_MEMORY / EFFECT_OPTION / EFFECT_STOP_BACKTRACK */
+ int regnum; /* for EFFECT_MEMORY: number returned by scan_env_add_mem_entry() */
+ OnigOptionType option; /* option value that parse_exp copies into env->option */
+ struct _Node* target; /* enclosed sub-tree */
+ AbsAddrType call_addr;
+ /* for multiple call reference */
+ OnigDistance min_len; /* min length (byte) */
+ OnigDistance max_len; /* max length (byte) */
+ int char_len; /* character length */
+ int opt_count; /* referenced count in optimize_node_left() */
+} EffectNode;
+/* Types used for subexp calls; the structs exist only with USE_SUBEXP_CALL. */
+#define CALLNODE_REFNUM_UNDEF -1
+
+#ifdef USE_SUBEXP_CALL
+
+typedef struct {
+ int offset;
+ struct _Node* target;
+} UnsetAddr;
+
+typedef struct {
+ int num;
+ int alloc;
+ UnsetAddr* us; /* array of entries; num and alloc are kept beside it */
+} UnsetAddrList;
+
+typedef struct {
+ int state; /* NST_* bits, see SET_CALL_RECURSION / IS_CALL_* */
+ int ref_num;
+ UChar* name; /* name and name_end are the two pointers given to node_new_call() */
+ UChar* name_end;
+ struct _Node* target; /* EffectNode : EFFECT_MEMORY */
+ UnsetAddrList* unset_addr_list;
+} CallNode;
+
+#endif
+/* Payload of a back-reference node (Node.u.backref, reached through NBACKREF()). */
+typedef struct {
+ int state; /* NST_* bits, see IS_BACKREF_NAME_REF */
+ int back_num;
+ int back_static[NODE_BACKREFS_SIZE]; /* used by BACKREFS_P() when back_dynamic is NULL */
+ int* back_dynamic; /* when not NULL, BACKREFS_P() selects this array */
+} BackrefNode;
+/* Payload of an anchor node (Node.u.anchor, reached through NANCHOR()). */
+typedef struct {
+ int type;
+ struct _Node* target; /* sub-tree attached by parse_effect when the node is an N_ANCHOR */
+ int char_len;
+} AnchorNode;
+/* Parse-tree node: a type tag plus a union holding the payload for that type. */
+typedef struct _Node {
+ int type; /* one of the N_* bits, read with NTYPE() */
+ union {
+ StrNode str;
+ CClassNode cclass;
+ QualifierNode qualifier;
+ EffectNode effect;
+#ifdef USE_SUBEXP_CALL
+ CallNode call;
+#endif
+ BackrefNode backref;
+ AnchorNode anchor;
+ struct {
+ struct _Node* left; /* element held by this cell */
+ struct _Node* right; /* next cell of the chain, or NULL */
+ } cons; /* used by N_LIST and N_ALT chains, reached through NCONS() */
+ struct {
+ int type;
+ } ctype;
+ } u;
+} Node;
+
+#define NULL_NODE ((Node* )0)
+/* Parser state shared by the parse functions, plus the memory-node table. */
+#define SCANENV_MEMNODES_SIZE 8
+#define SCANENV_MEM_NODES(senv) \
+ (IS_NOT_NULL((senv)->mem_nodes_dynamic) ? \
+ (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static)
+/* SCANENV_MEM_NODES() yields mem_nodes_dynamic when set, else mem_nodes_static */
+typedef struct {
+ OnigOptionType option; /* current options; seeded from reg->options */
+ OnigEncoding enc; /* seeded from reg->enc */
+ OnigSyntaxType* syntax; /* seeded from reg->syntax */
+ BitStatusType capture_history;
+ BitStatusType bt_mem_start;
+ BitStatusType bt_mem_end;
+ BitStatusType backrefed_mem;
+ UChar* pattern; /* start of the pattern being parsed */
+ UChar* pattern_end; /* end of the pattern being parsed */
+ UChar* error; /* set by onig_scan_env_set_error_string() */
+ UChar* error_end; /* set by onig_scan_env_set_error_string() */
+ regex_t* reg; /* for reg->names only */
+ int num_call; /* incremented by parse_exp for every TK_CALL token */
+#ifdef USE_SUBEXP_CALL
+ UnsetAddrList* unset_addr_list;
+#endif
+ int num_mem; /* copied to reg->num_mem by onig_parse_make_tree() */
+#ifdef USE_NAMED_GROUP
+ int num_named;
+#endif
+ int mem_alloc;
+ Node* mem_nodes_static[SCANENV_MEMNODES_SIZE];
+ Node** mem_nodes_dynamic;
+} ScanEnv;
+
+/* Tests of the op / op2 / behavior members of an OnigSyntaxType against a bit mask. */
+#define IS_SYNTAX_OP(syn, opm) (((syn)->op & (opm)) != 0)
+#define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0)
+#define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0)
+/* Functions declared for use outside the parser. P_() is defined elsewhere - presumably a prototype wrapper. */
+extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
+extern int onig_strncmp P_((UChar* s1, UChar* s2, int n));
+extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
+extern int onig_scan_unsigned_number P_((UChar** src, UChar* end, OnigEncoding enc));
+extern void onig_reduce_nested_qualifier P_((Node* pnode, Node* cnode));
+extern void onig_node_conv_to_str_node P_((Node* node, int raw));
+extern int onig_node_str_cat P_((Node* node, UChar* s, UChar* end));
+extern void onig_node_free P_((Node* node));
+extern Node* onig_node_new_effect P_((int type));
+extern Node* onig_node_new_anchor P_((int type));
+extern int onig_free_node_list(); /* NOTE(review): declared without a parameter list, unlike its neighbours */
+extern int onig_names_free P_((regex_t* reg));
+extern int onig_parse_make_tree P_((Node** root, UChar* pattern, UChar* end, regex_t* reg, ScanEnv* env));
+
+#ifdef ONIG_DEBUG
+#ifdef USE_NAMED_GROUP
+extern int onig_print_names(FILE*, regex_t*);
+#endif
+#endif
+
+#endif /* REGPARSE_H */
diff --git a/sjis.c b/sjis.c
new file mode 100644
index 000000000..8485910e6
--- /dev/null
+++ b/sjis.c
@@ -0,0 +1,174 @@
+/**********************************************************************
+
+ sjis.c - Oniguruma (regular expression library)
+
+ Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
+
+**********************************************************************/
+#include "regenc.h"
+/* Indexed by byte value: 1 when the byte is marked as a possible trail byte. */
+static const char SJIS_CAN_BE_TRAIL_TABLE[256] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40 - 0x4f */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* 0x7f is excluded */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80 - 0x8f */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 /* 0xfd - 0xff are excluded */
+};
+/* FIRST: the length table gives the byte a length above 1. TRAIL: the byte is marked in the table above. */
+#define SJIS_ISMB_FIRST(byte) (OnigEncodingSJIS.len_table[byte] > 1)
+#define SJIS_ISMB_TRAIL(byte) SJIS_CAN_BE_TRAIL_TABLE[(byte)]
+
+/*
+ * Decode the character starting at p into a code point.
+ *
+ * The first byte is always read.  The following bytes of a multi-byte
+ * character are appended big-endian (code = code << 8 | byte) until the
+ * length given by enc_len() is reached or `end` is hit.
+ */
+static OnigCodePoint
+sjis_mbc_to_code(UChar* p, UChar* end)
+{
+  int lead = *p;
+  int remaining = enc_len(ONIG_ENCODING_SJIS, lead) - 1;
+  OnigCodePoint code = lead;
+
+  p++;
+  while (remaining > 0 && p < end) {
+    code = (code << 8) + *p;
+    p++;
+    remaining--;
+  }
+  return code;
+}
+
+/*
+ * Store `code` into buf: bits 8-15 first when any of them is set, then the
+ * low byte.  Returns the number of bytes written (1 or 2).
+ */
+static int
+sjis_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+  int n = 0;
+
+  if ((code & 0xff00) != 0)
+    buf[n++] = (UChar )((code >> 8) & 0xff);
+  buf[n++] = (UChar )(code & 0xff);
+
+#if 0
+  if (enc_len(ONIG_ENCODING_SJIS, buf[0]) != n)
+    return REGERR_INVALID_WIDE_CHAR_VALUE;
+#endif
+  return n;
+}
+
+/*
+ * Write the lower-case form of the character at p to `lower`.
+ *
+ * ASCII characters are converted; any other character is copied unchanged
+ * (nothing is copied when lower == p).  Returns the byte length of the
+ * character written.
+ */
+static int
+sjis_mbc_to_lower(UChar* p, UChar* lower)
+{
+  int len, k;
+
+  if (ONIGENC_IS_MBC_ASCII(p)) {
+    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
+    return 1;
+  }
+
+  len = enc_len(ONIG_ENCODING_SJIS, *p);
+  if (lower != p) {
+    for (k = 0; k < len; k++)
+      lower[k] = p[k];
+  }
+  return len; /* return byte length of converted char to lower */
+}
+
+/*
+ * Test whether `code` belongs to the character type(s) in `ctype`.
+ *
+ * Codes below 128 are answered by the ASCII ctype table.  A code of 128 or
+ * above matches only when ONIGENC_CTYPE_WORD is requested and the first byte
+ * of the code has an encoded length above 1.
+ *
+ * The original placed two statements after an if/else whose branches both
+ * return; they could never run and are removed.  Results are unchanged.
+ */
+static int
+sjis_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code < 128)
+    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+
+  if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
+    int first = onigenc_mb2_code_to_mbc_first(code);
+    return (enc_len(ONIG_ENCODING_SJIS, first) > 1 ? TRUE : FALSE);
+  }
+
+  return FALSE;
+}
+
+/*
+ * Return the start of the character that contains position s, never going
+ * before `start`.
+ *
+ * When *s could be a trail byte, step back while the preceding bytes are
+ * possible lead bytes, then walk forward again in whole characters.
+ */
+static UChar*
+sjis_left_adjust_char_head(UChar* start, UChar* s)
+{
+  UChar *head;
+  int width;
+
+  if (s <= start) return s;
+
+  head = s;
+  if (SJIS_ISMB_TRAIL(*head)) {
+    while (head > start) {
+      head--;
+      if (! SJIS_ISMB_FIRST(*head)) {
+        /* first byte that cannot be a lead byte: resume just after it */
+        head++;
+        break;
+      }
+    }
+  }
+
+  width = enc_len(ONIG_ENCODING_SJIS, *head);
+  if (head + width > s) return head;
+
+  head += width;
+  /* advance by an even number of bytes without passing s */
+  return head + ((s - head) & ~1);
+}
+
+/*
+ * Returns FALSE when the byte at s is marked as a possible trail byte,
+ * TRUE otherwise.  `end` is not used.
+ */
+static int
+sjis_is_allowed_reverse_match(UChar* s, UChar* end)
+{
+  if (SJIS_ISMB_TRAIL(*s))
+    return FALSE;
+
+  return TRUE;
+}
+/* Encoding descriptor for Shift_JIS. The 256-entry table gives a length per first byte (presumably the len_table member read by SJIS_ISMB_FIRST - confirm). */
+OnigEncodingType OnigEncodingSJIS = {
+ {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x80 is 1; 0x81 - 0x8f are 2 */
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x90 - 0x9f */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xe0 - 0xef */
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1 /* 0xf0 - 0xfc are 2; 0xfd - 0xff are 1 */
+ },
+ "Shift_JIS", /* name */
+ 2, /* max byte length */
+ FALSE, /* is_fold_match */
+ ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
+ FALSE, /* is continuous sb mb codepoint */
+ sjis_mbc_to_code,
+ onigenc_mb2_code_to_mbclen,
+ sjis_code_to_mbc,
+ sjis_mbc_to_lower,
+ onigenc_mbn_mbc_is_case_ambig,
+ sjis_code_is_ctype,
+ onigenc_nothing_get_ctype_code_range,
+ sjis_left_adjust_char_head,
+ sjis_is_allowed_reverse_match,
+ onigenc_nothing_get_all_fold_match_code,
+ onigenc_nothing_get_fold_match_info
+};
diff --git a/utf8.c b/utf8.c
new file mode 100644
index 000000000..604cfac2e
--- /dev/null
+++ b/utf8.c
@@ -0,0 +1,566 @@
+/**********************************************************************
+
+ utf8.c - Oniguruma (regular expression library)
+
+ Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
+
+**********************************************************************/
+#include "regenc.h"
+
+/* True when byte c is not of the form 10xxxxxx (top two bits != 10). */
+#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80)
+
+/* Non-zero when the ctype table entry for code has any bit of ctype set.
+   code must be in 0..255.  Both parameters are parenthesized so that
+   compound arguments such as (A | B) are tested as a whole. */
+#define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \
+  ((EncUnicode_ISO_8859_1_CtypeTable[(code)] & (ctype)) != 0)
+
+/*
+ * Character-type bitmask for each code 0x00-0xff, 8 entries per row.
+ * Read through ENC_IS_ISO_8859_1_CTYPE(), which ANDs an entry with a ctype
+ * mask.  The bit assignments presumably follow the ONIGENC_CTYPE_* values
+ * declared in the headers -- confirm there before editing entries.
+ */
+static unsigned short EncUnicode_ISO_8859_1_CtypeTable[256] = {
+ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
+ 0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
+ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
+ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
+ 0x1142, 0x10d0, 0x10d0, 0x10d0, 0x1050, 0x10d0, 0x10d0, 0x10d0,
+ 0x10d0, 0x10d0, 0x10d0, 0x1050, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
+ 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
+ 0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x1050, 0x1050, 0x1050, 0x10d0,
+ 0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
+ 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
+ 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
+ 0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x1050, 0x18d0,
+ 0x1050, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
+ 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
+ 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
+ 0x1871, 0x1871, 0x1871, 0x10d0, 0x1050, 0x10d0, 0x1050, 0x1004,
+ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
+ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
+ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
+ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
+ 0x1142, 0x10d0, 0x1050, 0x1050, 0x1050, 0x1050, 0x1050, 0x1050,
+ 0x1050, 0x1050, 0x1871, 0x10d0, 0x1050, 0x10d0, 0x1050, 0x1050,
+ 0x1050, 0x1050, 0x1850, 0x1850, 0x1050, 0x1871, 0x1050, 0x10d0,
+ 0x1050, 0x1850, 0x1871, 0x10d0, 0x1850, 0x1850, 0x1850, 0x10d0,
+ 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
+ 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
+ 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1050,
+ 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1871,
+ 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
+ 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
+ 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1050,
+ 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871
+};
+
+/*
+ * Decode the character starting at p into a code point.
+ *
+ * p:   first byte of the character; always read.
+ * end: one past the last readable byte.  Trail bytes are consumed only
+ *      while p < end, so a sequence cut short by the end of the buffer is
+ *      decoded from the bytes that are present instead of reading beyond
+ *      end.
+ *
+ * The expected length comes from enc_len() on the first byte.  For a
+ * length of 1 the byte value itself is returned.  Otherwise the payload
+ * bits of the lead byte are combined with the low 6 bits of each trail
+ * byte.  Trail bytes are not validated.
+ */
+static OnigCodePoint
+utf8_mbc_to_code(UChar* p, UChar* end)
+{
+  int c, len;
+  OnigCodePoint n;
+
+  c = *p++;
+  len = enc_len(ONIG_ENCODING_UTF8, c);
+  if (len <= 1)
+    return (OnigCodePoint )c;
+
+  /* len now counts trail bytes; the lead byte carries (6 - len) bits. */
+  len--;
+  n = c & ((1 << (6 - len)) - 1);
+  while (len-- && p < end) {
+    c = *p++;
+    n = (n << 6) | (c & ((1 << 6) - 1));
+  }
+  return n;
+}
+
+/*
+ * Return the number of bytes used to encode code (1 to 6), or
+ * ONIGERR_TOO_BIG_WIDE_CHAR_VALUE when bit 31 of code is set.
+ *
+ * NOTE(review): 0xfe and 0xff report length 1 although they fall in the
+ * two-byte range, while utf8_code_to_mbc() writes two bytes for them.
+ * Presumably these values stand for the raw bytes 0xfe/0xff -- confirm.
+ */
+static int
+utf8_code_to_mbclen(OnigCodePoint code)
+{
+  if ((code & 0xffffff80) == 0) return 1;
+
+  if ((code & 0xfffff800) == 0)
+    return (code == 0xfe || code == 0xff) ? 1 : 2;
+
+  if ((code & 0xffff0000) == 0) return 3;
+  if ((code & 0xffe00000) == 0) return 4;
+  if ((code & 0xfc000000) == 0) return 5;
+  if ((code & 0x80000000) == 0) return 6;
+
+  return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
+}
+
+#if 0
+/*
+ * Compiled out.  Returns the first byte of the encoding of code, or
+ * ONIGERR_TOO_BIG_WIDE_CHAR_VALUE when bit 31 of code is set.
+ */
+static int
+utf8_code_to_mbc_first(OnigCodePoint code)
+{
+  if ((code & 0xffffff80) == 0) return code;
+
+  if ((code & 0xfffff800) == 0) return ((code>>6)& 0x1f) | 0xc0;
+  if ((code & 0xffff0000) == 0) return ((code>>12) & 0x0f) | 0xe0;
+  if ((code & 0xffe00000) == 0) return ((code>>18) & 0x07) | 0xf0;
+  if ((code & 0xfc000000) == 0) return ((code>>24) & 0x03) | 0xf8;
+  if ((code & 0x80000000) == 0) return ((code>>30) & 0x01) | 0xfc;
+
+  return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
+}
+#endif
+
+/*
+ * Encode code into buf and return the number of bytes written (1 to 6).
+ * Returns ONIGERR_TOO_BIG_WIDE_CHAR_VALUE, writing nothing, when bit 31 of
+ * code is set.  The caller must supply room for the encoded bytes.
+ */
+static int
+utf8_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+#define UTF8_TRAILS(code, shift) (UChar )((((code) >> (shift)) & 0x3f) | 0x80)
+#define UTF8_TRAIL0(code) (UChar )(((code) & 0x3f) | 0x80)
+
+  UChar *out = buf;
+  UChar lead;
+  int shift;   /* shift of the first trail byte that precedes the last one */
+
+  if ((code & 0xffffff80) == 0) {
+    *buf = (UChar )code;
+    return 1;
+  }
+
+  if ((code & 0xfffff800) == 0) {
+    lead = (UChar )(((code>>6)& 0x1f) | 0xc0);
+    shift = 0;
+  }
+  else if ((code & 0xffff0000) == 0) {
+    lead = (UChar )(((code>>12) & 0x0f) | 0xe0);
+    shift = 6;
+  }
+  else if ((code & 0xffe00000) == 0) {
+    lead = (UChar )(((code>>18) & 0x07) | 0xf0);
+    shift = 12;
+  }
+  else if ((code & 0xfc000000) == 0) {
+    lead = (UChar )(((code>>24) & 0x03) | 0xf8);
+    shift = 18;
+  }
+  else if ((code & 0x80000000) == 0) {
+    lead = (UChar )(((code>>30) & 0x01) | 0xfc);
+    shift = 24;
+  }
+  else {
+    return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
+  }
+
+  /* Lead byte, then the middle trail bytes from high to low, then the
+     trail byte holding the lowest 6 bits. */
+  *out++ = lead;
+  for (; shift > 0; shift -= 6)
+    *out++ = UTF8_TRAILS(code, shift);
+  *out++ = UTF8_TRAIL0(code);
+
+  return out - buf;
+}
+
+/*
+ * Write the lower-case form of the character at p to lower and return the
+ * number of bytes written.
+ *
+ * Only ASCII characters are converted.  Any other character is copied
+ * unchanged; the copy is skipped when lower and p are the same pointer.
+ */
+static int
+utf8_mbc_to_lower(UChar* p, UChar* lower)
+{
+  int nbytes, k;
+
+  /* !!! U+0080 - U+00ff is treated by fold match. !!! */
+  if (ONIGENC_IS_MBC_ASCII(p)) {
+    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
+    return 1;
+  }
+
+  nbytes = enc_len(ONIG_ENCODING_UTF8, *p);
+  if (lower == p) return nbytes;
+
+  /* byte-wise forward copy */
+  for (k = 0; k < nbytes; k++)
+    lower[k] = p[k];
+
+  return nbytes;
+}
+
+/*
+ * Report whether the character at p is case-ambiguous.
+ * Only ASCII characters are examined; everything else yields FALSE.
+ */
+static int
+utf8_mbc_is_case_ambig(UChar* p)
+{
+  /* !!! U+0080 - U+00ff ( 0x80[0xc2,0x80] - 0xff[0xc3,0xbf] )
+     is treated by fold match. !!! */
+
+  if (! ONIGENC_IS_MBC_ASCII(p))
+    return FALSE;
+
+  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
+}
+
+/*
+ * Test whether code belongs to character type ctype.
+ *
+ * Codes below 256 are looked up in the ISO-8859-1 ctype table.  Every code
+ * from 256 upwards is treated as a word character and nothing else.
+ */
+static int
+utf8_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code >= 256)
+    return ((ctype & ONIGENC_CTYPE_WORD) != 0) ? TRUE : FALSE;
+
+  return ENC_IS_ISO_8859_1_CTYPE(code, ctype);
+}
+
+/*
+ * Hand out the static code point range lists for one character type.
+ *
+ * ctype: one ONIGENC_CTYPE_* value (compared with ==, not as a bit mask).
+ * nsb:   receives the number of entries in *sbr.
+ * nmb:   receives the number of entries in *mbr (0 for DIGIT, XDIGIT and
+ *        ASCII).
+ * sbr:   receives a pointer to the "SB" list; every entry there is
+ *        within 0x00-0x7f.
+ * mbr:   receives a pointer to the "MB" list; every entry there starts
+ *        at 0x80 or above.  Left untouched when *nmb is set to 0.
+ *
+ * Returns 0 on success, ONIGERR_TYPE_BUG for an unrecognized ctype.
+ * The lists are function-local statics, so the returned pointers stay
+ * valid after the call.
+ */
+static int
+utf8_get_ctype_code_range(int ctype, int* nsb, int* nmb,
+ OnigCodePointRange* sbr[], OnigCodePointRange* mbr[])
+{
+/* Publish both lists and their element counts. */
+#define CR_SET(sbl,mbl) do { \
+ *nsb = sizeof(sbl) / sizeof(OnigCodePointRange); \
+ *nmb = sizeof(mbl) / sizeof(OnigCodePointRange); \
+ *sbr = sbl; \
+ *mbr = mbl; \
+} while (0)
+
+/* Publish only the SB list; *nmb becomes 0 and *mbr is not written. */
+#define CR_SB_SET(sbl) do { \
+ *nsb = sizeof(sbl) / sizeof(OnigCodePointRange); \
+ *nmb = 0; \
+ *sbr = sbl; \
+} while (0)
+
+ static OnigCodePointRange SBAlpha[] = {
+ { 0x41, 0x5a },
+ { 0x61, 0x7a }
+ };
+
+ static OnigCodePointRange MBAlpha[] = {
+ { 0xaa, 0xaa },
+ { 0xb5, 0xb5 },
+ { 0xba, 0xba },
+ { 0xc0, 0xd6 },
+ { 0xd8, 0xf6 },
+ { 0xf8, 0x220 }
+ };
+
+ static OnigCodePointRange SBBlank[] = {
+ { 0x09, 0x09 },
+ { 0x20, 0x20 }
+ };
+
+ static OnigCodePointRange MBBlank[] = {
+ { 0xa0, 0xa0 }
+ };
+
+ static OnigCodePointRange SBCntrl[] = {
+ { 0x00, 0x1f },
+ { 0x7f, 0x7f }
+ };
+
+ static OnigCodePointRange MBCntrl[] = {
+ { 0x80, 0x9f }
+ };
+
+ static OnigCodePointRange SBDigit[] = {
+ { 0x30, 0x39 }
+ };
+
+ static OnigCodePointRange SBGraph[] = {
+ { 0x21, 0x7e }
+ };
+
+ static OnigCodePointRange MBGraph[] = {
+ { 0xa1, 0x220 }
+ };
+
+ static OnigCodePointRange SBLower[] = {
+ { 0x61, 0x7a }
+ };
+
+ static OnigCodePointRange MBLower[] = {
+ { 0xaa, 0xaa },
+ { 0xb5, 0xb5 },
+ { 0xba, 0xba },
+ { 0xdf, 0xf6 },
+ { 0xf8, 0xff }
+ };
+
+ static OnigCodePointRange SBPrint[] = {
+ { 0x20, 0x7e }
+ };
+
+ static OnigCodePointRange MBPrint[] = {
+ { 0xa0, 0x220 }
+ };
+
+ static OnigCodePointRange SBPunct[] = {
+ { 0x21, 0x23 },
+ { 0x25, 0x2a },
+ { 0x2c, 0x2f },
+ { 0x3a, 0x3b },
+ { 0x3f, 0x40 },
+ { 0x5b, 0x5d },
+ { 0x5f, 0x5f },
+ { 0x7b, 0x7b },
+ { 0x7d, 0x7d }
+ };
+
+ static OnigCodePointRange MBPunct[] = {
+ { 0xa1, 0xa1 },
+ { 0xab, 0xab },
+ { 0xad, 0xad },
+ { 0xb7, 0xb7 },
+ { 0xbb, 0xbb },
+ { 0xbf, 0xbf }
+ };
+
+ static OnigCodePointRange SBSpace[] = {
+ { 0x09, 0x0d },
+ { 0x20, 0x20 }
+ };
+
+ static OnigCodePointRange MBSpace[] = {
+ { 0xa0, 0xa0 }
+ };
+
+ static OnigCodePointRange SBUpper[] = {
+ { 0x41, 0x5a }
+ };
+
+ static OnigCodePointRange MBUpper[] = {
+ { 0xc0, 0xd6 },
+ { 0xd8, 0xde }
+ };
+
+ static OnigCodePointRange SBXDigit[] = {
+ { 0x30, 0x39 },
+ { 0x41, 0x46 },
+ { 0x61, 0x66 }
+ };
+
+ static OnigCodePointRange SBWord[] = {
+ { 0x30, 0x39 },
+ { 0x41, 0x5a },
+ { 0x5f, 0x5f },
+ { 0x61, 0x7a }
+ };
+
+ /* The last range is open-ended so that every code from 0xf8 upwards
+    counts as a word character. */
+ static OnigCodePointRange MBWord[] = {
+ { 0xaa, 0xaa },
+ { 0xb2, 0xb3 },
+ { 0xb5, 0xb5 },
+ { 0xb9, 0xba },
+ { 0xbc, 0xbe },
+ { 0xc0, 0xd6 },
+ { 0xd8, 0xf6 },
+#if 0
+ { 0xf8, 0x220 }
+#else
+ { 0xf8, 0x7fffffff } /* all multibyte code as word */
+#endif
+ };
+
+ static OnigCodePointRange SBAscii[] = {
+ { 0x00, 0x7f }
+ };
+
+ static OnigCodePointRange SBAlnum[] = {
+ { 0x30, 0x39 },
+ { 0x41, 0x5a },
+ { 0x61, 0x7a }
+ };
+
+ static OnigCodePointRange MBAlnum[] = {
+ { 0xaa, 0xaa },
+ { 0xb5, 0xb5 },
+ { 0xba, 0xba },
+ { 0xc0, 0xd6 },
+ { 0xd8, 0xf6 },
+ { 0xf8, 0x220 }
+ };
+
+ /* Select the list pair that matches ctype. */
+ switch (ctype) {
+ case ONIGENC_CTYPE_ALPHA:
+ CR_SET(SBAlpha, MBAlpha);
+ break;
+ case ONIGENC_CTYPE_BLANK:
+ CR_SET(SBBlank, MBBlank);
+ break;
+ case ONIGENC_CTYPE_CNTRL:
+ CR_SET(SBCntrl, MBCntrl);
+ break;
+ case ONIGENC_CTYPE_DIGIT:
+ CR_SB_SET(SBDigit);
+ break;
+ case ONIGENC_CTYPE_GRAPH:
+ CR_SET(SBGraph, MBGraph);
+ break;
+ case ONIGENC_CTYPE_LOWER:
+ CR_SET(SBLower, MBLower);
+ break;
+ case ONIGENC_CTYPE_PRINT:
+ CR_SET(SBPrint, MBPrint);
+ break;
+ case ONIGENC_CTYPE_PUNCT:
+ CR_SET(SBPunct, MBPunct);
+ break;
+ case ONIGENC_CTYPE_SPACE:
+ CR_SET(SBSpace, MBSpace);
+ break;
+ case ONIGENC_CTYPE_UPPER:
+ CR_SET(SBUpper, MBUpper);
+ break;
+ case ONIGENC_CTYPE_XDIGIT:
+ CR_SB_SET(SBXDigit);
+ break;
+ case ONIGENC_CTYPE_WORD:
+ CR_SET(SBWord, MBWord);
+ break;
+ case ONIGENC_CTYPE_ASCII:
+ CR_SB_SET(SBAscii);
+ break;
+ case ONIGENC_CTYPE_ALNUM:
+ CR_SET(SBAlnum, MBAlnum);
+ break;
+
+ default:
+ /* Unknown ctype: no output parameter is written. */
+ return ONIGERR_TYPE_BUG;
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * Point *codes at the static list of code points that take part in fold
+ * matching and return the number of entries.
+ *
+ * The list covers 0xc0-0xde and 0xe0-0xfe without 0xd7 and 0xf7, plus
+ * 0xdf.  0xff is not included.
+ */
+static int
+utf8_get_all_fold_match_code(OnigCodePoint** codes)
+{
+  static OnigCodePoint fold_codes[] = {
+    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
+    0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
+    0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+
+    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
+    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6,
+    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe
+  };
+  int count = sizeof(fold_codes) / sizeof(fold_codes[0]);
+
+  *codes = fold_codes;
+  return count;
+}
+
+/*
+ * Look up fold-match information for the text starting at p.
+ *
+ * p, end: text to inspect; at least two bytes must lie before end.
+ * info:   on success receives a pointer to a static table entry.
+ *
+ * Recognized inputs:
+ *   - "SS" or "ss"                      -> the ess-tsett (U+00DF) entry
+ *   - 0xc3 followed by 0x80-0x9f        -> entry for U+00C0-U+00DF
+ *   - 0xc3 followed by 0xa0-0xbe        -> entry of the matching upper
+ *                                          case letter (U+00E0-U+00FE)
+ * U+00D7 and U+00F7 have no entry.  0xc3 0xbf (U+00FF) and 0xc3 followed
+ * by a byte above 0xbf are rejected as well: U+00FF is absent from the
+ * list in utf8_get_all_fold_match_code(), and those second bytes would
+ * otherwise select the ess-tsett entry or index past the end of the
+ * 32-entry table.
+ *
+ * Returns 2 on success (presumably the byte length of the text matched at
+ * p -- confirm against the callers), or -1 when p does not start a fold
+ * string.
+ */
+static int
+utf8_get_fold_match_info(UChar* p, UChar* end, OnigEncFoldMatchInfo** info)
+{
+
+  /* One entry per code point U+00C0..U+00DF, indexed by (code - 0xc0).
+     Strings are UTF-8 bytes in octal: "\303\200" is 0xc3 0x80. */
+  static OnigEncFoldMatchInfo xc[] = {
+    { 2, { 2, 2 }, { "\303\200", "\303\240" } }, /* CodePoint 0xc0 */
+    { 2, { 2, 2 }, { "\303\201", "\303\241" } },
+    { 2, { 2, 2 }, { "\303\202", "\303\242" } },
+    { 2, { 2, 2 }, { "\303\203", "\303\243" } },
+    { 2, { 2, 2 }, { "\303\204", "\303\244" } },
+    { 2, { 2, 2 }, { "\303\205", "\303\245" } },
+    { 2, { 2, 2 }, { "\303\206", "\303\246" } },
+    { 2, { 2, 2 }, { "\303\207", "\303\247" } },
+    { 2, { 2, 2 }, { "\303\210", "\303\250" } },
+    { 2, { 2, 2 }, { "\303\211", "\303\251" } },
+    { 2, { 2, 2 }, { "\303\212", "\303\252" } },
+    { 2, { 2, 2 }, { "\303\213", "\303\253" } },
+    { 2, { 2, 2 }, { "\303\214", "\303\254" } },
+    { 2, { 2, 2 }, { "\303\215", "\303\255" } },
+    { 2, { 2, 2 }, { "\303\216", "\303\256" } },
+    { 2, { 2, 2 }, { "\303\217", "\303\257" } },
+    { 2, { 2, 2 }, { "\303\220", "\303\260" } }, /* CodePoint 0xd0 */
+    { 2, { 2, 2 }, { "\303\221", "\303\261" } },
+    { 2, { 2, 2 }, { "\303\222", "\303\262" } },
+    { 2, { 2, 2 }, { "\303\223", "\303\263" } },
+    { 2, { 2, 2 }, { "\303\224", "\303\264" } },
+    { 2, { 2, 2 }, { "\303\225", "\303\265" } },
+    { 2, { 2, 2 }, { "\303\226", "\303\266" } },
+    { 0, { 0 }, { "" } },                        /* 0xd7: placeholder */
+    { 2, { 2, 2 }, { "\303\230", "\303\270" } },
+    { 2, { 2, 2 }, { "\303\231", "\303\271" } },
+    { 2, { 2, 2 }, { "\303\232", "\303\272" } },
+    { 2, { 2, 2 }, { "\303\233", "\303\273" } },
+    { 2, { 2, 2 }, { "\303\234", "\303\274" } },
+    { 2, { 2, 2 }, { "\303\235", "\303\275" } },
+    { 2, { 2, 2 }, { "\303\236", "\303\276" } },
+    { 3, { 2, 2, 2 }, { "\303\237", "ss", "SS" }} /* ess-tsett(U+00DF) */
+  };
+
+  if (p + 1 >= end) return -1;
+  if (*p < 0x80) {
+    if ((*p == 'S' && *(p+1) == 'S') ||
+        (*p == 's' && *(p+1) == 's')) {
+      *info = &(xc[0xdf - 0xc0]);
+      return 2;
+    }
+  }
+  else if (*p == 195) { /* 195 == '\303' */
+    int c = *(p+1);
+
+    if (c >= 128 && c <= 159) { /* upper: U+00C0 - U+00DF */
+      if (c == 151) return -1; /* 0xd7 */
+      *info = &(xc[c - 128]);
+      return 2;
+    }
+    if (c >= 160 && c <= 190) { /* lower: U+00E0 - U+00FE */
+      if (c == 183) return -1; /* 0xf7 */
+      *info = &(xc[c - 160]);
+      return 2;
+    }
+    /* c == 191 (U+00FF) and c >= 192 (not a trail byte) fall through. */
+  }
+
+  return -1; /* is not a fold string. */
+}
+
+
+/*
+ * Move s leftwards to the nearest byte that passes utf8_islead(), never
+ * going before start.  Returns s unchanged when s <= start; returns start
+ * when no such byte is found above it.
+ */
+static UChar*
+utf8_left_adjust_char_head(UChar* start, UChar* s)
+{
+  UChar *head;
+
+  if (s <= start) return s;
+
+  for (head = s; head > start; head--) {
+    if (utf8_islead(*head)) break;
+  }
+  return head;
+}
+
+/*
+ * Reverse-match check for UTF-8: always returns TRUE.
+ * Neither s nor end is examined.
+ */
+static int
+utf8_is_allowed_reverse_match(UChar* s, UChar* end)
+{
+ return TRUE;
+}
+
+/*
+ * Encoding descriptor for UTF-8.
+ *
+ * The leading 256-entry table has one entry per byte value:
+ *   0x00-0xbf -> 1, 0xc0-0xdf -> 2, 0xe0-0xef -> 3, 0xf0-0xf7 -> 4,
+ *   0xf8-0xfb -> 5, 0xfc-0xfd -> 6, 0xfe-0xff -> 1.
+ * This file reads it through enc_len(ONIG_ENCODING_UTF8, byte) to get the
+ * length of the character that starts with that byte.
+ * The remaining members are the encoding properties and the handler
+ * functions defined in this file.
+ */
+OnigEncodingType OnigEncodingUTF8 = {
+ {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
+ },
+ "UTF-8", /* name */
+ 6, /* max byte length */
+ TRUE, /* is_fold_match */
+ ONIGENC_CTYPE_SUPPORT_LEVEL_FULL, /* ctype_support_level */
+ TRUE, /* is continuous sb mb codepoint */
+ utf8_mbc_to_code,
+ utf8_code_to_mbclen,
+ utf8_code_to_mbc,
+ utf8_mbc_to_lower,
+ utf8_mbc_is_case_ambig,
+ utf8_code_is_ctype,
+ utf8_get_ctype_code_range,
+ utf8_left_adjust_char_head,
+ utf8_is_allowed_reverse_match,
+ utf8_get_all_fold_match_code,
+ utf8_get_fold_match_info
+};