/**********************************************************************
regparse.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regparse.h"
#define WARN_BUFSIZE 256
OnigSyntaxType OnigSyntaxRuby = {
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
ONIG_SYN_OP_ESC_C_CONTROL )
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
, ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
ONIG_SYN_OP2_OPTION_RUBY |
ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
ONIG_SYN_OP2_ESC_H_XDIGIT )
, ( SYN_GNU_REGEX_BV |
ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
, ONIG_OPTION_NONE
,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
}
};
OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY;
extern void onig_null_warn(const char* s) { }
#ifdef DEFAULT_WARN_FUNCTION
static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
#else
static OnigWarnFunc onig_warn = onig_null_warn;
#endif
#ifdef DEFAULT_VERB_WARN_FUNCTION
static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
#else
static OnigWarnFunc onig_verb_warn = onig_null_warn;
#endif
extern void onig_set_warn_func(OnigWarnFunc f)
{
onig_warn = f;
}
extern void onig_set_verb_warn_func(OnigWarnFunc f)
{
onig_verb_warn = f;
}
static void
bbuf_free(BBuf* bbuf)
{
if (IS_NOT_NULL(bbuf)) {
if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);
xfree(bbuf);
}
}
static int
bbuf_clone(BBuf** rto, BBuf* from)
{
int r;
BBuf *to;
*rto = to = (BBuf* )xmalloc(sizeof(BBuf));
CHECK_NULL_RETURN_VAL(to, ONIGERR_MEMORY);
r = BBUF_INIT(to, from->alloc);
if (r != 0) return r;
to->used = from->used;
xmemcpy(to->p, from->p, from->used);
return 0;
}
#define ONOFF(v,f,negative) (negative) ? ((v) &= ~(f)) : ((v) |= (f))
#define MBCODE_START_POS(enc) \
(OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
if (r) return r;\
}\
} while (0)
#define BITSET_IS_EMPTY(bs,empty) do {\
int i;\
empty = 1;\
for (i = 0; i < BITSET_SIZE; i++) {\
if ((bs)[i] != 0) {\
empty = 0; break;\
}\
}\
} while (0)
static void
bitset_set_range(BitSetRef bs, int from, int to)
{
int i;
for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
BITSET_SET_BIT(bs, i);
}
}
#if 0
static void
bitset_set_all(BitSetRef bs)
{
int i;
for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); }
}
#endif
static void
bitset_invert(BitSetRef bs)
{
int i;
for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
}
static void
bitset_invert_to(BitSetRef from, BitSetRef to)
{
int i;
for (i = 0; i < BITSET_SIZE; i++) { to[i] = ~(from[i]); }
}
static void
bitset_and(BitSetRef dest, BitSetRef bs)
{
int i;
for (i = 0; i < BITSET_SIZE; i++) { dest[i] &= bs[i]; }
}
static void
bitset_or(BitSetRef dest, BitSetRef bs)
{
int i;
for (i = 0; i < BITSET_SIZE; i++) { dest[i] |= bs[i]; }
}
static void
bitset_copy(BitSetRef dest, BitSetRef bs)
{
int i;
for (i = 0; i < BITSET_SIZE; i++) { dest[i] = bs[i]; }
}
extern int
onig_strncmp(const UChar* s1, const UChar* s2, int n)
{
int x;
while (n-- > 0) {
x = *s2++ - *s1++;
if (x) return x;
}
return 0;
}
extern void
onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
{
int len = end - src;
if (len > 0) {
xmemcpy(dest, src, len);
dest[len] = (UChar )0;
}
}
static UChar*
strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
{
int slen, term_len, i;
UChar *r;
slen = end - s;
term_len = ONIGENC_MBC_MINLEN(enc);
r = (UChar* )xmalloc(slen + term_len);
CHECK_NULL_RETURN(r);
xmemcpy(r, s, slen);
for (i = 0; i < term_len; i++)
r[slen + i] = (UChar )0;
return r;
}
/* scan pattern methods */
#define PEND_VALUE 0
#define PFETCH_READY UChar* pfetch_prev
#define PEND (p < end ? 0 : 1)
#define PUNFETCH p = pfetch_prev
#define PINC do { \
pfetch_prev = p; \
p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)
#define PFETCH(c) do { \
c = ONIGENC_MBC_TO_CODE(enc, p, end); \
pfetch_prev = p; \
p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)
#define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
#define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c)
static UChar*
strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
int capa)
{
UChar* r;
if (dest)
r = (UChar* )xrealloc(dest, capa + 1);
else
r = (UChar* )xmalloc(capa + 1);
CHECK_NULL_RETURN(r);
onig_strcpy(r + (dest_end - dest), src, src_end);
return r;
}
/* dest on static area */
static UChar*
strcat_capa_from_static(UChar* dest, UChar* dest_end,
const UChar* src, const UChar* src_end, int capa)
{
UChar* r;
r = (UChar* )xmalloc(capa + 1);
CHECK_NULL_RETURN(r);
onig_strcpy(r, dest, dest_end);
onig_strcpy(r + (dest_end - dest), src, src_end);
return r;
}
#ifdef USE_NAMED_GROUP
#define INIT_NAME_BACKREFS_ALLOC_NUM 8
typedef struct {
UChar* name;
int name_len; /* byte length */
int back_num; /* number of backrefs */
int back_alloc;
int back_ref1;
int* back_refs;
} NameEntry;
#ifdef USE_ST_HASH_TABLE
#include "ruby/st.h"
typedef struct {
unsigned char* s;
unsigned char* end;
} st_strend_key;
static int strend_cmp(st_strend_key*, st_strend_key*);
static int strend_hash(st_strend_key*);
static struct st_hash_type type_strend_hash = {
strend_cmp,
strend_hash,
};
extern hash_table_type*
onig_st_init_strend_table_with_size(int size)
{
return (hash_table_type* )onig_st_init_table_with_size(&type_strend_hash,
size);
}
extern int
onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,
const UChar* end_key, hash_data_type *value)
{
st_strend_key key;
key.s = (unsigned char* )str_key;
key.end = (unsigned char* )end_key;
return onig_st_lookup(table, (st_data_t )(&key), value);
}
extern int
onig_st_insert_strend(hash_table_type* table, const UChar* str_key,
const UChar* end_key, hash_data_type value)
{
st_strend_key* key;
int result;
key = (st_strend_key* )xmalloc(sizeof(st_strend_key));
key->s = (unsigned char* )str_key;
key->end = (unsigned char* )end_key;
result = onig_st_insert(table, (st_data_t )key, value);
if (result) {
xfree(key);
}
return result;
}
static int
strend_cmp(st_strend_key* x, st_strend_key* y)
{
unsigned char *p, *q;
int c;
if ((x->end - x->s) != (y->end - y->s))
return 1;
p = x->s;
q = y->s;
while (p < x->end) {
c = (int )*p - (int )*q;
if (c != 0) return c;
p++; q++;
}
return 0;
}
static int
strend_hash(st_strend_key* x)
{
int val;
unsigned char *p;
val = 0;
p = x->s;
while (p < x->end) {
val = val * 997 + (int )*p++;
}
return val + (val >> 5);
}
typedef st_table NameTable;
typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */
#define NAMEBUF_SIZE 24
#define NAMEBUF_SIZE_1 25
#ifdef ONIG_DEBUG
static int
i_print_name_entry(UChar* key, NameEntry* e, void* arg)
{
int i;
FILE* fp = (FILE* )arg;
fprintf(fp, "%s: ", e->name);
if (e->back_num == 0)
fputs("-", fp);
else if (e->back_num == 1)
fprintf(fp, "%d", e->back_ref1);
else {
for (i = 0; i < e->back_num; i++) {
if (i > 0) fprintf(fp, ", ");
fprintf(fp, "%d", e->back_refs[i]);
}
}
fputs("\n", fp);
return ST_CONTINUE;
}
extern int
onig_print_names(FILE* fp, regex_t* reg)
{
NameTable* t = (NameTable* )reg->name_table;
if (IS_NOT_NULL(t)) {
fprintf(fp, "name table\n");
onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
fputs("\n", fp);
}
return 0;
}
#endif
static int
i_free_name_entry(UChar* key, NameEntry* e, void* arg)
{
xfree(e->name);
if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
xfree(key);
xfree(e);
return ST_DELETE;
}
static int
names_clear(regex_t* reg)
{
NameTable* t = (NameTable* )reg->name_table;
if (IS_NOT_NULL(t)) {
onig_st_foreach(t, i_free_name_entry, 0);
}
return 0;
}
extern int
onig_names_free(regex_t* reg)
{
int r;
NameTable* t;
r = names_clear(reg);
if (r) return r;
t = (NameTable* )reg->name_table;
if (IS_NOT_NULL(t)) onig_st_free_table(t);
reg->name_table = (void* )NULL;
return 0;
}
static NameEntry*
name_find(regex_t* reg, const UChar* name, const UChar* name_end)
{
NameEntry* e;
NameTable* t = (NameTable* )reg->name_table;
e = (NameEntry* )NULL;
if (IS_NOT_NULL(t)) {
onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
}
return e;
}
typedef struct {
int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);
regex_t* reg;
void* arg;
int ret;
OnigEncoding enc;
} INamesArg;
static int
i_names(UChar* key, NameEntry* e, INamesArg* arg)
{
int r = (*(arg->func))(e->name,
e->name + e->name_len,
e->back_num,
(e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
arg->reg, arg->arg);
if (r != 0) {
arg->ret = r;
return ST_STOP;
}
return ST_CONTINUE;
}
extern int
onig_foreach_name(regex_t* reg,
int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
{
INamesArg narg;
NameTable* t = (NameTable* )reg->name_table;
narg.ret = 0;
if (IS_NOT_NULL(t)) {
narg.func = func;
narg.reg = reg;
narg.arg = arg;
narg.enc = reg->enc; /* should be pattern encoding. */
onig_st_foreach(t, i_names, (HashDataType )&narg);
}
return narg.ret;
}
static int
i_renumber_name(UChar* key, NameEntry* e, GroupNumRemap* map)
{
int i;
if (e->back_num > 1) {
for (i = 0; i < e->back_num; i++) {
e->back_refs[i] = map[e->back_refs[i]].new_val;
}
}
else if (e->back_num == 1) {
e->back_ref1 = map[e->back_ref1].new_val;
}
return ST_CONTINUE;
}
extern int
onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)
{
NameTable* t = (NameTable* )reg->name_table;
if (IS_NOT_NULL(t)) {
onig_st_foreach(t, i_renumber_name, (HashDataType )map);
}
return 0;
}
extern int
onig_number_of_names(regex_t* reg)
{
NameTable* t = (NameTable* )reg->name_table;
if (IS_NOT_NULL(t))
return t->num_entries;
else
return 0;
}
#else /* USE_ST_HASH_TABLE */
#define INIT_NAMES_ALLOC_NUM 8
typedef struct {
NameEntry* e;
int num;
int alloc;
} NameTable;
#ifdef ONIG_DEBUG
extern int
onig_print_names(FILE* fp, regex_t* reg)
{
int i, j;
NameEntry* e;
NameTable* t = (NameTable* )reg->name_table;
if (IS_NOT_NULL(t) && t->num > 0) {
fprintf(fp, "name table\n");
for (i = 0; i < t->num; i++) {
e = &(t->e[i]);
fprintf(fp, "%s: ", e->name);
if (e->back_num == 0) {
fputs("-", fp);
}
else if (e->back_num == 1) {
fprintf(fp, "%d", e->back_ref1);
}
else {
for (j = 0; j < e->back_num; j++) {
if (j > 0) fprintf(fp, ", ");
fprintf(fp, "%d", e->back_refs[j]);
}
}
fputs("\n", fp);
}
fputs("\n", fp);
}
return 0;
}
#endif
static int
names_clear(regex_t* reg)
{
int i;
NameEntry* e;
NameTable* t = (NameTable* )reg->name_table;
if (IS_NOT_NULL(t)) {
for (i = 0; i < t->num; i++) {
e = &(t->e[i]);
if (IS_NOT_NULL(e->name)) {
xfree(e->name);
e->name = NULL;
e->name_len = 0;
e->back_num = 0;
e->back_alloc = 0;
if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
e->back_refs = (int* )NULL;
}
}
if (IS_NOT_NULL(t->e)) {
xfree(t->e);
t->e = NULL;
}
t->num = 0;
}
return 0;
}
extern int
onig_names_free(regex_t* reg)
{
int r;
NameTable* t;
r = names_clear(reg);
if (r) return r;
t = (NameTable* )reg->name_table;
if (IS_NOT_NULL(t)) xfree(t);
reg->name_table = NULL;
return 0;
}
static NameEntry*
name_find(regex_t* reg, UChar* name, UChar* name_end)
{
int i, len;
NameEntry* e;
NameTable* t = (NameTable* )reg->name_table;
if (IS_NOT_NULL(t)) {
len = name_end - name;
for (i = 0; i < t->num; i++) {
e = &(t->e[i]);
if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
return e;
}
}
return (NameEntry* )NULL;
}
extern int
onig_foreach_name(regex_t* reg,
int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
{
int i, r;
NameEntry* e;
NameTable* t = (NameTable* )reg->name_table;
if (IS_NOT_NULL(t)) {
for (i = 0; i < t->num; i++) {
e = &(t->e[i]);
r = (*func)(e->name, e->name + e->name_len, e->back_num,
(e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
reg, arg);
if (r != 0) return r;
}
}
return 0;
}
extern int
onig_number_of_names(regex_t* reg)
{
NameTable* t = (NameTable* )reg->name_table;
if (IS_NOT_NULL(t))
return t->num;
else
return 0;
}
#endif /* else USE_ST_HASH_TABLE */
static int
name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
{
int alloc;
NameEntry* e;
NameTable* t = (NameTable* )reg->name_table;
if (name_end - name <= 0)
return ONIGERR_EMPTY_GROUP_NAME;
e = name_find(reg, name, name_end);
if (IS_NULL(e)) {
#ifdef USE_ST_HASH_TABLE
if (IS_NULL(t)) {
t = onig_st_init_strend_table_with_size(5);
reg->name_table = (void* )t;
}
e = (NameEntry* )xmalloc(sizeof(NameEntry));
CHECK_NULL_RETURN_VAL(e, ONIGERR_MEMORY);
e->name = strdup_with_null(reg->enc, name, name_end);
if (IS_NULL(e->name)) return ONIGERR_MEMORY;
onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
(HashDataType )e);
e->name_len = name_end - name;
e->back_num = 0;
e->back_alloc = 0;
e->back_refs = (int* )NULL;
#else
if (IS_NULL(t)) {
alloc = INIT_NAMES_ALLOC_NUM;
t = (NameTable* )xmalloc(sizeof(NameTable));
CHECK_NULL_RETURN_VAL(t, ONIGERR_MEMORY);
|