summaryrefslogtreecommitdiffstats
path: root/lib/base/lexer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/base/lexer.cpp')
-rw-r--r--lib/base/lexer.cpp978
1 files changed, 978 insertions, 0 deletions
diff --git a/lib/base/lexer.cpp b/lib/base/lexer.cpp
new file mode 100644
index 00000000..e521c51f
--- /dev/null
+++ b/lib/base/lexer.cpp
@@ -0,0 +1,978 @@
+/** BEGIN COPYRIGHT BLOCK
+ * Copyright 2001 Sun Microsystems, Inc.
+ * Portions copyright 1999, 2001-2003 Netscape Communications Corporation.
+ * All rights reserved.
+ * END COPYRIGHT BLOCK **/
+
+/*
+ * Description (lexer.c)
+ *
+ * This module provides functions to assist parsers in lexical
+ * analysis. The idea is to provide a slightly higher-level
+ * interface than that of ctype.h.
+ */
+
+#include "netsite.h"
+#include "base/nsassert.h"
+
+#include "lexer_pvt.h"
+#include "base/lexer.h"
+
+/*
+ * Description (lex_class_check)
+ *
+ * This function checks whether a given character belongs to one or
+ * specified character classes.
+ *
+ * Arguments:
+ *
+ * chtab - character class table pointer
+ * code - character code to be tested
+ * cbits - bit mask of character classes
+ *
+ * Returns:
+ *
+ * The return value is zero if the code is not in any of the character
+ * classes. It is non-zero, if the code is in at least one of the
+ * classes.
+ */
+NSAPI_PUBLIC
+int lex_class_check(void * chtab, char code, unsigned long cbits)
+{
+ LEXClassTab_t * lct; /* character class table pointer */
+ unsigned char * bp; /* bit vector pointer */
+ int rv = 0; /* return value */
+ int i; /* loop index */
+
+ lct = (LEXClassTab_t *)chtab;
+
+ bp = lct->lct_bv + code * lct->lct_bvbytes;
+
+ for (i = 0; i < lct->lct_bvbytes; ++i) {
+ if (*bp++ & cbits) {
+ rv = 1;
+ break;
+ }
+ cbits >>= 8;
+ }
+
+ return rv;
+}
+
+/*
+ * Description (lex_class_create)
+ *
+ * This function creates a new character class table. A
+ * character class table is used to map a character code to a
+ * set of character classes. The mapping for a given character
+ * is expressed as a bit vector, where each bit indicates the
+ * membership of that character in one of the character classes.
+ *
+ * Arguments:
+ *
+ * classc - the number of character classes being defined
+ * classv - pointers to null-terminated strings containing
+ * the character codes in each character class
+ * pchtab - indicates where to store a returned handle for
+ * the character class table
+ *
+ * Returns:
+ *
+ * If successful, the return value is the number of character
+ * classes specified (classc), and a handle for the created table
+ * is returned through pchtab.
+ *
+ * Usage Notes:
+ *
+ * Null (\000) can never be in any character classes, since it
+ * marks the end of the classv[] strings.
+ *
+ * classv[] can included NULL pointers, in which case bits will be
+ * allocated for corresponding empty character classes.
+ */
+NSAPI_PUBLIC
+int lex_class_create(int classc, char * classv[], void **pchtab)
+{
+ int ncodes = 128; /* number of character encodings */
+ int bvbytes; /* bytes per bit vector */
+ LEXClassTab_t * ct; /* class table pointer */
+ unsigned char * bp; /* bit vector pointer */
+ char * cp; /* class string pointer */
+ int bitmask; /* class bit mask */
+ int bnum; /* byte number in bit vector */
+ int ci; /* character index */
+ int i; /* class index */
+
+ /* Get number of bytes per bit vector */
+ NS_ASSERT(classc > 0);
+ bvbytes = (classc + 7) >> 3;
+
+ /* Allocate the character class table */
+ ct = (LEXClassTab_t *)calloc(1, sizeof(LEXClassTab_t) + ncodes * bvbytes);
+ if (ct == NULL) {
+
+ /* Error - insufficient memory */
+ return LEXERR_MALLOC;
+ }
+
+ /* Initialize the class table */
+ ct->lct_classc = classc;
+ ct->lct_bvbytes = bvbytes;
+ ct->lct_bv = (unsigned char *)(ct + 1);
+
+ /* Initialize the bit vectors */
+ for (i = 0; i < classc; ++i) {
+
+ cp = classv[i];
+ if (cp != NULL) {
+
+ bitmask = 1 << (i & 7);
+ bnum = i >> 7;
+
+ while ((ci = *cp++) != 0) {
+ bp = ct->lct_bv + ci + bnum;
+ *bp |= bitmask;
+ }
+ }
+ }
+
+ /* Return pointer to table */
+ NS_ASSERT(pchtab != NULL);
+ *pchtab = (void *)ct;
+
+ return classc;
+}
+
+NSAPI_PUBLIC
+void lex_class_destroy(void * chtab)
+{
+ FREE((void *)chtab);
+}
+
+NSAPI_PUBLIC
+LEXStream_t * lex_stream_create(LEXStreamGet_t strmget, void * strmid,
+ char * buf, int buflen)
+{
+ LEXStream_t * lst; /* stream structure pointer */
+
+ /* Allocate the stream structure */
+ lst = (LEXStream_t *)MALLOC(sizeof(LEXStream_t));
+ if (lst == NULL) {
+ /* Error - insufficient memory */
+ return 0;
+ }
+
+ lst->lst_strmid = strmid;
+ lst->lst_get = strmget;
+
+ /*
+ * Allocate a buffer for the stream if there's a positive length
+ * but a NULL buffer pointer.
+ */
+ if ((buflen > 0) && (buf == NULL)) {
+
+ buf = (char *)MALLOC(buflen);
+ if (buf == NULL) {
+ FREE((void *)lst);
+ return 0;
+ }
+
+ /* Also initialize the current position and residual length */
+ lst->lst_cp = buf;
+ lst->lst_len = 0;
+ lst->lst_flags = LST_FREEBUF;
+ }
+
+ lst->lst_buf = buf;
+ lst->lst_buflen = buflen;
+
+ return lst;
+}
+
+NSAPI_PUBLIC
+void lex_stream_destroy(LEXStream_t * lst)
+{
+ if ((lst->lst_flags & LST_FREEBUF) && (lst->lst_buf != NULL)) {
+ FREE(lst->lst_buf);
+ }
+ FREE((void *)lst);
+}
+
+/*
+ * Description (lex_token_new)
+ *
+ * This function creates a new token object. A token object is
+ * used to accumulate text in an associated buffer. If the
+ * 'growlen' argument is specified as a value that is greater
+ * than zero, then the token buffer will be reallocated as
+ * necessary to accomodate more text. The initial size of
+ * the token buffer is given by 'initlen', which may be zero,
+ * and should be zero if lex_token_setbuf() is used.
+ *
+ * The token object is allocated from the memory pool given
+ * by the 'pool' argument. The default pool for the current
+ * thread is used if 'pool' is null.
+ *
+ * Arguments:
+ *
+ * pool - handle for memory pool to be used
+ * initlen - initial length of token buffer
+ * growlen - amount to grow a full token buffer
+ * token - pointer to returned token handle
+ *
+ * Returns:
+ *
+ * If successful, the function return value is zero and a handle
+ * for the new token is returned via 'token'. Otherwise a negative
+ * error code is returned.
+ */
+
+NSAPI_PUBLIC
+int lex_token_new(pool_handle_t * pool, int initlen, int growlen, void **token)
+{
+ LEXToken_t * lt; /* new token pointer */
+
+ /* Allocate the token structure */
+ if (pool) {
+ lt = (LEXToken_t *)pool_calloc(pool, 1, sizeof(LEXToken_t));
+ }
+ else {
+ lt = (LEXToken_t *)CALLOC(sizeof(LEXToken_t));
+ }
+ if (lt == NULL) {
+ /* Error - insufficient memory */
+ return LEXERR_MALLOC;
+ }
+
+ /* Save the memory pool handle for future allocations */
+ lt->lt_mempool = pool;
+
+ /* Allocate the initial token buffer if initlen > 0 */
+ if (initlen > 0) {
+ if (pool) {
+ lt->lt_buf = (char *)pool_malloc(pool, initlen);
+ }
+ else {
+ lt->lt_buf = (char *)MALLOC(initlen);
+ }
+ if (lt->lt_buf == NULL) {
+ /* Error - insufficient memory */
+ if (pool) {
+ pool_free(pool, (void *)lt);
+ }
+ else {
+ FREE((void *)lt);
+ }
+ return LEXERR_MALLOC;
+ }
+
+ lt->lt_initlen = initlen;
+ lt->lt_buflen = initlen;
+ lt->lt_buf[0] = 0;
+ }
+
+ if (growlen > 0) lt->lt_inclen = growlen;
+
+ NS_ASSERT(token != NULL);
+ *token = (void *)lt;
+
+ return 0;
+}
+
+/*
+ * Description (lex_token_start)
+ *
+ * This function discards any current contents of the token buffer
+ * associated with a specified token object, so that any new data
+ * appended to the token will start at the beginning of the token
+ * buffer. If there is no token buffer currently associated with
+ * the token, and the 'initlen' value specified to lex_token_new()
+ * was greater than zero, then a new token buffer is allocated.
+ * This function enables a token and optionally its token buffer
+ * to be reused.
+ *
+ * Arguments:
+ *
+ * token - handle for token object
+ *
+ * Returns:
+ *
+ * If successful, the function return value is zero. Otherwise
+ * a negative error code is returned.
+ */
+
+NSAPI_PUBLIC int
+lex_token_start(void * token)
+{
+ LEXToken_t * lt = (LEXToken_t *)token; /* token pointer */
+
+ /* Do we need to allocate a token buffer? */
+ if ((lt->lt_buf == NULL) && (lt->lt_initlen > 0)) {
+
+ /* Allocate the initial token buffer */
+ if (lt->lt_mempool) {
+ lt->lt_buf = (char *)pool_malloc(lt->lt_mempool, lt->lt_initlen);
+ }
+ else {
+ lt->lt_buf = (char *)MALLOC(lt->lt_initlen);
+ }
+ if (lt->lt_buf == NULL) {
+ /* Error - insufficient memory */
+ return LEXERR_MALLOC;
+ }
+ lt->lt_buflen = lt->lt_initlen;
+ }
+
+ lt->lt_len = 0;
+ lt->lt_buf[0] = 0;
+
+ return 0;
+}
+
+/*
+ * Description (lex_token_info)
+ *
+ * This function returns information about the token buffer currently
+ * associated with a token object. This includes a pointer to the
+ * token data, if any, the current length of the token data, and the
+ * current size of the token buffer.
+ *
+ * Arguments:
+ *
+ * token - handle for token object
+ * tdatalen - pointer to returned token data length
+ * (may be null)
+ * tbufflen - pointer to returned token buffer length
+ * (may be null)
+ *
+ * Returns:
+ *
+ * The function return value is a pointer to the beginning of the
+ * token data, or null if there is no token buffer associated with
+ * the token. The token data length and token buffer length are
+ * returned via 'tdatalen' and 'tbufflen', respectively.
+ */
+
+NSAPI_PUBLIC
+char * lex_token_info(void * token, int * tdatalen, int * tbufflen)
+{
+ LEXToken_t * lt = (LEXToken_t *)token; /* token pointer */
+
+ if (tdatalen) *tdatalen = lt->lt_len;
+ if (tbufflen) *tbufflen = lt->lt_buflen;
+
+ return lt->lt_buf;
+}
+
+/*
+ * Description (lex_token)
+ *
+ * This function returns a pointer to the current token buffer, if any.
+ * If the length of the token is also needed, use lex_token_info().
+ * This function would normally be used when the token is a
+ * null-terminated string. See also lex_token_take().
+ *
+ * Arguments:
+ *
+ * token - handle for token object
+ *
+ * Returns:
+ *
+ * A pointer to the beginning of the current token is returned.
+ * The pointer is null if no token buffer is currently associated
+ * with the token object.
+ */
+
+NSAPI_PUBLIC
+char * lex_token(void * token)
+{
+ LEXToken_t * lt = (LEXToken_t *)token; /* token pointer */
+
+ return lt->lt_buf;
+}
+
+/*
+ * Description (lex_token_destroy)
+ *
+ * This function destroys a specified token object. The memory
+ * associated with the token object and its token buffer, if any,
+ * is freed to whence it came. Note that token objects can be
+ * associated with a memory pool, and destroyed implicitly when
+ * the pool is destroyed via pool_destroy().
+ *
+ * Arguments:
+ *
+ * token - handle for token object
+ */
+
+NSAPI_PUBLIC
+void lex_token_destroy(void * token)
+{
+ LEXToken_t * lt = (LEXToken_t *)token; /* token pointer */
+
+ if (lt) {
+ if (lt->lt_mempool) {
+ if (lt->lt_buf) {
+ pool_free(lt->lt_mempool, (void *)(lt->lt_buf));
+ }
+ pool_free(lt->lt_mempool, (void *)lt);
+ }
+ else {
+ if (lt->lt_buf) {
+ FREE(lt->lt_buf);
+ }
+ FREE(lt);
+ }
+ }
+}
+
+/*
+ * Description (lex_token_get)
+ *
+ * This function returns a pointer to the current token buffer,
+ * leaving the token with no associated token buffer. The caller
+ * assumes ownership of the returned token buffer. The length
+ * of the token data and the length of the token buffer are returned
+ * if requested. Note that lex_token_take() performs a similar
+ * operation.
+ *
+ * Arguments:
+ *
+ * token - handle for token object
+ * tdatalen - pointer to returned token data length
+ * (may be null)
+ * tbufflen - pointer to returned token buffer length
+ * (may be null)
+ *
+ * Returns:
+ *
+ * The function return value is a pointer to the beginning of the
+ * token data, or null if there is no token buffer associated with
+ * the token. The token data length and token buffer length are
+ * returned via 'tdatalen' and 'tbufflen', respectively.
+ */
+
+NSAPI_PUBLIC
+char * lex_token_get(void * token, int * tdatalen, int * tbufflen)
+{
+ LEXToken_t * lt = (LEXToken_t *)token; /* token pointer */
+ char * tokenstr;
+
+ tokenstr = lt->lt_buf;
+ if (tdatalen) *tdatalen = lt->lt_len;
+ if (tbufflen) *tbufflen = lt->lt_buflen;
+
+ lt->lt_buf = NULL;
+ lt->lt_buflen = 0;
+ lt->lt_len = 0;
+
+ return tokenstr;
+}
+
+/*
+ * Description (lex_token_take)
+ *
+ * This function returns a pointer to the current token buffer,
+ * leaving the token with no associated token buffer. The caller
+ * assumes ownership of the returned token buffer. Note that
+ * lex_token_get() performs a similar operation, but returns more
+ * information.
+ *
+ * Arguments:
+ *
+ * token - handle for token object
+ *
+ * Returns:
+ *
+ * A pointer to the beginning of the current token is returned.
+ * The pointer is null if no token buffer is currently associated
+ * with the token object.
+ */
+
+NSAPI_PUBLIC
+char * lex_token_take(void * token)
+{
+ LEXToken_t * lt = (LEXToken_t *)token; /* token pointer */
+ char * tokenstr;
+
+ tokenstr = lt->lt_buf;
+
+ lt->lt_buf = NULL;
+ lt->lt_buflen = 0;
+ lt->lt_len = 0;
+
+ return tokenstr;
+}
+
+/*
+ * Description (lex_token_append)
+ *
+ * This function appends data to the end of a token. If 'growlen'
+ * was specified as a greater-than-zero value for lex_token_new(),
+ * then the token buffer may be reallocated to accomodate the
+ * new data if necessary. A null byte is maintained in the token
+ * buffer following the token data, but it is not included in the
+ * token data length.
+ *
+ * Arguments:
+ *
+ * token - handle for token object
+ * nbytes - number of bytes of new data
+ * src - pointer to new data
+ *
+ * Returns:
+ *
+ * If successful, the function return value is the new length of
+ * the token data. Otherwise a negative error code is returned.
+ */
+
+NSAPI_PUBLIC
+int lex_token_append(void * token, int nbytes, char * src)
+{
+ LEXToken_t * lt = (LEXToken_t *)token; /* token pointer */
+ int bufsize;
+ int length;
+
+ NS_ASSERT(nbytes >= 0);
+ NS_ASSERT((src != NULL) || (nbytes == 0));
+
+ if (nbytes > 0) {
+
+ bufsize = lt->lt_buflen;
+ length = lt->lt_len + nbytes;
+
+ if (length >= bufsize) {
+
+ while (length >= bufsize) {
+ bufsize += lt->lt_inclen;
+ }
+
+ if (lt->lt_mempool) {
+ if (lt->lt_buf) {
+ lt->lt_buf = (char *)pool_realloc(lt->lt_mempool,
+ lt->lt_buf, bufsize);
+ }
+ else {
+ lt->lt_buf = (char *)pool_malloc(lt->lt_mempool, bufsize);
+ }
+ }
+ else {
+ if (lt->lt_buf) {
+ lt->lt_buf = (char *)REALLOC(lt->lt_buf, bufsize);
+ }
+ else {
+ lt->lt_buf = (char *)MALLOC(bufsize);
+ }
+ }
+ }
+
+ if (lt->lt_buf) {
+
+ memcpy((void *)(lt->lt_buf + lt->lt_len), (void *)src, nbytes);
+ lt->lt_buf[length] = 0;
+ lt->lt_len = length;
+ lt->lt_buflen = bufsize;
+ }
+ else {
+ /* Error - insufficient memory */
+ return LEXERR_MALLOC;
+ }
+ }
+
+ return lt->lt_len;
+}
+
+NSAPI_PUBLIC
+int lex_next_char(LEXStream_t * lst, void * chtab, unsigned long cbits)
+{
+ LEXClassTab_t * lct; /* character class table pointer */
+ unsigned char * bp; /* bit vector pointer */
+ unsigned long bitmask; /* class bit mask temporary */
+ int rv; /* return value */
+ int i; /* loop index */
+
+ lct = (LEXClassTab_t *)chtab;
+
+ /* Go get more stream data if none left in the buffer */
+ if (lst->lst_len <= 0) {
+ rv = (*lst->lst_get)(lst);
+ if (rv <= 0) {
+ return rv;
+ }
+ }
+
+ /* Get the next character from the buffer */
+ rv = *lst->lst_cp;
+
+ bitmask = cbits;
+ bp = lct->lct_bv + rv * lct->lct_bvbytes;
+
+ for (i = 0; i < lct->lct_bvbytes; ++i) {
+ if (*bp++ & bitmask) {
+ /* Update the buffer pointer and length */
+ lst->lst_cp += 1;
+ lst->lst_len -= 1;
+ break;
+ }
+ bitmask >>= 8;
+ }
+
+ return rv;
+}
+
+NSAPI_PUBLIC
+int lex_scan_over(LEXStream_t * lst, void * chtab, unsigned long cbits,
+ void * token)
+{
+ LEXClassTab_t * lct; /* character class table pointer */
+ char * cp; /* current pointer in stream buffer */
+ unsigned char * bp; /* bit vector pointer */
+ unsigned long bitmask; /* class bit mask temporary */
+ int cv = 0; /* current character value */
+ int rv = 0; /* return value */
+ int slen; /* token segment length */
+ int done = 0; /* done indication */
+ int i; /* loop index */
+
+ lct = (LEXClassTab_t *)chtab;
+
+ while (!done) {
+
+ /* Go get more stream data if none left in the buffer */
+ if (lst->lst_len <= 0) {
+ rv = (*lst->lst_get)(lst);
+ if (rv <= 0) {
+ return rv;
+ }
+ }
+
+ slen = 0;
+ cp = lst->lst_cp;
+
+ while (slen < lst->lst_len) {
+ cv = *cp;
+ bitmask = cbits;
+ bp = lct->lct_bv + cv * lct->lct_bvbytes;
+ for (i = 0; i < lct->lct_bvbytes; ++i) {
+ if (*bp++ & bitmask) goto more_token;
+ bitmask >>= 8;
+ }
+
+ done = 1;
+ break;
+
+ more_token:
+ slen += 1;
+ cp += 1;
+ }
+
+ /* If the current segment is not empty, append it to the token */
+ if (slen > 0) {
+ rv = lex_token_append(token, slen, lst->lst_cp);
+ if (rv < 0) break;
+
+ /* Update the stream buffer pointer and length */
+ lst->lst_cp += slen;
+ lst->lst_len -= slen;
+ }
+ }
+
+ return ((rv < 0) ? rv : cv);
+}
+
+/*
+ * Description (lex_scan_string)
+ *
+ * This function parses a quoted string into the specified token.
+ * The current character in the LEX stream is taken to be the
+ * beginning quote character. The quote character may be included
+ * in the string by preceding it with a '\'. Any newline
+ * characters to be included in the string must also be preceded
+ * by '\'. The string is terminated by another occurrence of the
+ * quote character, or an unquoted newline, or EOF.
+ *
+ * Arguments:
+ *
+ * lst - pointer to LEX stream structure
+ * token - handle for token
+ * flags - bit flags (unused - must be zero)
+ *
+ * Returns:
+ *
+ * The terminating character is returned, or zero if EOF. The
+ * string is returned in the token, without the beginning and
+ * ending quote characters. An error is indicated by a negative
+ * return value.
+ */
+
+NSAPI_PUBLIC
+int lex_scan_string(LEXStream_t * lst, void * token, int flags)
+{
+ char * cp; /* current pointer in stream buffer */
+ int cv; /* current character value */
+ int rv; /* return value */
+ int slen; /* token segment length */
+ int done = 0; /* done indication */
+ int cquote = 0; /* character quote indication */
+ int qchar = -1; /* quote character */
+
+ while (!done) {
+
+ /* Go get more stream data if none left in the buffer */
+ if (lst->lst_len <= 0) {
+ rv = (*lst->lst_get)(lst);
+ if (rv <= 0) {
+ return rv;
+ }
+ }
+
+ slen = 0;
+ cp = lst->lst_cp;
+
+ while (slen < lst->lst_len) {
+
+ /* Get the next character */
+ cv = *cp;
+
+ /* Pick up the quote character if we don't have it yet */
+ if (qchar < 0) {
+ qchar = cv;
+
+ /* Don't include it in the string */
+ lst->lst_cp += 1;
+ lst->lst_len -= 1;
+ cp += 1;
+ continue;
+ }
+
+ /* cquote is 1 if the last character was '\' */
+ if (cquote == 0) {
+
+ /* Is this a string terminator? */
+ if ((cv == qchar) || (cv == '\n')) {
+
+ /* Append whatever we have to this point */
+ if (slen > 0) goto append_it;
+
+ /*
+ * If the terminator is the expected quote character,
+ * just skip it. If it's anything else, leave it as
+ * the current character.
+ */
+ if (cv == qchar) {
+ lst->lst_cp += 1;
+ lst->lst_len -= 1;
+ }
+
+ done = 1;
+ goto append_it;
+ }
+
+ /* Got the character quote character? */
+ if (cv == '\\') {
+
+ /* Append anything we have so far first */
+ if (slen > 0) goto append_it;
+
+ /* Then skip the character */
+ cquote = 1;
+ lst->lst_cp += 1;
+ lst->lst_len -= 1;
+ cp += 1;
+ continue;
+ }
+ }
+ else {
+
+ /* Include any character following '\' */
+ cquote = 0;
+ }
+
+ /* Include this character in the string */
+ slen += 1;
+ cp += 1;
+ }
+
+ append_it:
+
+ /* If the current segment is not empty, append it to the token */
+ if (slen > 0) {
+ rv = lex_token_append(token, slen, lst->lst_cp);
+ if (rv < 0) break;
+
+ /* Update the stream buffer pointer and length */
+ lst->lst_cp += slen;
+ lst->lst_len -= slen;
+ }
+ }
+
+ return ((rv < 0) ? rv : cv);
+}
+
+NSAPI_PUBLIC
+int lex_scan_to(LEXStream_t * lst, void * chtab, unsigned long cbits,
+ void * token)
+{
+ LEXClassTab_t * lct; /* character class table pointer */
+ unsigned char * bp; /* bit vector pointer */
+ char * cp; /* current pointer in stream buffer */
+ unsigned long bitmask; /* class bit mask temporary */
+ int cv = 0; /* current character value */
+ int rv = 0; /* return value */
+ int slen; /* token segment length */
+ int done = 0; /* done indication */
+ int i; /* loop index */
+
+ lct = (LEXClassTab_t *)chtab;
+
+ while (!done) {
+
+ /* Go get more stream data if none left in the buffer */
+ if (lst->lst_len <= 0) {
+ rv = (*lst->lst_get)(lst);
+ if (rv <= 0) {
+ return rv;
+ }
+ }
+
+ slen = 0;
+ cp = lst->lst_cp;
+
+ while (slen < lst->lst_len) {
+ cv = *cp;
+ bitmask = cbits;
+ bp = lct->lct_bv + cv * lct->lct_bvbytes;
+ for (i = 0; i < lct->lct_bvbytes; ++i) {
+ if (*bp++ & bitmask) {
+ done = 1;
+ goto append_it;
+ }
+ bitmask >>= 8;
+ }
+
+ slen += 1;
+ cp += 1;
+ }
+
+ append_it:
+
+ /* If the current segment is not empty, append it to the token */
+ if (slen > 0) {
+ rv = lex_token_append(token, slen, lst->lst_cp);
+ if (rv < 0) break;
+
+ /* Update the stream buffer pointer and length */
+ lst->lst_cp += slen;
+ lst->lst_len -= slen;
+ }
+ }
+
+ return ((rv < 0) ? rv : cv);
+}
+
+NSAPI_PUBLIC
+int lex_skip_over(LEXStream_t * lst, void * chtab, unsigned long cbits)
+{
+ LEXClassTab_t * lct; /* character class table pointer */
+ unsigned char * bp; /* bit vector pointer */
+ char * cp; /* current pointer in stream buffer */
+ unsigned long bitmask; /* class bit mask temporary */
+ int rv = 0; /* return value */
+ int slen; /* token segment length */
+ int done = 0; /* done indication */
+ int i; /* loop index */
+
+ lct = (LEXClassTab_t *)chtab;
+
+ while (!done) {
+
+ /* Go get more stream data if none left in the buffer */
+ if (lst->lst_len <= 0) {
+ rv = (*lst->lst_get)(lst);
+ if (rv <= 0) {
+ return rv;
+ }
+ }
+
+ slen = 0;
+ cp = lst->lst_cp;
+
+ while (slen < lst->lst_len) {
+ rv = *cp;
+ bitmask = cbits;
+ bp = lct->lct_bv + rv * lct->lct_bvbytes;
+ for (i = 0; i < lct->lct_bvbytes; ++i) {
+ if (*bp++ & bitmask) goto next_ch;
+ bitmask >>= 8;
+ }
+
+ done = 1;
+ break;
+
+ next_ch:
+ slen += 1;
+ cp += 1;
+ }
+
+ if (slen > 0) {
+ /* Update the stream buffer pointer and length */
+ lst->lst_cp += slen;
+ lst->lst_len -= slen;
+ }
+ }
+
+ return rv;
+}
+
+NSAPI_PUBLIC
+int lex_skip_to(LEXStream_t * lst, void * chtab, unsigned long cbits)
+{
+ LEXClassTab_t * lct; /* character class table pointer */
+ unsigned char * bp; /* bit vector pointer */
+ char * cp; /* current pointer in stream buffer */
+ unsigned long bitmask; /* class bit mask temporary */
+ int rv; /* return value */
+ int slen; /* token segment length */
+ int done = 0; /* done indication */
+ int i; /* loop index */
+
+ lct = (LEXClassTab_t *)chtab;
+
+ while (!done) {
+
+ /* Go get more stream data if none left in the buffer */
+ if (lst->lst_len <= 0) {
+ rv = (*lst->lst_get)(lst);
+ if (rv <= 0) {
+ return rv;
+ }
+ }
+
+ slen = 0;
+ cp = lst->lst_cp;
+
+ while (slen < lst->lst_len) {
+ rv = *cp;
+ bitmask = cbits;
+ bp = lct->lct_bv + rv * lct->lct_bvbytes;
+ for (i = 0; i < lct->lct_bvbytes; ++i) {
+ if (*bp++ & bitmask) {
+ done = 1;
+ goto update_it;
+ }
+ bitmask >>= 8;
+ }
+ slen += 1;
+ cp += 1;
+ }
+
+ update_it:
+ /* Update the stream buffer pointer and length */
+ if (slen > 0) {
+ lst->lst_cp += slen;
+ lst->lst_len -= slen;
+ }
+ }
+
+ return rv;
+}