/* * token.c -- tokenize strings, a la strtok(3) * * Copyright (C) 2007 Oracle. All rights reserved. * Copyright (C) 2007 Chuck Lever * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this program; if not, write to the * Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 0211-1301 USA * */ /* * We've constructed a simple string tokenizer that is better than * strtok(3) in several ways: * * 1. It doesn't interfere with ongoing tokenizations using strtok(3). * 2. It's re-entrant so we can nest tokenizations, if needed. * 3. It can handle double-quoted delimiters (needed for 'context="sd,fslj"'). * 4. It doesn't alter the string we're tokenizing, so it can work * on write-protected strings as well as writable strings. */ #include #include #include #include #include #include #include "token.h" struct tokenizer_state { char *pos; char delimiter; int error; }; static void find_next_nondelimiter(struct tokenizer_state *tstate) { while (*tstate->pos != '\0' && *tstate->pos == tstate->delimiter) tstate->pos++; } static size_t find_next_delimiter(struct tokenizer_state *tstate) { size_t len = 0; int quote_seen = 0; while (*tstate->pos != '\0') { if (*tstate->pos == '"') quote_seen ^= 1; if (!quote_seen && *tstate->pos == tstate->delimiter) break; len++; tstate->pos++; } /* did the string terminate before the close quote? */ if (quote_seen) { tstate->error = EINVAL; return 0; } return len; } /** * next_token - find the next token in a string and return it * @tstate: pointer to tokenizer context object * * Returns the next token found in the current string. * Returns NULL if there are no more tokens in the string, * or if an error occurs. * * Side effect: tstate is updated */ char *next_token(struct tokenizer_state *tstate) { char *token; size_t len; if (!tstate || !tstate->pos || tstate->error) return NULL; find_next_nondelimiter(tstate); if (*tstate->pos == '\0') goto fail; token = tstate->pos; len = find_next_delimiter(tstate); if (len) { token = strndup(token, len); if (token) return token; tstate->error = ENOMEM; } fail: tstate->pos = NULL; return NULL; /* no tokens found in this string */ } /** * init_tokenizer - return an initialized tokenizer context object * @string: pointer to C string * @delimiter: single character that delimits tokens in @string * * Returns an initialized tokenizer context object */ struct tokenizer_state *init_tokenizer(char *string, char delimiter) { struct tokenizer_state *tstate; tstate = malloc(sizeof(*tstate)); if (tstate) { tstate->pos = string; tstate->delimiter = delimiter; tstate->error = 0; } return tstate; } /** * tokenizer_error - digs error value out of tokenizer context * @tstate: pointer to tokenizer context object * */ int tokenizer_error(struct tokenizer_state *tstate) { return tstate ? tstate->error : 0; } /** * end_tokenizer - free a tokenizer context object * @tstate: pointer to tokenizer context object * */ void end_tokenizer(struct tokenizer_state *tstate) { free(tstate); }