# Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# Authors: Jan Safranek
# -*- coding: utf-8 -*-
"""
Simple parser of MOF files. The only thing that is parsed out is list of
included files. This list is composed recursively, i.e. a MOF file can
include another MOF file, which can then include another one.
"""

# Based on pywbem.mof_compiler.py, (C) Copyright 2006-2007 Novell, Inc.

from pywbem import lex
from pywbem import yacc
from pywbem.lex import TOKEN
import logging
import os

# Single-character escapes understood inside a MOF string literal, mapped to
# the character they stand for.  The set mirrors MOFLexer.simpleEscape.
_SIMPLE_ESCAPES = {
    '"': '"',
    "'": "'",
    '\\': '\\',
    'n': '\n',
    't': '\t',
    'b': '\b',
    'f': '\f',
    'r': '\r',
}

# Characters accepted as digits of a '\x' escape.
_HEX_DIGITS = '0123456789abcdefABCDEF'


class MOFLexer:
    """ Lexer for MOF files. """

    # NOTE: the docstrings of the t_* methods below are the token regular
    # expressions consumed by lex; they must not be used for documentation.

    def __init__(self, **kwargs):
        self.lexer = lex.lex(module=self, **kwargs)

    # Keywords; looked up case-insensitively in t_IDENTIFIER.
    reserved = {
        'pragma': 'PRAGMA',
        'false': 'FALSE',
        'true': 'TRUE',
        'null': 'NULL'
    }

    # list() is required on Python 3, where dict.values() is a view that
    # cannot be concatenated with a list.
    tokens = list(reserved.values()) + [
        'IDENTIFIER',
        'stringValue',
        'floatValue',
        'charValue',
        'binaryValue',
        'octalValue',
        'decimalValue',
        'hexValue',
    ]

    literals = '#(){};[],$:='

    # UTF-8 (from Unicode 4.0.0 standard):
    # Table 3-6. Well-Formed UTF-8 Byte Sequences Code Points
    # 1st Byte 2nd Byte 3rd Byte 4th Byte
    # U+0000..U+007F     00..7F
    # U+0080..U+07FF     C2..DF   80..BF
    # U+0800..U+0FFF     E0       A0..BF   80..BF
    # U+1000..U+CFFF     E1..EC   80..BF   80..BF
    # U+D000..U+D7FF     ED       80..9F   80..BF
    # U+E000..U+FFFF     EE..EF   80..BF   80..BF
    # U+10000..U+3FFFF   F0       90..BF   80..BF   80..BF
    # U+40000..U+FFFFF   F1..F3   80..BF   80..BF   80..BF
    # U+100000..U+10FFFF F4       80..8F   80..BF   80..BF

    utf8_2 = r'[\xC2-\xDF][\x80-\xBF]'
    utf8_3_1 = r'\xE0[\xA0-\xBF][\x80-\xBF]'
    utf8_3_2 = r'[\xE1-\xEC][\x80-\xBF][\x80-\xBF]'
    utf8_3_3 = r'\xED[\x80-\x9F][\x80-\xBF]'
    utf8_3_4 = r'[\xEE-\xEF][\x80-\xBF][\x80-\xBF]'
    utf8_4_1 = r'\xF0[\x90-\xBF][\x80-\xBF][\x80-\xBF]'
    utf8_4_2 = r'[\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]'
    utf8_4_3 = r'\xF4[\x80-\x8F][\x80-\xBF][\x80-\xBF]'

    utf8Char = r'(%s)|(%s)|(%s)|(%s)|(%s)|(%s)|(%s)|(%s)' % (
        utf8_2, utf8_3_1, utf8_3_2, utf8_3_3, utf8_3_4,
        utf8_4_1, utf8_4_2, utf8_4_3)

    # Single-line comment: matched and discarded (no token is returned).
    def t_COMMENT(self, t):
        r'//.*'
        pass

    # Block comment: discarded, but the newlines it contains still have to
    # be counted.  The counter lives on the lexer; changing t.lineno on a
    # token that is thrown away would leave later line numbers wrong.
    def t_MCOMMENT(self, t):
        r'/\*(.|\n)*?\*/'
        t.lexer.lineno += t.value.count('\n')

    t_binaryValue = r'[+-]?[01]+[bB]'
    t_octalValue = r'[+-]?0[0-7]+'
    t_decimalValue = r'[+-]?([1-9][0-9]*|0)'
    t_hexValue = r'[+-]?0[xX][0-9a-fA-F]+'
    t_floatValue = r'[+-]?[0-9]*\.[0-9]+([eE][+-]?[0-9]+)?'

    simpleEscape = r"""[bfnrt'"\\]"""
    hexEscape = r'x[0-9a-fA-F]{1,4}'
    escapeSequence = r'[\\]((%s)|(%s))' % (simpleEscape, hexEscape)
    cChar = r"[^'\\\n\r]|(%s)" % escapeSequence
    sChar = r'[^"\\\n\r]|(%s)' % escapeSequence
    # NOTE(review): this pattern is not bound to a t_charValue rule, so no
    # rule in this class produces the 'charValue' token declared in `tokens`.
    # Left untouched on purpose -- confirm whether that is intended.
    charValue = r"'%s'" % cChar

    t_stringValue = r'"(%s)*"' % sChar

    identifier_re = r'([a-zA-Z_]|(%s))([0-9a-zA-Z_]|(%s))*' % (
        utf8Char, utf8Char)

    @TOKEN(identifier_re)
    def t_IDENTIFIER(self, t):
        # check for reserved word
        t.type = self.reserved.get(t.value.lower(), 'IDENTIFIER')
        return t

    # Define a rule so we can track line numbers
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)
        t.lexer.linestart = t.lexpos

    t_ignore = ' \r\t'

    # Error handling rule: report the offending character and skip it.
    def t_error(self, t):
        msg = "Illegal character '%s' " % t.value[0]
        msg += "Line %d" % (t.lineno)
        # The parser attached to the lexer in MOFParser._parse_file is the
        # object returned by yacc.yacc(); nothing in this module gives it a
        # 'log' attribute, so use it only when a caller has provided one and
        # otherwise report through the standard logging module.
        log = getattr(getattr(t.lexer, 'parser', None), 'log', None)
        if callable(log):
            log(msg)
        else:
            logging.getLogger(__name__).warning(msg)
        t.lexer.skip(1)


class MOFParseError(ValueError):
    """
    Raised by MOFParser on a syntax error.  When the error is tied to a
    token, the instance carries 'file' and 'lineno' attributes.
    """
    pass


class MOFParser:
    """ Parser of '#pragma include' directives in MOF files."""

    # NOTE: the docstrings of the p_* methods below are the grammar
    # productions consumed by yacc; they must not be used for documentation.

    tokens = MOFLexer.tokens

    def __init__(self, **kwargs):
        self.lexer = MOFLexer()
        self.parser = yacc.yacc(module=self, **kwargs)
        # Files parsed so far, in the order they were opened.
        self.files = []
        # Name of the file currently being parsed; parse_includes() resets
        # it, it is set here so the attribute always exists.
        self.filename = '__main__'

    # Syntax error callback.  'p' is the offending token, or None when the
    # input ended unexpectedly -- that case must be tested before any
    # attribute of 'p' is read.
    def p_error(self, p):
        if p is None:
            raise MOFParseError('Unexpected end of file')
        ex = MOFParseError('Parse error at line %d' % (p.lineno))
        ex.file = self.filename
        ex.lineno = p.lineno
        raise ex

    def p_mofSpecification(self, p):
        """mof : mofItemList"""

    def p_mofItemList(self, p):
        """mofItemList : empty
                       | mofItemList mofItem
                       """

    # Everything except pragmas is accepted and ignored token by token.
    def p_mofItem(self, p):
        """mofItem : pragma
                   | IDENTIFIER
                   | literal
                   | value
                   """

    # '#pragma include("file")': parse the named file immediately, resolving
    # the name against the directory of the file that contains the pragma.
    # Other pragmas are ignored.
    def p_pragma(self, p):
        """pragma : '#' PRAGMA pragmaName '(' pragmaParameter ')'"""
        directive = p[3].lower()
        param = p[5]
        if directive == 'include':
            # os.path.join() keeps an absolute include path intact instead
            # of gluing it behind the including file's directory.
            fname = os.path.join(os.path.dirname(self.filename), param)
            self._parse_file(fname)

    def p_pragmaName(self, p):
        """pragmaName : identifier"""
        p[0] = p[1]

    def p_pragmaParameter(self, p):
        """pragmaParameter : stringValue"""
        p[0] = self._fixStringValue(p[1])

    def _fixStringValue(self, s):
        """
        Return the content of the quoted MOF string literal 's' with the
        surrounding quotes removed and escape sequences decoded.

        Handles the single-character escapes listed in _SIMPLE_ESCAPES and
        '\\x' / '\\X' followed by up to four hexadecimal digits.  Any other
        escaped character, and a backslash that ends the string, is dropped.
        """
        s = s[1:-1]
        out = []
        end = len(s)
        i = 0
        while i < end:
            ch = s[i]
            i += 1
            if ch != '\\':
                out.append(ch)
                continue
            if i >= end:
                # Backslash with nothing after it: nothing to decode.
                break
            esc = s[i]
            i += 1
            if esc in ('x', 'X'):
                # Consume at most four hex digits, stopping at the end of
                # the string instead of indexing past it.
                code = 0
                digits = 0
                while digits < 4 and i < end and s[i] in _HEX_DIGITS:
                    code = (code << 4) | int(s[i], 16)
                    i += 1
                    digits += 1
                out.append(chr(code))
            else:
                out.append(_SIMPLE_ESCAPES.get(esc, ''))
        return ''.join(out)

    def p_value(self, p):
        """value : integerValue
                 | floatValue
                 | charValue
                 | stringValue
                 | booleanValue
                 | nullValue
                 """

    def p_literal(self, p):
        """literal : '('
                   | ')'
                   | '{'
                   | '}'
                   | ';'
                   | '['
                   | ']'
                   | ','
                   | '$'
                   | ':'
                   | '='
                   """

    def p_integerValue(self, p):
        """integerValue : binaryValue
                        | octalValue
                        | decimalValue
                        | hexValue
                        """

    def p_booleanValue(self, p):
        """booleanValue : FALSE
                        | TRUE
                        """

    def p_nullValue(self, p):
        """nullValue : NULL"""

    def p_identifier(self, p):
        """identifier : IDENTIFIER """
        p[0] = p[1]

    def p_empty(self, p):
        'empty :'

    def _parse_file(self, fname):
        """
        Read 'fname', record it in self.files and parse it.  Included files
        are parsed recursively through p_pragma.

        self.filename is switched to 'fname' for the duration of the parse
        and restored afterwards, also when the parse raises.

        NOTE(review): there is no protection against files that include
        each other in a cycle.
        """
        with open(fname, 'r') as f:
            mof = f.read()

        self.files.append(fname)
        old_filename = self.filename
        self.filename = fname
        try:
            # we must use fresh lexer so the old one can continue with
            # parsing the old file
            file_lexer = self.lexer.lexer.clone()
            file_lexer.parser = self.parser
            self.parser.parse(mof, lexer=file_lexer)
        finally:
            self.filename = old_filename

    def parse_includes(self, fnames):
        """
        Parse given MOF files and return array with all parsed files,
        including the included ones.

        Names that do not start with '/' are prefixed with the current
        directory marker ('./').
        """
        self.filename = '__main__'
        self.files = []
        for fname in fnames:
            # startswith() also copes with an empty name, which then fails
            # in open() rather than with an IndexError here.
            if not fname.startswith('/'):
                fname = os.path.curdir + '/' + fname
            self._parse_file(fname)
        return self.files