diff options
 openlmi-mof-register            |  16 ++-------
 src/python/lmi/base/mofparse.py | 303 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 307 insertions(+), 12 deletions(-)
diff --git a/openlmi-mof-register b/openlmi-mof-register index 6a0c154..0e31eb7 100755 --- a/openlmi-mof-register +++ b/openlmi-mof-register @@ -30,6 +30,7 @@ import re import sqlite3 from tempfile import NamedTemporaryFile from shutil import copyfile +from lmi.base.mofparse import MOFParser global PEGASUS_REPOSITORY global DEFAULT_NAMESPACE @@ -309,17 +310,8 @@ def db_get_registrations(cursor, mofs, reg): def parse_includes(mofs, namespace): # Get list of all files included from mofs, including the original mofs - # Misuse mofcomp --dry-run - out = subprocess.check_output(["/usr/bin/mofcomp", "-v", "-d", "-n", namespace] - + mofs) - - final_mofs = [] - include = re.compile("^Compiling file (.*)") - for line in out.split("\n"): - match = include.match(line) - if match: - final_mofs.append(match.group(1)) - return final_mofs + parser = MOFParser() + return parser.parse_includes(mofs) def db_register(mofs, reg, version, namespace, cimom): db = db_init() @@ -501,7 +493,7 @@ def main(): cmd = args[0] if cmd == "register" or cmd == "unregister": if (just_mofs and (len(args) < 2)) or ((not just_mofs) and (len(args) < 3)): - sys.stderr.write("Not enough parameters.\n" ) + sys.stderr.write("Not enough parameters.\n") usage() sys.exit(1) if (not just_mofs) and (version is None): diff --git a/src/python/lmi/base/mofparse.py b/src/python/lmi/base/mofparse.py new file mode 100644 index 0000000..e51c8be --- /dev/null +++ b/src/python/lmi/base/mofparse.py @@ -0,0 +1,303 @@ +# Copyright (C) 2012 Red Hat, Inc. All rights reserved. +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +# Authors: Jan Safranek <jsafrane@redhat.com> +# -*- coding: utf-8 -*- +""" +Simple parser of MOF files. The only thing that is parsed out is list of +included files. This list is composed recursively, i.e. a MOF file can +include another MOF file, which can then include another one. +""" +# Based on pywbem.mof_compiler.py, (C) Copyright 2006-2007 Novell, Inc. + +from pywbem import lex +from pywbem import yacc +from pywbem.lex import TOKEN +import os + +class MOFLexer: + """ Lexer for MOF files. """ + def __init__(self, **kwargs): + self.lexer = lex.lex(module=self, **kwargs) + + reserved = { + 'pragma':'PRAGMA', + 'false': 'FALSE', + 'true': 'TRUE', + 'null': 'NULL' + } + + tokens = reserved.values() + [ + 'IDENTIFIER', + 'stringValue', + 'floatValue', + 'charValue', + 'binaryValue', + 'octalValue', + 'decimalValue', + 'hexValue', + ] + + literals = '#(){};[],$:=' + + # UTF-8 (from Unicode 4.0.0 standard): + # Table 3-6. 
Well-Formed UTF-8 Byte Sequences Code Points + # 1st Byte 2nd Byte 3rd Byte 4th Byte + # U+0000..U+007F 00..7F + # U+0080..U+07FF C2..DF 80..BF + # U+0800..U+0FFF E0 A0..BF 80..BF + # U+1000..U+CFFF E1..EC 80..BF 80..BF + # U+D000..U+D7FF ED 80..9F 80..BF + # U+E000..U+FFFF EE..EF 80..BF 80..BF + # U+10000..U+3FFFF F0 90..BF 80..BF 80..BF + # U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF + # U+100000..U+10FFFF F4 80..8F 80..BF 80..BF + + utf8_2 = r'[\xC2-\xDF][\x80-\xBF]' + utf8_3_1 = r'\xE0[\xA0-\xBF][\x80-\xBF]' + utf8_3_2 = r'[\xE1-\xEC][\x80-\xBF][\x80-\xBF]' + utf8_3_3 = r'\xED[\x80-\x9F][\x80-\xBF]' + utf8_3_4 = r'[\xEE-\xEF][\x80-\xBF][\x80-\xBF]' + utf8_4_1 = r'\xF0[\x90-\xBF][\x80-\xBF][\x80-\xBF]' + utf8_4_2 = r'[\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]' + utf8_4_3 = r'\xF4[\x80-\x8F][\x80-\xBF][\x80-\xBF]' + + utf8Char = r'(%s)|(%s)|(%s)|(%s)|(%s)|(%s)|(%s)|(%s)' % (utf8_2, utf8_3_1, + utf8_3_2, utf8_3_3, utf8_3_4, utf8_4_1, utf8_4_2, utf8_4_3) + + def t_COMMENT(self, t): + r'//.*' + pass + + def t_MCOMMENT(self, t): + r'/\*(.|\n)*?\*/' + t.lineno += t.value.count('\n') + + + t_binaryValue = r'[+-]?[01]+[bB]' + t_octalValue = r'[+-]?0[0-7]+' + t_decimalValue = r'[+-]?([1-9][0-9]*|0)' + t_hexValue = r'[+-]?0[xX][0-9a-fA-F]+' + t_floatValue = r'[+-]?[0-9]*\.[0-9]+([eE][+-]?[0-9]+)?' 
+ + simpleEscape = r"""[bfnrt'"\\]""" + hexEscape = r'x[0-9a-fA-F]{1,4}' + escapeSequence = r'[\\]((%s)|(%s))' % (simpleEscape, hexEscape) + cChar = r"[^'\\\n\r]|(%s)" % escapeSequence + sChar = r'[^"\\\n\r]|(%s)' % escapeSequence + charValue = r"'%s'" % cChar + + t_stringValue = r'"(%s)*"' % sChar + + identifier_re = r'([a-zA-Z_]|(%s))([0-9a-zA-Z_]|(%s))*' % (utf8Char, utf8Char) + + @TOKEN(identifier_re) + def t_IDENTIFIER(self, t): + t.type = self.reserved.get(t.value.lower(), 'IDENTIFIER') # check for reserved word + return t + + # Define a rule so we can track line numbers + def t_newline(self, t): + r'\n+' + t.lexer.lineno += len(t.value) + t.lexer.linestart = t.lexpos + + t_ignore = ' \r\t' + + # Error handling rule + def t_error(self, t): + msg = "Illegal character '%s' " % t.value[0] + msg += "Line %d" % (t.lineno) + t.lexer.parser.log(msg) + t.lexer.skip(1) + +class MOFParseError(ValueError): + pass + +class MOFParser: + """ Parser of '#pragma include' directivers in MOF files.""" + tokens = MOFLexer.tokens + + def __init__(self, **kwargs): + self.lexer = MOFLexer() + self.parser = yacc.yacc(module=self, **kwargs) + self.files = [] + + def p_error(self, p): + ex = MOFParseError('Parse error at line %d' % (p.lineno)) + if p is None: + ex.args = ('Unexpected end of file',) + raise ex + ex.file = self.filename + ex.lineno = p.lineno + raise ex + + + def p_mofSpecification(self, p): + """mof : mofItemList""" + + def p_mofItemList(self, p): + """mofItemList : empty + | mofItemList mofItem + """ + + def p_mofItem(self, p): + """mofItem : pragma + | IDENTIFIER + | literal + | value + """ + + + def p_pragma(self, p): + """pragma : '#' PRAGMA pragmaName '(' pragmaParameter ')'""" + directive = p[3].lower() + param = p[5] + if directive == 'include': + fname = param + fname = os.path.dirname(self.filename) + '/' + fname + self._parse_file(fname) + + def p_pragmaName(self, p): + """pragmaName : identifier""" + p[0] = p[1] + + def p_pragmaParameter(self, p): + 
"""pragmaParameter : stringValue""" + p[0] = self._fixStringValue(p[1]) + + def _fixStringValue(self, s): + s = s[1:-1] + rv = '' + esc = False + i = -1 + while i < len(s) - 1: + i += 1 + ch = s[i] + if ch == '\\' and not esc: + esc = True + continue + if not esc: + rv += ch + continue + + if ch == '"' : rv += '"' + elif ch == 'n' : rv += '\n' + elif ch == 't' : rv += '\t' + elif ch == 'b' : rv += '\b' + elif ch == 'f' : rv += '\f' + elif ch == 'r' : rv += '\r' + elif ch == '\\': rv += '\\' + elif ch in ['x', 'X']: + hexc = 0 + j = 0 + i += 1 + while j < 4: + c = s[i + j]; + c = c.upper() + if not c.isdigit() and not c in 'ABCDEF': + break; + hexc <<= 4 + if c.isdigit(): + hexc |= ord(c) - ord('0') + else: + hexc |= ord(c) - ord('A') + 0XA + j += 1 + rv += chr(hexc) + i += j - 1 + + esc = False + + return rv + + def p_value(self, p): + """value : integerValue + | floatValue + | charValue + | stringValue + | booleanValue + | nullValue + """ + + def p_literal(self, p): + """literal : '(' + | ')' + | '{' + | '}' + | ';' + | '[' + | ']' + | ',' + | '$' + | ':' + | '=' + """ + + def p_integerValue(self, p): + """integerValue : binaryValue + | octalValue + | decimalValue + | hexValue + """ + + def p_booleanValue(self, p): + """booleanValue : FALSE + | TRUE + """ + def p_nullValue(self, p): + """nullValue : NULL""" + + def p_identifier(self, p): + """identifier : IDENTIFIER + """ + p[0] = p[1] + + def p_empty(self, p): + 'empty :' + + + def _parse_file(self, fname): + f = open(fname, 'r') + mof = f.read() + f.close() + + self.files.append(fname) + + old_filename = self.filename + self.filename = fname + + # we must use fresh lexer so the old one can continue with parsing the + # old file + lex = self.lexer.lexer.clone() + lex.parser = self.parser + self.parser.parse(mof, lexer=lex) + self.filename = old_filename + + + def parse_includes(self, fnames): + """ + Parse given MOF files and return array with all parsed files, + including the included ones. 
+ """ + self.filename = '__main__' + self.files = [] + + for fname in fnames: + if fname[0] != '/': + fname = os.path.curdir + '/' + fname + self._parse_file(fname) + + return self.files |