diff options
Diffstat (limited to 'codegen/docextract.py')
-rw-r--r-- | codegen/docextract.py | 185 |
1 files changed, 185 insertions, 0 deletions
diff --git a/codegen/docextract.py b/codegen/docextract.py new file mode 100644 index 0000000..e6c6505 --- /dev/null +++ b/codegen/docextract.py @@ -0,0 +1,185 @@ +# -*- Mode: Python; py-indent-offset: 4 -*- +'''Simple module for extracting GNOME style doc comments from C +sources, so I can use them for other purposes.''' + +import sys, os, string, re + +__all__ = ['extract'] + +class FunctionDoc: + def __init__(self): + self.name = None + self.params = [] + self.description = '' + self.ret = '' + def set_name(self, name): + self.name = name + def add_param(self, name, description): + if name == '...': + name = 'Varargs' + self.params.append((name, description)) + def append_to_last_param(self, extra): + self.params[-1] = (self.params[-1][0], self.params[-1][1] + extra) + def append_to_named_param(self, name, extra): + for i in range(len(self.params)): + if self.params[i][0] == name: + self.params[i] = (name, self.params[i][1] + extra) + return + # fall through to adding extra parameter ... + self.add_param(name, extra) + def append_description(self, extra): + self.description = self.description + extra + def append_return(self, extra): + self.ret = self.ret + extra + + def get_param_description(self, name): + for param, description in self.params: + if param == name: + return description + else: + return '' + +comment_start_pat = re.compile(r'^\s*/\*\*\s') +comment_end_pat = re.compile(r'^\s*\*+/') +comment_line_lead = re.compile(r'^\s*\*\s*') +funcname_pat = re.compile(r'^(\w+)\s*:?') +return_pat = re.compile(r'^(returns:|return\s+value:|returns\s*)(.*\n?)$', + re.IGNORECASE) +param_pat = re.compile(r'^@(\S+)\s*:(.*\n?)$') + +def parse_file(fp, doc_dict): + line = fp.readline() + in_comment_block = 0 + while line: + if not in_comment_block: + if comment_start_pat.match(line): + in_comment_block = 1 + cur_doc = FunctionDoc() + in_description = 0 + in_return = 0 + line = fp.readline() + continue + + # we are inside a comment block ... + if comment_end_pat.match(line): + if not cur_doc.name: + sys.stderr.write("no function name found in doc comment\n") + else: + doc_dict[cur_doc.name] = cur_doc + in_comment_block = 0 + line = fp.readline() + continue + + # inside a comment block, and not the end of the block ... + line = comment_line_lead.sub('', line) + if not line: line = '\n' + + if not cur_doc.name: + match = funcname_pat.match(line) + if match: + cur_doc.set_name(match.group(1)) + elif in_return: + match = return_pat.match(line) + if match: + # assume the last return statement was really part of the + # description + return_start = match.group(1) + cur_doc.ret = match.group(2) + cur_doc.description = cur_doc.description + return_start + \ + cur_doc.ret + else: + cur_doc.append_return(line) + elif in_description: + if line[:12] == 'Description:': + line = line[12:] + match = return_pat.match(line) + if match: + in_return = 1 + return_start = match.group(1) + cur_doc.append_return(match.group(2)) + else: + cur_doc.append_description(line) + elif line == '\n': + # end of parameters + in_description = 1 + else: + match = param_pat.match(line) + if match: + param = match.group(1) + desc = match.group(2) + if param == 'returns': + cur_doc.ret = desc + else: + cur_doc.add_param(param, desc) + else: + # must be continuation + try: + if param == 'returns': + cur_doc.append_return(line) + else: + cur_doc.append_to_last_param(line) + except: + sys.stderr.write('something weird while reading param\n') + line = fp.readline() + +def parse_dir(dir, doc_dict): + for file in os.listdir(dir): + if file in ('.', '..'): continue + path = os.path.join(dir, file) + if os.path.isdir(path): + parse_dir(path, doc_dict) + if len(file) > 2 and file[-2:] == '.c': + parse_file(open(path, 'r'), doc_dict) + +def extract(dirs, doc_dict=None): + if not doc_dict: doc_dict = {} + for dir in dirs: + parse_dir(dir, doc_dict) + return doc_dict + +tmpl_section_pat = re.compile(r'^<!-- ##### (\w+) (\w+) ##### -->$') +def parse_tmpl(fp, doc_dict): + cur_doc = None + + line = fp.readline() + while line: + match = tmpl_section_pat.match(line) + if match: + cur_doc = None # new input shouldn't affect the old doc dict + sect_type = match.group(1) + sect_name = match.group(2) + + if sect_type == 'FUNCTION': + cur_doc = doc_dict.get(sect_name) + if not cur_doc: + cur_doc = FunctionDoc() + cur_doc.set_name(sect_name) + doc_dict[sect_name] = cur_doc + elif line == '<!-- # Unused Parameters # -->\n': + cur_doc = None # don't worry about unused params. + elif cur_doc: + if line[:10] == '@Returns: ': + if string.strip(line[10:]): + cur_doc.append_return(line[10:]) + elif line[0] == '@': + pos = string.find(line, ':') + if pos >= 0: + cur_doc.append_to_named_param(line[1:pos], line[pos+1:]) + else: + cur_doc.append_description(line) + else: + cur_doc.append_description(line) + + line = fp.readline() + +def extract_tmpl(dirs, doc_dict=None): + if not doc_dict: doc_dict = {} + for dir in dirs: + for file in os.listdir(dir): + if file in ('.', '..'): continue + path = os.path.join(dir, file) + if os.path.isdir(path): + continue + if len(file) > 2 and file[-2:] == '.sgml': + parse_tmpl(open(path, 'r'), doc_dict) + return doc_dict |