summaryrefslogtreecommitdiffstats
path: root/codegen/docextract.py
diff options
context:
space:
mode:
Diffstat (limited to 'codegen/docextract.py')
-rw-r--r--codegen/docextract.py185
1 files changed, 185 insertions, 0 deletions
diff --git a/codegen/docextract.py b/codegen/docextract.py
new file mode 100644
index 0000000..e6c6505
--- /dev/null
+++ b/codegen/docextract.py
@@ -0,0 +1,185 @@
+# -*- Mode: Python; py-indent-offset: 4 -*-
+'''Simple module for extracting GNOME style doc comments from C
+sources, so I can use them for other purposes.'''
+
+import sys, os, string, re
+
+__all__ = ['extract']
+
+class FunctionDoc:
+ def __init__(self):
+ self.name = None
+ self.params = []
+ self.description = ''
+ self.ret = ''
+ def set_name(self, name):
+ self.name = name
+ def add_param(self, name, description):
+ if name == '...':
+ name = 'Varargs'
+ self.params.append((name, description))
+ def append_to_last_param(self, extra):
+ self.params[-1] = (self.params[-1][0], self.params[-1][1] + extra)
+ def append_to_named_param(self, name, extra):
+ for i in range(len(self.params)):
+ if self.params[i][0] == name:
+ self.params[i] = (name, self.params[i][1] + extra)
+ return
+ # fall through to adding extra parameter ...
+ self.add_param(name, extra)
+ def append_description(self, extra):
+ self.description = self.description + extra
+ def append_return(self, extra):
+ self.ret = self.ret + extra
+
+ def get_param_description(self, name):
+ for param, description in self.params:
+ if param == name:
+ return description
+ else:
+ return ''
+
+comment_start_pat = re.compile(r'^\s*/\*\*\s')
+comment_end_pat = re.compile(r'^\s*\*+/')
+comment_line_lead = re.compile(r'^\s*\*\s*')
+funcname_pat = re.compile(r'^(\w+)\s*:?')
+return_pat = re.compile(r'^(returns:|return\s+value:|returns\s*)(.*\n?)$',
+ re.IGNORECASE)
+param_pat = re.compile(r'^@(\S+)\s*:(.*\n?)$')
+
+def parse_file(fp, doc_dict):
+ line = fp.readline()
+ in_comment_block = 0
+ while line:
+ if not in_comment_block:
+ if comment_start_pat.match(line):
+ in_comment_block = 1
+ cur_doc = FunctionDoc()
+ in_description = 0
+ in_return = 0
+ line = fp.readline()
+ continue
+
+ # we are inside a comment block ...
+ if comment_end_pat.match(line):
+ if not cur_doc.name:
+ sys.stderr.write("no function name found in doc comment\n")
+ else:
+ doc_dict[cur_doc.name] = cur_doc
+ in_comment_block = 0
+ line = fp.readline()
+ continue
+
+ # inside a comment block, and not the end of the block ...
+ line = comment_line_lead.sub('', line)
+ if not line: line = '\n'
+
+ if not cur_doc.name:
+ match = funcname_pat.match(line)
+ if match:
+ cur_doc.set_name(match.group(1))
+ elif in_return:
+ match = return_pat.match(line)
+ if match:
+ # assume the last return statement was really part of the
+ # description
+ return_start = match.group(1)
+ cur_doc.ret = match.group(2)
+ cur_doc.description = cur_doc.description + return_start + \
+ cur_doc.ret
+ else:
+ cur_doc.append_return(line)
+ elif in_description:
+ if line[:12] == 'Description:':
+ line = line[12:]
+ match = return_pat.match(line)
+ if match:
+ in_return = 1
+ return_start = match.group(1)
+ cur_doc.append_return(match.group(2))
+ else:
+ cur_doc.append_description(line)
+ elif line == '\n':
+ # end of parameters
+ in_description = 1
+ else:
+ match = param_pat.match(line)
+ if match:
+ param = match.group(1)
+ desc = match.group(2)
+ if param == 'returns':
+ cur_doc.ret = desc
+ else:
+ cur_doc.add_param(param, desc)
+ else:
+ # must be continuation
+ try:
+ if param == 'returns':
+ cur_doc.append_return(line)
+ else:
+ cur_doc.append_to_last_param(line)
+ except:
+ sys.stderr.write('something weird while reading param\n')
+ line = fp.readline()
+
+def parse_dir(dir, doc_dict):
+ for file in os.listdir(dir):
+ if file in ('.', '..'): continue
+ path = os.path.join(dir, file)
+ if os.path.isdir(path):
+ parse_dir(path, doc_dict)
+ if len(file) > 2 and file[-2:] == '.c':
+ parse_file(open(path, 'r'), doc_dict)
+
+def extract(dirs, doc_dict=None):
+ if not doc_dict: doc_dict = {}
+ for dir in dirs:
+ parse_dir(dir, doc_dict)
+ return doc_dict
+
+tmpl_section_pat = re.compile(r'^<!-- ##### (\w+) (\w+) ##### -->$')
+def parse_tmpl(fp, doc_dict):
+ cur_doc = None
+
+ line = fp.readline()
+ while line:
+ match = tmpl_section_pat.match(line)
+ if match:
+ cur_doc = None # new input shouldn't affect the old doc dict
+ sect_type = match.group(1)
+ sect_name = match.group(2)
+
+ if sect_type == 'FUNCTION':
+ cur_doc = doc_dict.get(sect_name)
+ if not cur_doc:
+ cur_doc = FunctionDoc()
+ cur_doc.set_name(sect_name)
+ doc_dict[sect_name] = cur_doc
+ elif line == '<!-- # Unused Parameters # -->\n':
+ cur_doc = None # don't worry about unused params.
+ elif cur_doc:
+ if line[:10] == '@Returns: ':
+ if string.strip(line[10:]):
+ cur_doc.append_return(line[10:])
+ elif line[0] == '@':
+ pos = string.find(line, ':')
+ if pos >= 0:
+ cur_doc.append_to_named_param(line[1:pos], line[pos+1:])
+ else:
+ cur_doc.append_description(line)
+ else:
+ cur_doc.append_description(line)
+
+ line = fp.readline()
+
+def extract_tmpl(dirs, doc_dict=None):
+ if not doc_dict: doc_dict = {}
+ for dir in dirs:
+ for file in os.listdir(dir):
+ if file in ('.', '..'): continue
+ path = os.path.join(dir, file)
+ if os.path.isdir(path):
+ continue
+ if len(file) > 2 and file[-2:] == '.sgml':
+ parse_tmpl(open(path, 'r'), doc_dict)
+ return doc_dict