"""
Scribus Scripter C++ docstrings vs. HTML documentation checker.

Compares the docstrings compiled into the scripter plugin (as seen through
the ``scribus`` module) with the text of the HTML API documentation and
writes a report to ``scribus-docs-check.html``.

Only functions and procedures are compared so far; objects and constants
are collected but not checked.

Disclaimer: this code is very ugly. Don't read it if you want to keep
your brain healthy.

NOTE(review): this file was restored from a copy in which the HTML markup
inside string literals had been stripped.  Every literal carrying markup
(and the key extraction in HTMLDocs.parseFile) is a reconstruction and is
marked with a NOTE(review) comment -- confirm against the upstream file.

Petr Vanek
"""

import re
import os
import sys
import pydoc
import difflib
import traceback

try:
    from pysqlite2 import dbapi2 as sqlite
except ImportError:
    # Fall back to the standard library binding when pysqlite2 is absent.
    import sqlite3 as sqlite

from scribus import *

try:
    unicode
except NameError:
    # Python 3 has no separate unicode type.
    unicode = str


class HTMLDocs:
    """ Parse HTML documentation into dictionary
    { 'procedureName' : 'its docstring' }
    """

    # NOTE(review): reconstructed.  The visible source never assigned the
    # dictionary key; the anchor format is taken from the template returned
    # by getHTMLDocs() (<a name="-NAME">).
    anchor = re.compile(r'<a name="-?([^"]+)"')

    def __init__(self):
        self.path = 'en/'
        self.docs = {}
        # API pages that do not describe plain procedures.
        self.ignore = ['scripterapi-ImageExport.html',
                       'scripterapi-PDFfile.html',
                       'scripterapi-Printer.html',
                       'scripterapi-pydoc.html',
                       'scripterapi-constants.html']
        for i in os.listdir(self.path):
            if i.startswith('scripterapi-') and i not in self.ignore:
                self.parseFile(i)

    def strip(self, txt):
        """ Remove HTML tags from txt and collapse whitespace runs.
        Entities (&gt;, &lt;, ...) are left untouched. """
        tmp = re.sub('<.*?>', '', txt)
        return ' '.join(tmp.split())

    def parseFile(self, fname):
        """ Collect the <dd>...</dd> text of every entry in fname into
        self.docs, keyed by the name found in the preceding anchor. """
        print(fname)
        f = open(os.path.join(self.path, fname), 'r')
        try:
            key = ''
            doc = []
            stateAppend = False
            for line in f:
                found = self.anchor.search(line)
                if found:
                    key = found.group(1)
                # NOTE(review): the '<dd>' / '</dd>' markers are
                # reconstructed; the original literals were stripped.
                if not stateAppend and '<dd>' in line:
                    stateAppend = True
                if not stateAppend:
                    continue
                doc.append(self.strip(line))
                if '</dd>' in line:
                    # Normalise whitespace the same way CPPDocs does so the
                    # two texts can be compared directly.
                    self.docs[key] = ' '.join(' '.join(doc).split())
                    doc = []
                    stateAppend = False
        finally:
            f.close()


class CPPDocs:
    """ Parse C++ code into dictionary
    { 'procedureName' : 'its docstring' }
    """

    def __init__(self):
        self.path = os.path.join('..', 'plugins', 'scriptplugin')
        # [python name, docstring variable, kind] triples
        self.allProcedures = []
        # entries of allProcedures with no PyDoc_STRVAR in any header
        self.undocumented = []
        # docstring variable names declared with PyDoc_STRVAR
        self.mapping = []
        # python name -> normalised docstring (or the NameError raised)
        self.currentDocs = {}

        self.getAllProcedures()
        for i in os.listdir(self.path):
            if i.endswith('.h'):
                self.mapping += self.getFileProcedures(i)

        # checking
        documented = set(self.mapping)
        for i in self.allProcedures:
            if i[2] == 'PROCEDURE' and i[1] not in documented:
                self.undocumented.append(i)

        for i in self.allProcedures:
            # TODO: still only procs.
            if i[2] != 'PROCEDURE':
                continue
            name = i[0]
            try:
                # NOTE(review): eval() on a name parsed from
                # scriptplugin.cpp.  Acceptable only because that file is
                # trusted project source; do not feed external input here.
                tmp = eval('pydoc.getdoc(' + name + ')')
            except NameError:
                # Registered in C++ but not exported by the scribus module;
                # keep the error so it shows up in the report.
                self.currentDocs[name] = sys.exc_info()[1]
                continue
            except Exception:
                print('Unhandled exception for:  %s' % name)
                traceback.print_exc(file=sys.stdout)
                continue
            # NOTE(review): entity names reconstructed.  Escaping makes the
            # text comparable with the (tag-stripped) HTML source.
            tmp = tmp.replace('>', '&gt;').replace('<', '&lt;')
            # remove multiple whitespaces - use only ' '
            self.currentDocs[name] = ' '.join(tmp.split())

    def getAllProcedures(self):
        """ Fill self.allProcedures from the registration tables in
        scriptplugin.cpp. """
        f = open(os.path.join(self.path, 'scriptplugin.cpp'), 'r')
        try:
            for line in f:
                # procedures
                # NOTE(review): '<char*>' reconstructed in the literals
                # below; it had been stripped from the damaged copy.
                if line.find('{const_cast<char*>(') != -1:
                    tmp = line.strip().split(',')
                    # clean the components
                    tmp[0] = tmp[0].replace('{const_cast<char*>("', '').replace('")', '')
                    tmp[3] = tmp[3].replace('tr(', '').replace(')}', '').strip()
                    self.allProcedures.append([tmp[0], tmp[3], 'PROCEDURE'])
                # objects
                if line.find('PyModule_AddObject') != -1:
                    tmp = line.strip().replace('PyModule_AddObject(m, (char*)"', '').replace('"', '').replace('(PyObject *) &', '').split(',')
                    self.allProcedures.append([tmp[0], tmp[1], 'OBJECT'])
                # constants
                if line.find('PyDict_SetItemString') != -1:
                    tmp = line.strip().replace('PyDict_SetItemString(d, const_cast<char*>("', '').split('"')
                    self.allProcedures.append([tmp[0], '', 'CONSTANT'])
        finally:
            f.close()

    def getFileProcedures(self, fname):
        """ Return the names declared with PyDoc_STRVAR in header fname. """
        d = []
        f = open(os.path.join(self.path, fname), 'r')
        try:
            for line in f:
                if line.find('PyDoc_STRVAR') != -1:
                    # strip() so an indented declaration still matches the
                    # name taken from scriptplugin.cpp
                    d.append(line.replace('PyDoc_STRVAR(', '').split(',')[0].strip())
        finally:
            f.close()
        return d


def order():
    """ dummy sql clause """
    return ' order by 1'


def getHTMLDocs(name, doc):
    """ Create an HTML template for the scribus documentation.

    The markup is entity-escaped so that the report shows it as text that
    can be pasted into the documentation.
    It looks like it's written in PERL ;)
    """
    # NOTE(review): the '&lt;'/'&gt;'/'&amp;' escaping is reconstructed.
    return """
&lt;dt&gt;&lt;a name="-%s"&gt;&lt;strong&gt;%s&lt;/strong&gt;(...)&lt;/a&gt;&lt;/dt&gt;
&lt;dd&gt;&lt;code&gt;&lt;/code&gt;
&lt;p&gt;%s&lt;/p&gt;&lt;/dd&gt;
""" % (name, name, doc.replace('&', '&amp;'))


print('START')

h = HTMLDocs()
c = CPPDocs()

# NOTE(review): all report markup below (headings, lists, tables) is
# reconstructed; only the visible text survived in the damaged copy.
out = []
out.append('<p>Remember: You have to "make install" after code change to promote the changes here.</p>\n')

out.append('<h2>Undocumented in C++ code</h2>\n<p>Code with no docstrings in *.h files</p>\n<ul>\n')
for i in c.undocumented:
    out.append('<li>' + i[0] + ' - ' + i[1] + '</li>\n')
out.append('</ul>\n')

conn = sqlite.connect(':memory:')
#conn = sqlite.connect('scdoc.db')
cur = conn.cursor()

# Only relevant when a file database is reused; each drop is attempted on
# its own so a missing first table does not skip the second.
for table in ('cpp', 'html'):
    try:
        cur.execute('drop table ' + table)
    except sqlite.OperationalError:
        pass

cur.execute('create table cpp (name varchar(30), doc text)')
cur.execute('create table html (name varchar(30), doc text)')

for i in c.currentDocs:
    cur.execute('insert into cpp values (?, ?)', (unicode(i), unicode(c.currentDocs[i])))
for i in h.docs:
    cur.execute('insert into html values (?, ?)', (unicode(i), unicode(h.docs[i])))
conn.commit()

# undocumented in HTML vs. Code
out.append('<h2>HTML vs. Code</h2>\n<p>Is in HTML but not in the code</p>\n<ul>\n')
for i in cur.execute('select h.name from html as h left outer join cpp as c on (c.name = h.name) where c.name is null' + order()):
    out.append('<li>' + i[0] + '</li>\n')
out.append('</ul>\n')

# undocumented in Code vs HTML
out.append('<h2>Code vs HTML</h2>\n<p>Is in the code but not in HTML</p>\n')
out.append('<table border="1">\n<tr><th>Name</th><th>docstring</th></tr>\n')
for i in cur.execute('select c.name, c.doc from cpp as c left outer join html as h on (c.name = h.name) where h.name is null' + order()):
    out.append('<tr><td>' + i[0] + '</td><td><pre>' + getHTMLDocs(i[0], i[1]) + "</pre></td></tr>\n")
out.append('</table>\n')

# documented in the both places. Checking for diffs in docstrings
out.append('<h2>Doc diffs</h2>\n')
out.append('<table border="1">\n<tr><th>code</th><th>html</th><th>diff</th></tr>\n')
# I don't care about whitespaces
diff = difflib.Differ(charjunk=difflib.IS_CHARACTER_JUNK)
for i in cur.execute('select c.doc, h.doc, c.name, h.name from cpp as c, html as h where c.name = h.name and c.doc != h.doc' + order()):
    out.append('<tr><th>' + i[2] + '</th><th>' + i[3] + '</th>')
    out.append('<td rowspan="2"><pre>' + '\n'.join(list(diff.compare([i[0]], [i[1]]))) + '</pre></td></tr>\n')
    out.append('<tr><td>' + i[0] + '</td><td>' + i[1] + '</td></tr>\n')
out.append('</table>\n')

out.append('<h2>Correct docs</h2>\n')
out.append('<table border="1">\n<tr><th>code</th><th>html</th></tr>\n')
for i in cur.execute('select c.name, h.name from cpp as c, html as h where c.name = h.name and c.doc = h.doc' + order()):
    out.append('<tr><td>' + i[0] + '</td><td>' + i[1] + '</td></tr>\n')
out.append('</table>\n')

print('FINISHING...')
conn.close()

f = open('scribus-docs-check.html', 'w')
try:
    f.write(''.join(out))
finally:
    f.close()

print('END')