diff options
| author | Greg Hudson <ghudson@mit.edu> | 2012-09-24 16:39:39 -0400 |
|---|---|---|
| committer | Greg Hudson <ghudson@mit.edu> | 2012-10-04 10:35:14 -0400 |
| commit | 70a119d4dc7ed7a94cfc32c523352af1d000e1c7 (patch) | |
| tree | 96659ade53637e6cefb9b2621c329c1b78265516 /src | |
| parent | 123ff4cb9bdd2e13aa6b636c98a7fc3f9ee06f85 (diff) | |
| download | krb5-70a119d4dc7ed7a94cfc32c523352af1d000e1c7.tar.gz krb5-70a119d4dc7ed7a94cfc32c523352af1d000e1c7.tar.xz krb5-70a119d4dc7ed7a94cfc32c523352af1d000e1c7.zip | |
Add Python scripts to check for C style issues
util/cstyle-file.py checks a file for C style issues and displays
line-by-line output. It is not terribly sophisticated, and can
probably be improved upon (e.g. by doing an emacs batch-reindent of
the file and checking for differences in indentation).
util/cstyle.py produces diffs using git, runs the file checker on each
modified C source file in each diff, and displays the output lines
attribute to the diff.
Diffstat (limited to 'src')
| -rw-r--r-- | src/util/cstyle-file.py | 262 | ||||
| -rw-r--r-- | src/util/cstyle.py | 188 |
2 files changed, 450 insertions, 0 deletions
diff --git a/src/util/cstyle-file.py b/src/util/cstyle-file.py new file mode 100644 index 000000000..edf8a3917 --- /dev/null +++ b/src/util/cstyle-file.py @@ -0,0 +1,262 @@ +# Copyright (C) 2012 by the Massachusetts Institute of Technology. +# All rights reserved. +# +# Export of this software from the United States of America may +# require a specific license from the United States Government. +# It is the responsibility of any person or organization contemplating +# export to obtain such a license before exporting. +# +# WITHIN THAT CONSTRAINT, permission to use, copy, modify, and +# distribute this software and its documentation for any purpose and +# without fee is hereby granted, provided that the above copyright +# notice appear in all copies and that both that copyright notice and +# this permission notice appear in supporting documentation, and that +# the name of M.I.T. not be used in advertising or publicity pertaining +# to distribution of the software without specific, written prior +# permission. Furthermore if you modify this software you must label +# your software as modified software and not distribute it in such a +# fashion that it might be confused with the original M.I.T. software. +# M.I.T. makes no representations about the suitability of +# this software for any purpose. It is provided "as is" without express +# or implied warranty. + +# This program checks for some kinds of MIT krb5 coding style +# violations in a single file. Checked violations include: +# +# Line is too long +# Tabs violations +# Trailing whitespace and final blank lines +# Comment formatting errors +# Preprocessor statements in function bodies +# Misplaced braces +# Space before paren in function call, or no space after if/for/while +# Parenthesized return expression +# Space after cast operator, or no space before * in cast operator +# Line broken before binary operator +# Lack of spaces around binary operator (sometimes) +# Assignment at the beginning of an if conditional +# Use of prohibited string functions +# Lack of braces around 2+ line flow control body +# +# This program does not check for the following: +# +# Anything outside of a function body except line length/whitespace +# Anything non-syntactic (proper cleanup flow control, naming, etc.) +# Indentation or alignment of continuation lines +# UTF-8 violations +# Implicit tests against NULL or '\0' +# Inner-scope variable declarations +# Over- or under-parenthesization +# Long or deeply nested function bodies +# Syntax of function calls through pointers + +import re +import sys + +def warn(ln, msg): + print '%5d %s' % (ln, msg) + + +def check_length(line, ln): + if len(line) > 79 and not line.startswith(' * Copyright'): + warn(ln, 'Length exceeds 79 characters') + + +def check_tabs(line, ln, allow_tabs, seen_tab): + if not allow_tabs: + if '\t' in line: + warn(ln, 'Tab character in file which does not allow tabs') + else: + if ' \t' in line: + warn(ln, 'Tab character immediately following space') + if ' ' in line and seen_tab: + warn(ln, '8+ spaces in file which uses tabs') + + +def check_trailing_whitespace(line, ln): + if line and line[-1] in ' \t': + warn(ln, 'Trailing whitespace') + + +def check_comment(lines, ln): + align = lines[0].index('/*') + 1 + if not lines[0].lstrip().startswith('/*'): + warn(ln, 'Multi-line comment begins after code') + for line in lines[1:]: + ln += 1 + if len(line) <= align or line[align] != '*': + warn(ln, 'Comment line does not have * aligned with top') + elif line[:align].lstrip() != '': + warn(ln, 'Garbage before * in comment line') + if not lines[-1].rstrip().endswith('*/'): + warn(ln, 'Code after end of multi-line comment') + if len(lines) > 2 and (lines[0].strip() not in ('/*', '/**') or + lines[-1].strip() != '*/'): + warn(ln, 'Comment is 3+ lines but is not formatted as block comment') + + +def check_preprocessor(line, ln): + if line.startswith('#'): + warn(ln, 'Preprocessor statement in function body') + + +def check_braces(line, ln): + # Strip out one-line initializer expressions. + line = re.sub(r'=\s*{.*}', '', line) + if line.lstrip().startswith('{') and not line.startswith('{'): + warn(ln, 'Un-cuddled open brace') + if re.search(r'{\s*\S', line): + warn(ln, 'Code on line after open brace') + if re.search(r'\S.*}', line): + warn(ln, 'Code on line before close brace') + + +# This test gives false positives on function pointer type +# declarations or casts. Avoid this by using typedefs. +def check_space_before_paren(line, ln): + for m in re.finditer(r'([\w]+)(\s*)\(', line): + ident, ws = m.groups() + if ident in ('if', 'for', 'while', 'switch'): + if not ws: + warn(ln, 'No space after flow control keyword') + elif ident != 'return': + if ws: + warn(ln, 'Space before paren in function call') + + +def check_parenthesized_return(line, ln): + if re.search(r'return\s*\(.*\);', line): + warn(ln, 'Parenthesized return expression') + + +def check_cast(line, ln): + # We can't reliably distinguish cast operators from parenthesized + # expressions or function call parameters without a real C parser, + # so we use some heuristics. A cast operator is followed by an + # expression, which usually begins with an identifier or an open + # paren. A function call or parenthesized expression is never + # followed by an identifier and only rarely by an open paren. We + # won't detect a cast operator when it's followed by an expression + # beginning with '*', since it's hard to distinguish that from a + # multiplication operator. We will get false positives from + # "(*fp) (args)" and "if (condition) statement", but both of those + # are erroneous anyway. + for m in re.finditer(r'\(([^(]+)\)(\s+)[a-zA-Z_]', line): + if m.group(2): + warn(ln, 'Space after cast operator (or inline if/while body)') + # Check for casts like (char*) which should have a space. + if re.search(r'[^\s\*]\*+$', m.group(1)): + warn(ln, 'No space before * in cast operator') + + +def check_binary_operator(line, ln): + binop = r'(\+|-|\*|/|%|\^|==|=|!=|<=|<|>=|>|&&|&|\|\||\|)' + if re.match(r'\s*' + binop + r'\s', line): + warn(ln - 1, 'Line broken before binary operator') + if re.search(r'\w' + binop + r'\w', line): + warn(ln, 'No space before or after binary operator') + + +def check_assignment_in_conditional(line, ln): + # Check specifically for if statements; we allow assignments in + # loop expressions. + if re.search(r'if\s*\(+\w+\s*=[^=]', line): + warn(ln, 'Assignment in if conditional') + + +def check_unbraced_do(line, ln): + if re.match(r'\s*do$', line): + warn(ln, 'do statement without braces') + + +def check_bad_string_fn(line, ln): + # This is intentionally pretty fuzzy so that we catch the whole scanf + if re.search(r'\W(strcpy|strcat|sprintf|\w*scanf)\W', line): + warn(ln, 'Prohibited string function') + + +def check_file(lines): + # Check if this file allows tabs. + if len(lines) == 0: + return + allow_tabs = 'indent-tabs-mode: nil' not in lines[0] + seen_tab = False + + in_function = False + unbraced_flow_body_count = -1 + comment = [] + ln = 0 + for line in lines: + ln += 1 + line = line.rstrip('\r\n') + indent = len(re.match('\s*', line).group(0).expandtabs()) + seen_tab = seen_tab or ('\t' in line) + + # Check line structure issues before altering the line. + check_length(line, ln) + check_tabs(line, ln, allow_tabs, seen_tab) + check_trailing_whitespace(line, ln) + + # Strip out single-line comments the contents of string literals. + if not comment: + line = re.sub(r'/\*.*?\*/', '', line) + line = re.sub(r'"(\\.|[^"])*"', '""', line) + + # Parse out and check multi-line comments. (Ignore code on + # the first or last line; check_comment will warn about it.) + if comment or '/*' in line: + comment.append(line) + if '*/' in line: + check_comment(comment, ln - len(comment) + 1) + comment = [] + continue + + # Warn if we see a // comment and ignore anything following. + if '//' in line: + warn(ln, '// comment') + line = re.sub(r'//.*/', '', line) + + if line.startswith('{'): + in_function = True + elif line.startswith('}'): + in_function = False + + if in_function: + check_preprocessor(line, ln) + check_braces(line, ln) + check_space_before_paren(line, ln) + check_parenthesized_return(line, ln) + check_cast(line, ln) + check_binary_operator(line, ln) + check_assignment_in_conditional(line, ln) + check_unbraced_do(line, ln) + check_bad_string_fn(line, ln) + + # Check for unbraced flow statement bodies. + if unbraced_flow_body_count != -1: + if indent > flow_statement_indent: + unbraced_flow_body_count += 1 + else: + if unbraced_flow_body_count > 1: + warn(ln - 1, 'Body is 2+ lines but has no braces') + unbraced_flow_body_count = -1 + if (re.match('(\s*)(if|else if|for|while)\s*\(.*\)$', line) or + re.match('(\s*)else$', line)): + unbraced_flow_body_count = 0 + flow_statement_indent = indent + + if lines[-1] == '': + warn(ln, 'Blank line at end of file') + + +if len(sys.argv) == 1: + lines = sys.stdin.readlines() +elif len(sys.argv) == 2: + f = open(sys.argv[1]) + lines = f.readlines() + f.close() +else: + sys.stderr.write('Usage: cstyle-file [filename]\n') + sys.exit(1) + +check_file(lines) diff --git a/src/util/cstyle.py b/src/util/cstyle.py new file mode 100644 index 000000000..7c45335b0 --- /dev/null +++ b/src/util/cstyle.py @@ -0,0 +1,188 @@ +# Copyright (C) 2012 by the Massachusetts Institute of Technology. +# All rights reserved. +# +# Export of this software from the United States of America may +# require a specific license from the United States Government. +# It is the responsibility of any person or organization contemplating +# export to obtain such a license before exporting. +# +# WITHIN THAT CONSTRAINT, permission to use, copy, modify, and +# distribute this software and its documentation for any purpose and +# without fee is hereby granted, provided that the above copyright +# notice appear in all copies and that both that copyright notice and +# this permission notice appear in supporting documentation, and that +# the name of M.I.T. not be used in advertising or publicity pertaining +# to distribution of the software without specific, written prior +# permission. Furthermore if you modify this software you must label +# your software as modified software and not distribute it in such a +# fashion that it might be confused with the original M.I.T. software. +# M.I.T. makes no representations about the suitability of +# this software for any purpose. It is provided "as is" without express +# or implied warranty. + +# This program attempts to detect MIT krb5 coding style violations +# attributable to the changes a series of git commits. It can be run +# from anywhere within a git working tree. + +import getopt +import os +import re +import sys +from subprocess import Popen, PIPE, call + +def usage(): + u = ['Usage: cstyle [-w] [rev|rev1..rev2]', + '', + 'By default, checks working tree against HEAD, or checks changes in', + 'HEAD if the working tree is clean. With a revision option, checks', + 'changes in rev or the series rev1..rev2. With the -w option,', + 'checks working tree against rev (defaults to HEAD).'] + sys.stderr.write('\n'.join(u) + '\n') + sys.exit(1) + + +# Run a command and return a list of its output lines. +def run(args): + # subprocess.check_output would be ideal here, but requires Python 2.7. + p = Popen(args, stdout=PIPE, stderr=PIPE) + out, err = p.communicate() + if p.returncode != 0: + sys.stderr.write('Failed command: ' + ' '.join(args) + '\n') + if err != '': + sys.stderr.write('stderr:\n' + err) + sys.stderr.write('Unexpected command failure, exiting\n') + sys.exit(1) + return out.splitlines() + + +# Find the top level of the git working tree, or None if we're not in +# one. +def find_toplevel(): + # git doesn't seem to have a way to do this, so we search by hand. + dir = os.getcwd() + while True: + if os.path.exists(os.path.join(dir, '.git')): + break + parent = os.path.dirname(dir) + if (parent == dir): + return None + dir = parent + return dir + + +# Check for style issues in a file within rev (or within the current +# checkout if rev is None). Report only problems on line numbers in +# new_lines. +line_re = re.compile(r'^\s*(\d+) (.*)$') +def check_file(filename, rev, new_lines): + # Process only C source files under src. + root, ext = os.path.splitext(filename) + if not filename.startswith('src/') or ext not in ('.c', '.h', '.hin'): + return + dispname = filename[4:] + + if rev is None: + p1 = Popen(['cat', filename], stdout=PIPE) + else: + p1 = Popen(['git', 'show', rev + ':' + filename], stdout=PIPE) + p2 = Popen(['python', 'src/util/cstyle-file.py'], stdin=p1.stdout, + stdout=PIPE) + p1.stdout.close() + out, err = p2.communicate() + if p2.returncode != 0: + sys.exit(1) + + first = True + for line in out.splitlines(): + m = line_re.match(line) + if int(m.group(1)) in new_lines: + if first: + print ' ' + dispname + ':' + first = False + print ' ' + line + + +# Determine the lines of each file modified by diff (a sequence of +# strings) and check for style violations in those lines. rev +# indicates the version in which the new contents of each file can be +# found, or is None if the current contents are in the working copy. +chunk_header_re = re.compile(r'^@@ -\d+(,(\d+))? \+(\d+)(,(\d+))? @@') +def check_diff(diff, rev): + old_count, new_count, lineno = 0, 0, 0 + filename = None + for line in diff: + if not line or line.startswith('\\ No newline'): + continue + if old_count > 0 or new_count > 0: + # We're in a chunk. + if line[0] == '+': + new_lines.append(lineno) + if line[0] in ('+', ' '): + new_count = new_count - 1 + lineno = lineno + 1 + if line[0] in ('-', ' '): + old_count = old_count - 1 + elif line.startswith('+++ b/'): + # We're starting a new file. Check the last one. + if filename: + check_file(filename, rev, new_lines) + filename = line[6:] + new_lines = [] + else: + m = chunk_header_re.match(line) + if m: + old_count = int(m.group(2) or '1') + lineno = int(m.group(3)) + new_count = int(m.group(5) or '1') + + # Check the last file in the diff. + if filename: + check_file(filename, rev, new_lines) + + +# Check a sequence of revisions for style issues. +def check_series(revlist): + for rev in revlist: + sys.stdout.flush() + call(['git', 'show', '-s', '--oneline', rev]) + diff = run(['git', 'diff-tree', '--no-commit-id', '--root', '-M', + '--cc', rev]) + check_diff(diff, rev) + + +# Parse arguments. +try: + opts, args = getopt.getopt(sys.argv[1:], 'w') +except getopt.GetoptError, err: + print str(err) + usage() +if len(args) > 1: + usage() + +# Change to the top level of the working tree so we easily run the file +# checker and refer to working tree files. +toplevel = find_toplevel() +if toplevel is None: + sys.stderr.write('%s must be run within a git working tree') +os.chdir(toplevel) + +if ('-w', '') in opts: + # Check the working tree against a base revision. + arg = 'HEAD' + if args: + arg = args[0] + check_diff(run(['git', 'diff', arg]), None) +elif args: + # Check the differences in a rev or a series of revs. + if '..' in args[0]: + check_series(run(['git', 'rev-list', '--reverse', args[0]])) + else: + check_series([args[0]]) +else: + # No options or arguments. Check the differences against HEAD, or + # the differences in HEAD if the working tree is clean. + diff = run(['git', 'diff', 'HEAD']) + if diff: + check_diff(diff, None) + else: + check_series(['HEAD']) |
