summaryrefslogtreecommitdiffstats
path: root/src/util/cstyle-file.py
blob: 05078925eba1f7fcf7f5ff988fdf854846f4941d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
# Copyright (C) 2012 by the Massachusetts Institute of Technology.
# All rights reserved.
#
# Export of this software from the United States of America may
#   require a specific license from the United States Government.
#   It is the responsibility of any person or organization contemplating
#   export to obtain such a license before exporting.
#
# WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
# distribute this software and its documentation for any purpose and
# without fee is hereby granted, provided that the above copyright
# notice appear in all copies and that both that copyright notice and
# this permission notice appear in supporting documentation, and that
# the name of M.I.T. not be used in advertising or publicity pertaining
# to distribution of the software without specific, written prior
# permission.  Furthermore if you modify this software you must label
# your software as modified software and not distribute it in such a
# fashion that it might be confused with the original M.I.T. software.
# M.I.T. makes no representations about the suitability of
# this software for any purpose.  It is provided "as is" without express
# or implied warranty.

# This program checks for some kinds of MIT krb5 coding style
# violations in a single file.  Checked violations include:
#
#   Line is too long
#   Tabs violations
#   Trailing whitespace and final blank lines
#   Comment formatting errors
#   Preprocessor statements in function bodies
#   Misplaced braces
#   Space before paren in function call, or no space after if/for/while
#   Parenthesized return expression
#   Space after cast operator, or no space before * in cast operator
#   Line broken before binary operator
#   Lack of spaces around binary operator (sometimes)
#   Assignment at the beginning of an if conditional
#   Use of prohibited string functions
#   Lack of braces around 2+ line flow control body
#
# This program does not check for the following:
#
#   Anything outside of a function body except line length/whitespace
#   Anything non-syntactic (proper cleanup flow control, naming, etc.)
#   Indentation or alignment of continuation lines
#   UTF-8 violations
#   Implicit tests against NULL or '\0'
#   Inner-scope variable declarations
#   Over- or under-parenthesization
#   Long or deeply nested function bodies
#   Syntax of function calls through pointers

import re
import sys

def warn(ln, msg):
    print '%5d  %s' % (ln, msg)


def check_length(line, ln):
    if len(line) > 79 and not line.startswith(' * Copyright'):
        warn(ln, 'Length exceeds 79 characters')


def check_tabs(line, ln, allow_tabs, seen_tab):
    if not allow_tabs:
        if '\t' in line:
            warn(ln, 'Tab character in file which does not allow tabs')
    else:
        if ' \t' in line:
            warn(ln, 'Tab character immediately following space')
        if '        ' in line and seen_tab:
            warn(ln, '8+ spaces in file which uses tabs')


def check_trailing_whitespace(line, ln):
    if line and line[-1] in ' \t':
        warn(ln, 'Trailing whitespace')


def check_comment(lines, ln):
    align = lines[0].index('/*') + 1
    if not lines[0].lstrip().startswith('/*'):
        warn(ln, 'Multi-line comment begins after code')
    for line in lines[1:]:
        ln += 1
        if len(line) <= align or line[align] != '*':
            warn(ln, 'Comment line does not have * aligned with top')
        elif line[:align].lstrip() != '':
            warn(ln, 'Garbage before * in comment line')
    if not lines[-1].rstrip().endswith('*/'):
        warn(ln, 'Code after end of multi-line comment')
    if len(lines) > 2 and (lines[0].strip() not in ('/*', '/**') or
                           lines[-1].strip() != '*/'):
        warn(ln, 'Comment is 3+ lines but is not formatted as block comment')


def check_preprocessor(line, ln):
    if line.startswith('#'):
        warn(ln, 'Preprocessor statement in function body')


def check_braces(line, ln):
    # Strip out one-line initializer expressions.
    line = re.sub(r'=\s*{.*}', '', line)
    if line.lstrip().startswith('{') and not line.startswith('{'):
        warn(ln, 'Un-cuddled open brace')
    if re.search(r'{\s*\S', line):
        warn(ln, 'Code on line after open brace')
    if re.search(r'\S.*}', line):
        warn(ln, 'Code on line before close brace')


# This test gives false positives on some function pointer type
# declarations or casts.  Avoid this by using typedefs.
def check_space_before_paren(line, ln):
    for m in re.finditer(r'([\w]+)(\s*)\(', line):
        ident, ws = m.groups()
        if ident in ('void', 'char', 'int', 'long', 'unsigned'):
            pass
        elif ident in ('if', 'for', 'while', 'switch'):
            if not ws:
                warn(ln, 'No space after flow control keyword')
        elif ident != 'return':
            if ws:
                warn(ln, 'Space before parenthesis in function call')

    if re.search(r' \)', line):
        warn(ln, 'Space before close parenthesis')


def check_parenthesized_return(line, ln):
    if re.search(r'return\s*\(.*\);', line):
        warn(ln, 'Parenthesized return expression')


def check_cast(line, ln):
    # We can't reliably distinguish cast operators from parenthesized
    # expressions or function call parameters without a real C parser,
    # so we use some heuristics.  A cast operator is followed by an
    # expression, which usually begins with an identifier or an open
    # paren.  A function call or parenthesized expression is never
    # followed by an identifier and only rarely by an open paren.  We
    # won't detect a cast operator when it's followed by an expression
    # beginning with '*', since it's hard to distinguish that from a
    # multiplication operator.  We will get false positives from
    # "(*fp) (args)" and "if (condition) statement", but both of those
    # are erroneous anyway.
    for m in re.finditer(r'\(([^(]+)\)(\s*)[a-zA-Z_(]', line):
        if m.group(2):
            warn(ln, 'Space after cast operator (or inline if/while body)')
        # Check for casts like (char*) which should have a space.
        if re.search(r'[^\s\*]\*+$', m.group(1)):
            warn(ln, 'No space before * in cast operator')


def check_binary_operator(line, ln):
    binop = r'(\+|-|\*|/|%|\^|==|=|!=|<=|<|>=|>|&&|&|\|\||\|)'
    if re.match(r'\s*' + binop + r'\s', line):
        warn(ln - 1, 'Line broken before binary operator')
    for m in re.finditer(r'(\s|\w)' + binop + r'(\s|\w)', line):
        before, op, after = m.groups()
        if not before.isspace() and not after.isspace():
            warn(ln, 'No space before or after binary operator')
        elif not before.isspace():
            warn(ln, 'No space before binary operator')
        elif op not in ('-', '*', '&') and not after.isspace():
            warn(ln, 'No space after binary operator')


def check_assignment_in_conditional(line, ln):
    # Check specifically for if statements; we allow assignments in
    # loop expressions.
    if re.search(r'if\s*\(+\w+\s*=[^=]', line):
        warn(ln, 'Assignment in if conditional')


def indent(line):
    return len(re.match('\s*', line).group(0).expandtabs())


def check_unbraced_flow_body(line, ln, lines):
    if re.match(r'\s*do$', line):
        warn(ln, 'do statement without braces')
        return

    m = re.match(r'\s*(})?\s*else(\s*if\s*\(.*\))?\s*({)?\s*$', line)
    if m and (m.group(1) is None) != (m.group(3) is None):
        warn(ln, 'One arm of if/else statement braced but not the other')

    if (re.match('\s*(if|else if|for|while)\s*\(.*\)$', line) or
        re.match('\s*else$', line)):
        base = indent(line)
        # Look at the next two lines (ln is 1-based so lines[ln] is next).
        if indent(lines[ln]) > base and indent(lines[ln + 1]) > base:
            warn(ln, 'Body is 2+ lines but has no braces')


def check_bad_string_fn(line, ln):
    # This is intentionally pretty fuzzy so that we catch the whole scanf
    if re.search(r'\W(strcpy|strcat|sprintf|\w*scanf)\W', line):
        warn(ln, 'Prohibited string function')


def check_file(lines):
    # Check if this file allows tabs.
    if len(lines) == 0:
        return
    allow_tabs = 'indent-tabs-mode: nil' not in lines[0]
    seen_tab = False

    in_function = False
    comment = []
    ln = 0
    for line in lines:
        ln += 1
        line = line.rstrip('\r\n')
        seen_tab = seen_tab or ('\t' in line)

        # Check line structure issues before altering the line.
        check_length(line, ln)
        check_tabs(line, ln, allow_tabs, seen_tab)
        check_trailing_whitespace(line, ln)

        # Strip out single-line comments the contents of string literals.
        if not comment:
            line = re.sub(r'/\*.*?\*/', '', line)
            line = re.sub(r'"(\\.|[^"])*"', '""', line)

        # Parse out and check multi-line comments.  (Ignore code on
        # the first or last line; check_comment will warn about it.)
        if comment or '/*' in line:
            comment.append(line)
            if '*/' in line:
                check_comment(comment, ln - len(comment) + 1)
                comment = []
            continue

        # Warn if we see a // comment and ignore anything following.
        if '//' in line:
            warn(ln, '// comment')
            line = re.sub(r'//.*/', '', line)

        if line.startswith('{'):
            in_function = True
        elif line.startswith('}'):
            in_function = False

        if in_function:
            check_preprocessor(line, ln)
            check_braces(line, ln)
            check_space_before_paren(line, ln)
            check_parenthesized_return(line, ln)
            check_cast(line, ln)
            check_binary_operator(line, ln)
            check_assignment_in_conditional(line, ln)
            check_unbraced_flow_body(line, ln, lines)
            check_bad_string_fn(line, ln)

    if lines[-1] == '':
        warn(ln, 'Blank line at end of file')


if len(sys.argv) == 1:
    lines = sys.stdin.readlines()
elif len(sys.argv) == 2:
    f = open(sys.argv[1])
    lines = f.readlines()
    f.close()
else:
    sys.stderr.write('Usage: cstyle-file [filename]\n')
    sys.exit(1)

check_file(lines)