diff --git a/000-README.rst b/000-README.rst new file mode 100644 index 0000000..c4f74f9 --- /dev/null +++ b/000-README.rst @@ -0,0 +1,186 @@ +********************** +Easier static analysis +********************** + +Summary +======= + +This patch kit provides an easy way to integrate 3rd-party static +analysis tools into gcc, and have them report through gcc's diagnostic +subsystem. + +Here's an example showing gcc running a bank of 3rd-party checkers on this +source file:: + + #include <stdlib.h> + + void test () + { + void *ptr_1; + void *ptr_2; + + ptr_1 = malloc (64); + if (!ptr_1) + return; + ptr_2 = malloc (64); + if (!ptr_2) + return; + + free (ptr_2); + free (ptr_1); + } + +via a simple command-line:: + + $ ./xgcc -B. -c conditional-leak.c -frun-analyzers=policy.json + conditional-leak.c:13:5: error: Potential leak of memory pointed to by 'ptr_1' [clang-analyzer:Memory leak] + return; + ^ + conditional-leak.c:8:11: note: state 1 of 4: Memory is allocated + ptr_1 = malloc (64); + ^ + conditional-leak.c:9:7: note: state 2 of 4: Assuming 'ptr_1' is non-null + if (!ptr_1) + ^ + conditional-leak.c:12:7: note: state 3 of 4: Assuming 'ptr_2' is null + if (!ptr_2) + ^ + conditional-leak.c:13:5: note: state 4 of 4: Potential leak of memory pointed to by 'ptr_1' + return; + ^ + conditional-leak.c:13:0: error: Memory leak: ptr_1 [cppcheck:memleak] + return; + +Of the checkers, clang's static analyzer and cppcheck both identify the +memory leak; the former also identifies the control flow. + +Extensive metadata is captured about what checkers were run, and what +they emitted, using the "Firehose" interchange format: + + http://firehose.readthedocs.io/en/latest/index.html + +It should be easy to watermark the binaries with this information. + + +Statement of the problem +======================== + +Static analysis is done too late, if at all: static analysis tools are run +as an optional extra, "on the side", rather than in developers' normal +workflow. 
Analysis results are reviewed (if at all) in some kind of +on-the-side tool (e.g. Red Hat's internal errata tool, Fedora QA tooling, etc), +rather than when the code is being edited, or patches being prepared. + +It would be better to have an easy way for developers to run analyzer(s) +as they're doing development, as part of their edit-compile-test cycle +- analysis problems are reported immediately, and can be acted on +immediately. + +It would also be good to have a way to run analyzer(s) when packages are +built, with a variety of precanned policies for analyzers. For example, +network-facing daemons could be run with a higher strictness of checking. + +It would also be good to tag binaries with information on what analyzers +were run, what options they were invoked with, etc. +Potentially have "dump_file" information from optimization passes stored +in the metadata also. Have a tool to query all of this. + +Can/should we break the build if there are issues? + +Yes: but have a way to opt-in easily: if the tool is well-integrated with the + compiler: e.g. + -frun-analyzers=/usr/share/analyzers/userspace/network-facing-service +then upstream developers and packagers can turn on the setting, and see what +breaks, and fix it naturally within an edit-compile-test cycle. + +This gives a relatively painless way to opt-in to increasing levels of +strictness (e.g. by an upstream project, or by an individual developer). + +Does this slow the build down? +Yes: but you can choose which analyzers run, and can choose to turn them off. +It ought to parallelize well. I believe users will prefer to turn them on, +and have Koji builders burn up the extra CPU cycles. +This may make much more sense for binary distributions (e.g. Fedora, Debian) +than it does for things like Gentoo. 
+ +User stories: + +* upstream developer +* distribution packager (Fedora, Debian, Gentoo) +* RHEL packager +* Fedora security team (setting policy etc) + * setting policy + * after a vulnerability, looking to assess the scope +* RHEL security team + * setting policy + * after a vulnerability, looking to assess the scope + +Which analyzers? + * clang-analyzer + * cppcheck + * findbugs + * cpychecker??? + * anything else? + +Look at Firehose: + https://github.com/fedora-static-analysis/firehose + +and look at mock-with-analysis: + https://github.com/fedora-static-analysis/mock-with-analysis + +Do we run the risk of breaking "configure" tests? + +UI ideas: + +A new option in GCC 8: + -frun-analyzers=PATH_TO_POLICY_FILE + +e.g.: + + -frun-analyzers=/usr/share/analyzers/userspace/network-facing-service + -frun-analyzers=/usr/share/analyzers/userspace/application + -frun-analyzers=/usr/share/analyzers/userspace/default + -frun-analyzers=/usr/share/analyzers/kernel + +or whatnot. + +Idea is to provide mechanism, and for the distribution to decide on some standard policies. + +(mechanism vs policy) + + +$ rpm -qf /usr/share/analyzers/userspace/network-facing-service +analysis-policy-userspace-network-facing-service-0.1-1.noarch.fc27 + +hence would have: + + BuildRequires: analysis-policy-userspace-network-facing-service + +which would "Require" the analyzers themselves. + +See + https://fedoraproject.org/wiki/Toolchain/Watermark + + +Ability to sandbox a gcc plugin by running the plugin inside another cc1. + + +Known unknowns +============== + +How does one suppress a specific false-positive site? +Do we need a pragma for it? (though pragmas ought to already affect some of +the underlying checkers...) + +Do we really want .json for the policy format? +If we're expecting users to edit this, we need great error messages, +and probably support for comments. Would YAML or somesuch be better? + +Should the "checkers" subdirectory be its own project? 
+ + +Notes to self +============= + +Working copy: + /home/david/coding-3/gcc-git-static-analysis/src diff --git a/checkers/Makefile b/checkers/Makefile new file mode 100644 index 0000000..c08cac4 --- /dev/null +++ b/checkers/Makefile @@ -0,0 +1,10 @@ +all: check-all + +check-all: + python checker.py + python clang_analyzer.py unittest + python cppcheck.py unittest + python flawfinder.py unittest + python splint.py unittest + python always_fails.py unittest + python ianal.py unittest diff --git a/checkers/always_fails.py b/checkers/always_fails.py new file mode 100755 index 0000000..3aae401 --- /dev/null +++ b/checkers/always_fails.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python +# Copyright 2012, 2013, 2015, 2017 David Malcolm +# Copyright 2012, 2013, 2015, 2017 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
class AlwaysFails(Checker):
    """
    Checker subclass that always fails.

    raw_invoke() asks the base class to run a command whose path is not
    expected to exist, so this checker is useful for exercising the
    failure-reporting paths.
    """
    def __init__(self, ctxt):
        Checker.__init__(self, 'always-fails', ctxt)

    def raw_invoke(self, gccinv, sourcefile):
        """
        Try to run the (deliberately bogus) executable on sourcefile.

        gccinv is accepted for interface compatibility but is not used.
        """
        args = ['/this/executable/does/not/exist', sourcefile]
        return self._run_subprocess(sourcefile, args)

    def handle_output(self, result):
        """
        Build a 'bad-exit-code' failure Analysis from a SubprocessResult.

        A zero exit code is never expected here, hence the assertion.
        """
        assert result.returncode
        analysis = self._make_failed_analysis(
            result.sourcefile, result.timer,
            msgtext='Bad exit code running %s' % self.name,
            failureid='bad-exit-code')
        self.set_custom_fields(result, analysis)
        return analysis

    def set_custom_fields(self, result, analysis):
        """
        Record the command line, then the generic subprocess fields
        (via result.set_custom_fields), on the analysis.
        """
        # This key used to be 'flawfinder-invocation', a copy-paste slip
        # from another checker; name it after this checker instead.
        analysis.set_custom_field('always-fails-invocation',
                                  ' '.join(result.argv))
        result.set_custom_fields(analysis)
def in_chroot():
    """
    Return True if the path '/builddir' exists, False otherwise.

    Context uses this to decide where log output should go.
    """
    builddir_present = os.path.exists('/builddir')
    return builddir_present
class Checker:
    """
    Base class for a wrapper around one analysis tool.

    Subclasses provide raw_invoke() (run the tool on one source file)
    and handle_output() (turn a SubprocessResult into an Analysis).
    checked_invoke() is the exception-safe entry point.
    """
    def __init__(self, name, ctxt):
        self.name = name
        # Seconds allowed for the subprocess started by _run_subprocess()
        self.timeout = 60
        self.ctxt = ctxt

    def log(self, msg):
        """Forward msg to the context's logger."""
        self.ctxt.log(msg)

    def raw_invoke(self, gccinv, sourcefile):
        """
        Run the tool, with a timeout, returning an Analysis instance.
        May well raise an exception if something major went wrong.
        """
        raise NotImplementedError

    def checked_invoke(self, gccinv, sourcefile):
        """
        Call raw_invoke(), handling exceptions.  Return an Analysis
        instance.

        A TimeoutExpired is reported as a 'timeout' failure.  Any other
        exception is reported as an 'exception' failure carrying the
        traceback, unless the context has capture_exceptions turned off,
        in which case it is re-raised.
        """
        try:
            self.log('about to invoke: %s with %r' % (self.name, gccinv))
            analysis = self.raw_invoke(gccinv, sourcefile)
        except TimeoutExpired:
            # No Timer exists at this level, so pass None (no stats).
            # The handler previously referenced the undefined names "t"
            # and "TIMEOUT" and therefore died with a NameError.
            analysis = self._make_failed_analysis(
                sourcefile, None, msgtext='Timeout running %s' % self.name,
                failureid='timeout')
            analysis.set_custom_field('timeout', self.timeout)
        except Exception as exc:
            # Capture the exception as a Failure instance.
            # Alternatively when debugging such failures, it can
            # be easier to re-raise the exception:
            if not self.ctxt.capture_exceptions:
                raise
            analysis = self._make_failed_analysis(
                sourcefile, None,
                msgtext=('Exception running %s: %s'
                         % (self.name, exc)),
                failureid='exception')
            tb_str = traceback.format_exc()
            analysis.set_custom_field('traceback', tb_str)
        if sourcefile:
            # Record both the path as given and a cwd-relative absolute one
            analysis.metadata.file_.givenpath = sourcefile
            analysis.metadata.file_.abspath = os.path.join(os.getcwd(),
                                                           sourcefile)
        return analysis

    def handle_output(self, result):
        """
        Given a SubprocessResult, return an Analysis instance.
        """
        raise NotImplementedError

    def _make_failed_analysis(self, sourcefile, t, msgtext, failureid):
        """
        Something went wrong; build a failure report.

        sourcefile: path of the file being analyzed
        t: a Timer used to fill in the stats, or None for no stats
        msgtext: human-readable description of the failure
        failureid: short identifier for the kind of failure

        Returns an Analysis containing a single Failure.
        """
        generator = Generator(name=self.name,
                              version=None)
        if t:
            stats = make_stats(t)
        else:
            stats = None

        metadata = Metadata(generator=generator,
                            sut=None,
                            file_ = make_file(sourcefile),
                            stats=stats)
        # A separate File instance from the one in the metadata:
        # checked_invoke() later mutates metadata.file_ only.
        file_ = File(givenpath=sourcefile,
                     abspath=None,
                     hash_=None)
        location = Location(file=file_,
                            function=None,
                            point=None,
                            range_=None)
        message = Message(msgtext)
        results = [Failure(failureid=failureid,
                           location=location,
                           message=message,
                           customfields=None)]
        analysis = Analysis(metadata, results)
        return analysis

    def _run_subprocess(self, sourcefile, argv, env=None):
        """
        Support for running the bulk of the side effect in a subprocess,
        with timeout support.

        Runs argv (optionally with environment env), waits at most
        self.timeout seconds, and passes a SubprocessResult to
        handle_output().  On timeout, returns a 'timeout' failure
        Analysis that records the timeout and the command line.
        """
        self.log('%s: _run_subprocess(%r, %r)' % (self.name, sourcefile, argv))
        p = Popen(argv,
                  stdout=PIPE, stderr=PIPE, env=env)
        t = Timer()
        try:
            out, err = p.communicate(timeout=self.timeout)
            self.ctxt.write_streams(argv[0], out, err)
            result = SubprocessResult(sourcefile, argv, p.returncode, out, err, t)
            analysis = self.handle_output(result)
            return analysis
        except TimeoutExpired:
            # communicate() does not stop the child on timeout; kill it
            # and reap it so that it is not left running in the
            # background.  Skip this if the child has already exited.
            if p.poll() is None:
                p.kill()
                p.communicate()
            analysis = self._make_failed_analysis(sourcefile, t,
                                                  msgtext='Timeout running %s' % self.name,
                                                  failureid='timeout')
            analysis.set_custom_field('timeout', self.timeout)
            analysis.set_custom_field('command-line', ' '.join(argv))
            return analysis
class InvokeCustomGcc(InvokeRealGcc):
    """
    Checker subclass that invokes a custom (patched) build of gcc.

    The patched gcc is expected to write a file named
    DUMPBASE.custom-dump.txt, containing a series of lines of the form
      KEY: VALUE
    Each such pair is stored as a custom field on the analysis.
    """
    def __init__(self, executable, ctxt, extra_args=None, extra_env=None):
        InvokeRealGcc.__init__(self, executable, ctxt, extra_args, extra_env)
        self.name = 'custom-gcc'

    def handle_output(self, result):
        """
        Parse the compiler output as InvokeRealGcc does, then merge in
        any KEY: VALUE pairs found in the custom dumpfile.

        The dumpfile is looked for next to the temporary output file,
        named after the basename of the source file.
        """
        analysis = InvokeRealGcc.handle_output(self, result)
        analysis.metadata.generator.name = 'custom-gcc'
        dumpbase = os.path.join(os.path.dirname(self.output_file.name),
                                os.path.basename(result.sourcefile))
        dumpfile_path = dumpbase + '.custom-dump.txt'
        if not os.path.exists(dumpfile_path):
            self.log('could not find custom dumpfile: %s' % dumpfile_path)
            return analysis

        self.log('found custom dumpfile: %s' % dumpfile_path)
        with open(dumpfile_path) as f:
            for line in f:
                self.log(line)
                # Expect lines of the form "KEY: VALUE"
                m = re.match('^(.+): (.+)$', line)
                if m is None:
                    # A blank or malformed line used to raise
                    # AttributeError on m.groups(), discarding the whole
                    # analysis; skip just that line instead.
                    self.log('ignoring malformed dumpfile line: %r' % line)
                    continue
                self.log(str(m.groups()))
                key, value = m.groups()
                analysis.set_custom_field(key, value)
        return analysis
+ + # That said, the plugin's Makefile installs the plugin + # as "python.so" to $(GCCPLUGINS_DIR) + # and the support modules to $(GCCPLUGINS_DIR)/$(PLUGIN_DIR) + # So let's do it via PYTHONPATH + + # FIXME: hacked in path: + plugin_path = '/home/david/coding/gcc-python/gcc-python/cpychecker-firehose-output/' + plugin_gcc_c_api_path = '/home/david/coding/gcc-python/gcc-python/cpychecker-firehose-output/gcc-c-api' + plugin_sys_path = '/home/david/coding/gcc-python/gcc-python/cpychecker-firehose-output/' + plugin_full_name = os.path.join(plugin_path, 'python.so') + env = os.environ.copy() + env['PYTHONPATH'] = plugin_sys_path + env['LD_LIBRARY_PATH'] = plugin_gcc_c_api_path + + # We would use the regular keyword argument syntax: + # outputxmlpath='foo' + # but unfortunately gcc's option parser seems to not be able to + # cope with '=' within an option's value. So we do it using + # dictionary syntax instead: + pycmd = ('from libcpychecker import main, Options; ' + 'main(Options(**{"outputxmlpath":"%s", ' + '"verify_refcounting": True, ' + '"maxtrans": 1024, ' + '}))' % self.outputxmlpath) + # Note that some RPMs also rename the plugin from + # "python.so" to "python2.so", which would require further work. 
class CheckerTests(unittest.TestCase):
    """
    Shared scaffolding for the per-tool test cases.

    Subclasses implement make_tool() and verify_basic_metadata(), then
    call invoke() from their test methods.
    """
    def make_ctxt(self, capture_exceptions=False):
        """Build a Context with logging off, for use by make_tool()."""
        return Context(enable_logging=0, capture_exceptions=capture_exceptions)

    def make_tool(self):
        """Hook for self.invoke(): return the tool instance to exercise"""
        raise NotImplementedError

    def verify_basic_metadata(self, analysis, sourcefile):
        """Hook for self.invoke()"""
        raise NotImplementedError

    def invoke(self, sourcefile, extraargs = None):
        """
        Invoke a tool and sanity-check the result.

        Runs the tool from make_tool() as if by "gcc -c SOURCEFILE" plus
        any extraargs, checks the metadata via the subclass hook, checks
        that the analysis serializes to XML and round-trips through
        JSON, and returns the analysis.
        """
        tool = self.make_tool()
        argv = ['gcc', '-c', sourcefile]
        if extraargs:
            argv += extraargs
        gccinv = GccInvocation(argv)
        analysis = tool.checked_invoke(gccinv, sourcefile)

        # Call a subclass hook to check basic metadata:
        self.verify_basic_metadata(analysis, sourcefile)

        # Verify that we can serialize to XML.  The previous check,
        # startswith(b''), is true for every byte string and so verified
        # nothing; require at least the opening of a tag.
        xml_bytes = analysis.to_xml_bytes()
        self.assertTrue(xml_bytes.startswith(b'<'))

        # Verify it can roundtrip through JSON (only that loading does
        # not raise; equality with the original is not asserted):
        js_bytes = json.dumps(analysis.to_json(), indent=1)
        Analysis.from_json(json.loads(js_bytes))

        return analysis

    def assert_metadata(self, analysis,
                        expected_generator_name, expected_given_path):
        """
        Check the generator name and given path in the analysis
        metadata, and that the given path occurs within the abspath.
        """
        self.assertEqual(analysis.metadata.generator.name,
                         expected_generator_name)
        self.assertEqual(analysis.metadata.file_.givenpath, expected_given_path)
        self.assertIn(expected_given_path, analysis.metadata.file_.abspath)

    def assert_has_custom_field(self, analysis, name):
        """Check that the analysis has a custom field called name."""
        self.assertTrue(analysis.customfields)
        self.assertIn(name, analysis.customfields)
def tool_main(argv, tool_class):
    """
    Entrypoint for use by the various per-tool scripts.

    argv: the full command line, GCC-style; argv[0] is the script name
    tool_class: called with a Context to create the tool to run

    If argv[1] is "unittest", the unit test suite is run with the
    remaining arguments.  Otherwise the tool is run on the single
    source file named in argv and the resulting analysis is written to
    stdout as JSON.

    Returns 0 on success, or 1 if argv does not name exactly one source
    file.
    """
    # If we're invoked with "unittest" as the first param,
    # run the unit test suite:
    if len(argv) >= 2 and argv[1] == 'unittest':
        sys.argv = [argv[0]] + argv[2:]
        return unittest.main()

    ctxt = Context()
    tool = tool_class(ctxt)

    gccinv = GccInvocation(argv)
    ctxt.log(' gccinv.sources: %r' % gccinv.sources)
    # Checked explicitly rather than with "assert", which is stripped
    # when Python runs with -O.
    if len(gccinv.sources) != 1:
        sys.stderr.write('%s: expected exactly one source file, got %r\n'
                         % (argv[0] if argv else 'checker',
                            gccinv.sources))
        return 1
    sourcefile = gccinv.sources[0]
    ctxt.log(' sourcefile: %r' % sourcefile)
    analysis = tool.checked_invoke(gccinv, sourcefile)
    json.dump(analysis.to_json(), sys.stdout, indent=1)
    return 0
class InvokeClangAnalyzer(Checker):
    """
    Checker subclass that invokes the clang analyzer (via scan-build)
    """
    def __init__(self, ctxt):
        Checker.__init__(self, 'clang-analyzer', ctxt)

    def raw_invoke(self, gccinv, sourcefile):
        """
        Run scan-build around the given gcc invocation, writing plist
        reports into a fresh temporary directory.

        The directory is removed before returning, so it no longer
        accumulates one temporary directory per analyzed file.  The
        'plistpath' custom field therefore records where the report was
        read from, not a file that still exists.
        """
        import shutil

        self.resultdir = tempfile.mkdtemp()
        try:
            args = ['scan-build', '-v', '-plist',
                    '--use-analyzer', '/usr/bin/clang', # rhbz 923834
                    '-o', self.resultdir,
                    'gcc'] + gccinv.argv[1:]
            # handle_output() is invoked from within _run_subprocess(),
            # i.e. before the cleanup below runs.
            return self._run_subprocess(sourcefile, args)
        finally:
            shutil.rmtree(self.resultdir, ignore_errors=True)

    def handle_output(self, result):
        """
        Turn a SubprocessResult into an Analysis.

        Returns a 'bad-exit-code' failure if scan-build exited non-zero,
        the parsed plist report if one is found, and a 'plist-not-found'
        failure otherwise.
        """
        if result.returncode:
            analysis = self._make_failed_analysis(
                result.sourcefile, result.timer,
                msgtext='Bad exit code running %s' % self.name,
                failureid='bad-exit-code')
            self.set_custom_fields(result, analysis)
            return analysis

        # Given e.g. resultdir='/tmp/tmpQW2l2B', the plist files
        # are an extra level deep e.g.:
        #  '/tmp/tmpQW2l2B/2013-01-22-1/report-MlwJri.plist'
        self.log(self.resultdir)
        # Sorted so that the choice is reproducible if there is ever
        # more than one report; only the first is used.
        plistpaths = sorted(glob.glob(os.path.join(self.resultdir,
                                                   '*/*.plist')))
        if plistpaths:
            if len(plistpaths) > 1:
                self.log('multiple plist files found; using the first: %r'
                         % (plistpaths, ))
            plistpath = plistpaths[0]
            analysis = parse_plist(plistpath,
                                   file_=make_file(result.sourcefile),
                                   stats=make_stats(result.timer))
            self.set_custom_fields(result, analysis)
            analysis.set_custom_field('plistpath', plistpath)
            return analysis

        # Not found?
        analysis = self._make_failed_analysis(
            result.sourcefile, result.timer,
            msgtext='Unable to locate plist file',
            failureid='plist-not-found')
        self.set_custom_fields(result, analysis)
        return analysis

    def set_custom_fields(self, result, analysis):
        """
        Record the scan-build command line, then the generic subprocess
        fields (via result.set_custom_fields), on the analysis.
        """
        analysis.set_custom_field('scan-build-invocation',
                                  ' '.join(result.argv))
        result.set_custom_fields(analysis)
self.assertIsInstance(r0, Issue) + self.assertEqual(r0.testid, None) + self.assertEqual(r0.location.file.givenpath, + 'test-sources/read-through-null.c') + self.assertEqual(r0.location.point.line, 3) + self.assertEqual(r0.message.text, + "Dereference of null pointer") + self.assertEqual(r0.severity, None) + self.assertIsInstance(r0.trace, Trace) + + def test_out_of_bounds(self): + analysis = self.invoke('test-sources/out-of-bounds.c') + #print(analysis) + self.assertEqual(len(analysis.results), 1) + + r0 = analysis.results[0] + self.assertIsInstance(r0, Issue) + self.assertEqual(r0.testid, None) + self.assertEqual(r0.location.file.givenpath, + 'test-sources/out-of-bounds.c') + self.assertEqual(r0.location.point.line, 5) + self.assertEqual(r0.message.text, + "Undefined or garbage value returned to caller") + self.assertEqual(r0.severity, None) + self.assertIsInstance(r0.trace, Trace) + +if __name__ == '__main__': + sys.exit(tool_main(sys.argv, InvokeClangAnalyzer)) diff --git a/checkers/cppcheck.py b/checkers/cppcheck.py new file mode 100755 index 0000000..61ab18f --- /dev/null +++ b/checkers/cppcheck.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python +# Copyright 2012, 2013, 2015, 2017 David Malcolm +# Copyright 2012, 2013, 2015, 2017 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+
+import sys
+import tempfile
+import unittest
+
+from gccinvocation import GccInvocation
+
+from checker import Checker, Context, CheckerTests, make_file, make_stats, \
+    tool_main
+
+from firehose.model import Analysis, Generator, Metadata, Failure, \
+    Location, File, Message, Issue, Trace
+from firehose.parsers.cppcheck import parse_file
+
+class InvokeCppcheck(Checker):
+    """
+    Checker that runs "cppcheck" on a single source file and turns the
+    XML report captured from its stderr into a firehose analysis.
+    """
+    def __init__(self, ctxt):
+        Checker.__init__(self, 'cppcheck', ctxt)
+
+    def raw_invoke(self, gccinv, sourcefile):
+        # gccinv is accepted for interface compatibility; only the source
+        # path is handed to cppcheck.
+        argv = ['cppcheck', '--xml', '--xml-version=2', sourcefile]
+        return self._run_subprocess(sourcefile, argv)
+
+    def handle_output(self, result):
+        """
+        Convert a finished subprocess result into an analysis: a single
+        'bad-exit-code' failure when cppcheck exited non-zero, otherwise
+        whatever the firehose cppcheck parser extracts from stderr.
+        """
+        if result.returncode:
+            failure_text = 'Bad exit code running %s' % self.name
+            failed = self._make_failed_analysis(
+                result.sourcefile, result.timer,
+                msgtext=failure_text,
+                failureid='bad-exit-code')
+            self.set_custom_fields(result, failed)
+            return failed
+
+        # There does not appear to be a way to make cppcheck write its XML
+        # report to a chosen path, so the captured stderr is spooled into a
+        # temporary file that the parser can read back.
+        with tempfile.NamedTemporaryFile() as spool:
+            spool.write(result.err)
+            spool.flush()
+            with open(spool.name) as report:
+                parsed = parse_file(report,
+                                    file_=make_file(result.sourcefile),
+                                    stats=make_stats(result.timer))
+        self.set_custom_fields(result, parsed)
+        return parsed
+
+    def set_custom_fields(self, result, analysis):
+        # Record the exact command line, then let the result add its own
+        # fields.
+        command_line = ' '.join(result.argv)
+        analysis.set_custom_field('cppcheck-invocation', command_line)
+        result.set_custom_fields(analysis)
+
+class CppcheckTests(CheckerTests):
+    def make_tool(self):
+        return InvokeCppcheck(self.make_ctxt())
+
+    def verify_basic_metadata(self, analysis, sourcefile):
+        # Every cppcheck analysis must name the tool and carry these fields.
+        self.assert_metadata(analysis, 'cppcheck', sourcefile)
+        for field in ('cppcheck-invocation', 'stdout', 'stderr'):
+            self.assert_has_custom_field(analysis, field)
+
+    def test_file_not_found(self):
+        analysis = self.invoke('does-not-exist.c')
+        self.assertEqual(len(analysis.results), 1)
+        failure = analysis.results[0]
+        self.assertIsInstance(failure, Failure)
+        self.assertEqual(failure.failureid, 'bad-exit-code')
+
+    def test_timeout(self):
+        path = 'test-sources/harmless.c'
+        checker = self.make_tool()
+        # A timeout of zero is expected to trip the timeout handling.
+        checker.timeout = 0
+        analysis = checker.checked_invoke(GccInvocation(['gcc', path]), path)
+        self.assert_metadata(analysis, 'cppcheck', path)
+        self.assertEqual(len(analysis.results), 1)
+        failure = analysis.results[0]
+        self.assertIsInstance(failure, Failure)
+        self.assertEqual(failure.failureid, 'timeout')
+        for field in ('timeout', 'command-line'):
+            self.assert_has_custom_field(analysis, field)
+
+    def test_harmless_file(self):
+        analysis = self.invoke('test-sources/harmless.c')
+        self.assertEqual(len(analysis.results), 0)
+
+    def test_read_through_null(self):
+        path = 'test-sources/read-through-null.c'
+        analysis = self.invoke(path)
+        self.assertEqual(len(analysis.results), 1)
+        issue = analysis.results[0]
+        self.assertIsInstance(issue, Issue)
+        self.assertEqual(issue.testid, 'nullPointer')
+        self.assertEqual(issue.location.file.givenpath, path)
+        self.assertEqual(issue.location.point.line, 3)
+        self.assertEqual(issue.message.text, "Null pointer dereference")
+        self.assertEqual(issue.severity, 'error')
+
+    def test_out_of_bounds(self):
+        path = 'test-sources/out-of-bounds.c'
+        analysis = self.invoke(path)
+        self.assertEqual(len(analysis.results), 2)
+
+        bounds_issue = analysis.results[0]
+        self.assertIsInstance(bounds_issue, Issue)
+        self.assertEqual(bounds_issue.testid, 'arrayIndexOutOfBounds')
+        self.assertEqual(bounds_issue.location.file.givenpath, path)
+        self.assertEqual(bounds_issue.location.point.line, 5)
+        self.assertEqual(
+            bounds_issue.message.text,
+            "Array 'arr[10]' accessed at index 15, which is out of bounds.")
+        self.assertEqual(bounds_issue.severity, 'error')
+
+        # Only the kind of the second report is pinned down.
+        uninit_issue = analysis.results[1]
+        self.assertIsInstance(uninit_issue, Issue)
+        self.assertEqual(uninit_issue.testid, 'uninitvar')
+
+if __name__ == '__main__':
+    sys.exit(tool_main(sys.argv, InvokeCppcheck))
diff --git a/checkers/flawfinder.py b/checkers/flawfinder.py
new file mode 100755
index 0000000..0722e3e
--- /dev/null
+++ b/checkers/flawfinder.py
@@ -0,0 +1,128 @@
+#!/usr/bin/env python
+# Copyright 2012, 2013, 2015, 2017 David Malcolm
+# Copyright 2012, 2013, 2015, 2017 Red Hat, Inc.
+#
+# This is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see
+# .
+
+import sys
+import tempfile
+import unittest
+
+from gccinvocation import GccInvocation
+
+from checker import Checker, Context, CheckerTests, make_file, make_stats, \
+    tool_main
+
+from firehose.model import Analysis, Generator, Metadata, Failure, \
+    Location, File, Message, Issue, Trace
+from firehose.parsers.flawfinder import parse_file
+
+class InvokeFlawfinder(Checker):
+    """
+    Checker subclass that invokes "flawfinder" on one source file and
+    converts the report captured from its stdout into a firehose analysis.
+    """
+    def __init__(self, ctxt):
+        Checker.__init__(self, 'flawfinder', ctxt)
+
+    def raw_invoke(self, gccinv, sourcefile):
+        # gccinv is not consulted; flawfinder is run with default options.
+        args = ['flawfinder', sourcefile] # FIXME
+        return self._run_subprocess(sourcefile, args)
+
+    def handle_output(self, result):
+        """
+        Build the analysis for a finished run.
+
+        A non-zero exit status yields an analysis holding a single
+        'bad-exit-code' failure; otherwise the captured stdout is parsed
+        and the source file and timing stats are attached to its metadata.
+        """
+        if result.returncode:
+            analysis = self._make_failed_analysis(result.sourcefile, result.timer,
+                                                  msgtext='Bad exit code running %s' % self.name,
+                                                  failureid='bad-exit-code')
+            self.set_custom_fields(result, analysis)
+            return analysis
+
+        # (there doesn't seem to be a way to have flawfinder directly
+        # save its output to a given location)
+
+        with tempfile.NamedTemporaryFile() as outfile:
+            outfile.write(result.out)
+            outfile.flush()
+
+            with open(outfile.name) as infile:
+                # Parse the captured stdout into a firehose analysis:
+                analysis = parse_file(infile)
+
+        # parse_file() is given only the report, so the metadata it cannot
+        # know about is filled in here.
+        analysis.metadata.file_ = make_file(result.sourcefile)
+        analysis.metadata.stats = make_stats(result.timer)
+        self.set_custom_fields(result, analysis)
+
+        return analysis
+
+    def set_custom_fields(self, result, analysis):
+        # Record the exact command line, then let the result add its own
+        # fields.
+        analysis.set_custom_field('flawfinder-invocation',
+                                  ' '.join(result.argv))
+        result.set_custom_fields(analysis)
+
+class FlawfinderTests(CheckerTests):
+    def make_tool(self):
+        ctxt = self.make_ctxt()
+        return InvokeFlawfinder(ctxt)
+
+    def verify_basic_metadata(self, analysis, sourcefile):
+        # Verify basic metadata:
+        self.assert_metadata(analysis, 'flawfinder', sourcefile)
+        self.assert_has_custom_field(analysis, 'flawfinder-invocation')
+        self.assert_has_custom_field(analysis, 'stdout')
+        self.assert_has_custom_field(analysis, 'stderr')
+
+    def test_file_not_found(self):
+        # Unlike the other checkers' tests, a missing file is expected to
+        # produce an empty analysis rather than a failure.
+        analysis = self.invoke('does-not-exist.c')
+        self.assertEqual(len(analysis.results), 0)
+
+    def test_timeout(self):
+        sourcefile = 'test-sources/harmless.c'
+        tool = self.make_tool()
+        tool.timeout = 0
+        gccinv = GccInvocation(['gcc', sourcefile])
+        analysis = tool.checked_invoke(gccinv, sourcefile)
+        self.assert_metadata(analysis, 'flawfinder', sourcefile)
+        self.assertEqual(len(analysis.results), 1)
+        r0 = analysis.results[0]
+        self.assertIsInstance(r0, Failure)
+        self.assertEqual(r0.failureid, 'timeout')
+        self.assert_has_custom_field(analysis, 'timeout')
+        self.assert_has_custom_field(analysis, 'command-line')
+
+    def test_harmless_file(self):
+        analysis = self.invoke('test-sources/harmless.c')
+        self.assertEqual(len(analysis.results), 0)
+
+    def test_use_of_random(self):
+        analysis = self.invoke('test-sources/cpychecker-demo.c')
+        self.assertEqual(len(analysis.results), 1)
+        r0 = analysis.results[0]
+        self.assertIsInstance(r0, Issue)
+        self.assertEqual(r0.testid, 'random')
+        self.assertEqual(r0.location.file.givenpath,
+                         'test-sources/cpychecker-demo.c')
+        self.assertEqual(r0.location.point.line, 97)
+        self.assertEqual(r0.message.text,
+                         "This function is not sufficiently random for"
+                         " security-related functions such as key and nonce"
+                         " creation. use a more secure technique for"
+                         " acquiring random values.")
+        self.assertEqual(r0.severity, '3')
+
+if __name__ == '__main__':
+    sys.exit(tool_main(sys.argv, InvokeFlawfinder))
diff --git a/checkers/ianal.py b/checkers/ianal.py
new file mode 100755
index 0000000..3fd93ea
--- /dev/null
+++ b/checkers/ianal.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+# Copyright 2017 David Malcolm
+# Copyright 2017 Red Hat, Inc.
+#
+# This is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see
+# .
+
+import re
+import sys
+import tempfile
+import unittest
+
+from gccinvocation import GccInvocation
+
+from checker import Checker, Context, CheckerTests, make_file, \
+    tool_main
+
+from firehose.model import Analysis, Generator, Metadata, Info, \
+    Location, File, Message, Range, Point
+
+class NotALawyer(Checker):
+    """
+    Demonstration checker for "info" results: each line of the source
+    file that contains "Copyright" is reported as an Info.
+    """
+    def __init__(self, ctxt):
+        Checker.__init__(self, 'not-a-lawyer', ctxt)
+
+    def raw_invoke(self, gccinv, sourcefile):
+        """
+        Scan sourcefile line by line and return an Analysis holding one
+        'copyright-line' Info per matching line.  gccinv is not used.
+        """
+        found = []
+        scanned = make_file(sourcefile)
+        with open(sourcefile) as src:
+            for linenum, text in enumerate(src, 1):
+                hit = re.match('.*(Copyright).*', text)
+                if not hit:
+                    continue
+                # span(1) is a 0-based, half-open range: its start needs
+                # +1 to become a 1-based column, while its end already is
+                # the 1-based column of the last matched character.
+                begin, finish = hit.span(1)
+                where = Location(
+                    scanned, None,
+                    range_=Range(start=Point(linenum, begin + 1),
+                                 end=Point(linenum, finish)))
+                found.append(Info(infoid='copyright-line',
+                                  location=where,
+                                  message=Message('I am not a lawyer'),
+                                  customfields=None))
+        return Analysis(Metadata(generator=Generator(self.name), sut=None,
+                                 file_=scanned, stats=None),
+                        found)
+
+class NotALawyerTests(CheckerTests):
+    def make_tool(self):
+        return NotALawyer(self.make_ctxt())
+
+    def verify_basic_metadata(self, analysis, sourcefile):
+        # This checker sets no custom fields, so only the tool name and
+        # source file are checked.
+        self.assert_metadata(analysis, 'not-a-lawyer', sourcefile)
+
+    def test_basic(self):
+        path = 'test-sources/cpychecker-demo.c'
+        analysis = self.invoke(path)
+        self.assertEqual(len(analysis.results), 2)
+        first = analysis.results[0]
+        self.assertIsInstance(first, Info)
+        self.assertEqual(first.infoid, 'copyright-line')
+        self.assertEqual(first.location.file.givenpath, path)
+        self.assertEqual(first.message.text, 'I am not a lawyer')
+        span = first.location.range_
+        self.assertEqual(span.start.line, 2)
+        self.assertEqual(span.start.column, 4)
+        self.assertEqual(span.end.line, 2)
+        self.assertEqual(span.end.column, 12)
+
+if __name__ == '__main__':
+    sys.exit(tool_main(sys.argv, NotALawyer))
diff --git a/checkers/splint.py b/checkers/splint.py
new file mode 100755
index 0000000..4bb8035
--- /dev/null
+++ b/checkers/splint.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+# Copyright 2017 David Malcolm
+# Copyright 2017 Red Hat, Inc.
+#
+# This is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see
+# .
+
+import sys
+import tempfile
+import unittest
+
+from gccinvocation import GccInvocation
+
+from checker import Checker, Context, CheckerTests, make_file, make_stats, \
+    tool_main
+
+from firehose.model import Analysis, Generator, Metadata, Failure, \
+    Location, File, Message, Issue, Trace
+from firehose.parsers.splint import parse_splint_csv, parse_splint_stderr
+
+class InvokeSplint(Checker):
+    """
+    Checker subclass that invokes "splint -strict", has it write its
+    findings to a scratch CSV file, and converts that file into a
+    firehose analysis.
+    """
+    def __init__(self, ctxt):
+        Checker.__init__(self, 'splint', ctxt)
+        # Scratch file receiving splint's CSV output; created per run.
+        self.tempfile = None
+
+    def __del__(self):
+        self._discard_tempfile()
+
+    def _discard_tempfile(self):
+        """Close the scratch CSV file, if one is currently held."""
+        # getattr: __del__ can run on an instance whose __init__ never
+        # reached the assignment of self.tempfile.
+        scratch = getattr(self, 'tempfile', None)
+        self.tempfile = None
+        if scratch is not None:
+            # Closing a NamedTemporaryFile also removes it from disk.
+            scratch.close()
+
+    def raw_invoke(self, gccinv, sourcefile):
+        # Release the scratch file of any earlier run before replacing it.
+        self._discard_tempfile()
+        self.tempfile = tempfile.NamedTemporaryFile()
+        args = ['splint', '-csv', self.tempfile.name, '+csvoverwrite', '-strict', sourcefile]
+        # FIXME: why is overwrite needed?
+        # (presumably because the scratch file already exists by the time
+        # splint runs -- to be confirmed)
+        return self._run_subprocess(sourcefile, args)
+
+    def handle_output(self, result):
+        """
+        Parse the CSV written by the preceding raw_invoke() call and
+        attach the source file, timing stats and splint version to it.
+        The exit status of splint is not examined.
+        """
+        analysis = parse_splint_csv(self.tempfile.name)
+        analysis.metadata.file_ = File(result.sourcefile, None)
+        # Record timing the same way the cppcheck and flawfinder checkers
+        # do; make_stats was imported here but never used.
+        analysis.metadata.stats = make_stats(result.timer)
+        # NOTE(review): this stores the version on the metadata object
+        # itself; confirm that consumers read it from there.
+        analysis.metadata.version = parse_splint_stderr(result.err)
+        self.set_custom_fields(result, analysis)
+        return analysis
+
+    def set_custom_fields(self, result, analysis):
+        # Record the exact command line, then let the result add its own
+        # fields.
+        analysis.set_custom_field('splint-invocation',
+                                  ' '.join(result.argv))
+        result.set_custom_fields(analysis)
+
+class SplintTests(CheckerTests):
+    def make_tool(self):
+        ctxt = self.make_ctxt()
+        return InvokeSplint(ctxt)
+
+    def verify_basic_metadata(self, analysis, sourcefile):
+        # Verify basic metadata:
+        self.assert_metadata(analysis, 'splint', sourcefile)
+        self.assert_has_custom_field(analysis, 'splint-invocation')
+        self.assert_has_custom_field(analysis, 'stdout')
+        self.assert_has_custom_field(analysis, 'stderr')
+
+    def test_unconditional_leak(self):
+        analysis = self.invoke('test-sources/unconditional-file-leak.c')
+        self.assertEqual(len(analysis.results), 8)
+        r0 = analysis.results[0]
+        self.assertIsInstance(r0, Issue)
+        self.assertEqual(r0.testid, 'internalglobs')
+
+if __name__ == '__main__':
+    sys.exit(tool_main(sys.argv, InvokeSplint))
diff --git a/checkers/test-sources/conditional-leak.c b/checkers/test-sources/conditional-leak.c
new file mode 100644
index 0000000..2ab46f5
--- /dev/null
+++ b/checkers/test-sources/conditional-leak.c
@@ -0,0 +1,17 @@
+#include
+
+void test ()
+{
+  void *ptr_1;
+  void *ptr_2;
+
+  ptr_1 = malloc (64);
+  if (!ptr_1)
+    return;
+  ptr_2 = malloc (64);
+  if (!ptr_2)
+    return;
+
+  free (ptr_2);
+  free (ptr_1);
+}
diff --git a/checkers/test-sources/cpychecker-demo.c b/checkers/test-sources/cpychecker-demo.c
new file mode 100644
index 0000000..b379729
--- /dev/null
+++ b/checkers/test-sources/cpychecker-demo.c
@@ -0,0 +1,110 @@
+/*
+   Copyright 2011 David Malcolm
+   Copyright 2011 Red Hat, Inc.
+ + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +/* Examples of mistakes made using the Python API */ +#include + +extern uint16_t htons(uint16_t hostshort); + +PyObject * +socket_htons(PyObject *self, PyObject *args) +{ + unsigned long x1, x2; + + if (!PyArg_ParseTuple(args, "i:htons", &x1)) { + return NULL; + } + x2 = (int)htons((short)x1); + return PyInt_FromLong(x2); +} + +PyObject * +not_enough_varargs(PyObject *self, PyObject *args) +{ + if (!PyArg_ParseTuple(args, "i")) { + return NULL; + } + Py_RETURN_NONE; +} + +PyObject * +too_many_varargs(PyObject *self, PyObject *args) +{ + int i, j; + if (!PyArg_ParseTuple(args, "i", &i, &j)) { + return NULL; + } + Py_RETURN_NONE; +} + +PyObject * +kwargs_example(PyObject *self, PyObject *args, PyObject *kwargs) +{ + double x, y; + char *keywords[] = {"x", "y"}; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "(ff):kwargs_example", keywords, &x, &y)) { + return NULL; + } + Py_RETURN_NONE; +} + + +extern int convert_to_ssize(PyObject *, Py_ssize_t *); + +PyObject * +buggy_converter(PyObject *self, PyObject *args) +{ + int i; + + if (!PyArg_ParseTuple(args, "O&", convert_to_ssize, &i)) { + return NULL; + } + + Py_RETURN_NONE; +} + +PyObject * +make_a_list_of_random_ints_badly(PyObject *self, + PyObject *args) +{ + PyObject *list, *item; + long count, i; + + if (!PyArg_ParseTuple(args, "i", &count)) { + return NULL; + } + + list = PyList_New(0); + + for (i = 0; i < count; 
i++) { + item = PyLong_FromLong(random()); + PyList_Append(list, item); + } + + return list; +} + +/* + PEP-7 +Local variables: +c-basic-offset: 4 +indent-tabs-mode: nil +End: +*/ diff --git a/checkers/test-sources/divide-by-zero.c b/checkers/test-sources/divide-by-zero.c new file mode 100644 index 0000000..f40692d --- /dev/null +++ b/checkers/test-sources/divide-by-zero.c @@ -0,0 +1,4 @@ +int divide_by_zero (int i) +{ + return i / 0; +} diff --git a/checkers/test-sources/harmless.c b/checkers/test-sources/harmless.c new file mode 100644 index 0000000..c29f0cc --- /dev/null +++ b/checkers/test-sources/harmless.c @@ -0,0 +1,9 @@ +#include + +int main (int argc, const char **argv) +{ + int i; + for (i = 0; i < argc; i++) + printf ("argv[%i]: %s\n", i, argv[i]); + return 0; +} diff --git a/checkers/test-sources/multiple-1.c b/checkers/test-sources/multiple-1.c new file mode 100644 index 0000000..8d26c69 --- /dev/null +++ b/checkers/test-sources/multiple-1.c @@ -0,0 +1,6 @@ +extern int helper (int argc, const char **argv); + +int main (int argc, const char **argv) +{ + return helper (argc, argv); +} diff --git a/checkers/test-sources/multiple-2.c b/checkers/test-sources/multiple-2.c new file mode 100644 index 0000000..a7a20ac --- /dev/null +++ b/checkers/test-sources/multiple-2.c @@ -0,0 +1,9 @@ +#include + +int helper (int argc, const char **argv) +{ + int i; + for (i = 0; i < argc; i++) + printf ("argv[%i]: %s\n", i, argv[i]); + return 0; +} diff --git a/checkers/test-sources/out-of-bounds.c b/checkers/test-sources/out-of-bounds.c new file mode 100644 index 0000000..4137389 --- /dev/null +++ b/checkers/test-sources/out-of-bounds.c @@ -0,0 +1,6 @@ +int out_of_bounds (void) +{ + int arr[10]; + + return arr[15]; +} diff --git a/checkers/test-sources/read-through-null.c b/checkers/test-sources/read-through-null.c new file mode 100644 index 0000000..2f0450c --- /dev/null +++ b/checkers/test-sources/read-through-null.c @@ -0,0 +1,4 @@ +int read_through_null (void) +{ + 
return *(int *)0; +} diff --git a/checkers/test-sources/return-of-stack-address.c b/checkers/test-sources/return-of-stack-address.c new file mode 100644 index 0000000..66c8893 --- /dev/null +++ b/checkers/test-sources/return-of-stack-address.c @@ -0,0 +1,6 @@ +void *test (void) +{ + char tmp[16]; + + return tmp; +} diff --git a/checkers/test-sources/unconditional-file-leak.c b/checkers/test-sources/unconditional-file-leak.c new file mode 100644 index 0000000..3c6655c --- /dev/null +++ b/checkers/test-sources/unconditional-file-leak.c @@ -0,0 +1,10 @@ +#include + +void test (const char *filename) +{ + int i; + FILE *f; + f = fopen (filename, "w"); + for (i = 0; i < 10; i++) + fprintf (f, "%i: %i", i, i * i); +} diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 6e0e55a..4608a88 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1046,10 +1046,11 @@ LIBDEPS= libcommon.a $(CPPLIB) $(LIBIBERTY) $(LIBINTL_DEP) $(LIBICONV_DEP) \ # even if we are cross-building GCC. BUILD_LIBDEPS= $(BUILD_LIBIBERTY) +# FIXME: add some configury for pthread # How to link with both our special library facilities # and the system's installed libraries. LIBS = @LIBS@ libcommon.a $(CPPLIB) $(LIBINTL) $(LIBICONV) $(LIBBACKTRACE) \ - $(LIBIBERTY) $(LIBDECNUMBER) $(HOST_LIBS) + $(LIBIBERTY) $(LIBDECNUMBER) $(HOST_LIBS) -lpthread BACKENDLIBS = $(ISLLIBS) $(GMPLIBS) $(PLUGINLIBS) $(HOST_LIBS) \ $(ZLIB) # Any system libraries needed just for GNAT. 
@@ -1213,6 +1214,8 @@ OBJS = \ ggc-page.o \ alias.o \ alloc-pool.o \ + annobin.o \ + x86_64.annobin.o \ auto-inc-dec.o \ auto-profile.o \ bb-reorder.o \ @@ -1232,6 +1235,7 @@ OBJS = \ cfgloopanal.o \ cfgloopmanip.o \ cfgrtl.o \ + checkers.o \ symtab.o \ cgraph.o \ cgraphbuild.o \ @@ -1278,6 +1282,7 @@ OBJS = \ expr.o \ fibonacci_heap.o \ final.o \ + firehose.o \ fixed-value.o \ fold-const.o \ fold-const-call.o \ @@ -1366,6 +1371,7 @@ OBJS = \ ira-color.o \ ira-emit.o \ ira-lives.o \ + json.o \ jump.o \ langhooks.o \ lcm.o \ diff --git a/gcc/annobin.cc b/gcc/annobin.cc new file mode 100644 index 0000000..f674ada --- /dev/null +++ b/gcc/annobin.cc @@ -0,0 +1,891 @@ +/* annobin - a gcc plugin for annotating binary files. + Copyright (c) 2017 Red Hat. + Created by Nick Clifton. + + This is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + It is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. */ + +#include "annobin.h" + +#include +#include +#include + +/* The version of the annotation specification supported by this plugin. */ +#define SPEC_VERSION 1 + +/* Required by the GCC plugin API. */ +int plugin_is_GPL_compatible; + +/* True if this plugin is enabled. Disabling is permitted so that build + systems can globally enable the plugin, and then have specific build + targets that disable the plugin because they do not want it. */ +static bool enabled = true; + +/* True if the symbols used to map addresses to file names should be global. + On some architectures these symbols have to be global so that they will + be preserved in object files. 
But doing so can prevent the build-id + mechanism from working, since the symbols contain build-date information. */ +static bool global_file_name_symbols = false; + +/* True if notes about the stack usage should be included. Doing can be useful + if stack overflow problems need to be diagnosed, but they do increase the size + of the note section quite a lot. */ +bool annobin_enable_stack_size_notes = false; +unsigned long annobin_total_static_stack_usage = 0; +unsigned long annobin_max_stack_size = 0; + +/* If a function's static stack size requirement is greater than STACK_THRESHOLD + then a function specific note will be generated indicating the amount of stack + that it needs. */ +#define DEFAULT_THRESHOLD (10240) +static unsigned long stack_threshold = DEFAULT_THRESHOLD; + +/* Internal variable, used by target specific parts of the annobin plugin as well + as this generic part. True if the object file being generated is for a 64-bit + target. */ +bool annobin_is_64bit = false; + +/* True if notes in the .note.gnu.property section should be produced. */ +static bool annobin_enable_dynamic_notes = true; + +/* True if notes in the .gnu.build.attributes section should be produced. */ +static bool annobin_enable_static_notes = true; + +static unsigned int global_GOWall_options = 0; +static int global_stack_prot_option = -1; +static int global_pic_option = -1; +static int global_short_enums = -1; +static char * compiler_version = NULL; +static unsigned verbose_level = 0; +static char * annobin_current_filename = NULL; +static unsigned char annobin_version = 1; /* NB. Keep in sync with version_string. 
*/
+static const char * version_string = N_("Version 1");
+static const char * help_string = N_("Supported options:\n\
+ disable Disable this plugin\n\
+ enable Enable this plugin\n\
+ help Print out this information\n\
+ version Print out the version of the plugin\n\
+ verbose Be talkative about what is going on\n\
+ [no-]dynamic-notes Do [do not] create dynamic notes (default: do)\n\
+ [no-]static-notes Do [do not] create static notes (default: do)\n\
+ [no-]global-file-syms Create global [or local] file name symbols (default: local)\n\
+ [no-]stack-size-notes Do [do not] create stack size notes (default: do not)\n\
+ stack-threshold=N Only create function specific stack size notes when the size is > N.");
+
+static struct plugin_info annobin_info =
+{
+  version_string,
+  help_string
+};
+
+/* Create a symbol name to represent the sources we are annotating.
+   Since there can be multiple input files, we choose the main output
+   filename (stripped of any path prefixes). Since filenames can
+   contain characters that symbol names do not (eg '-') we have to
+   allocate our own name.
+
+   Does nothing if the name has already been created or if
+   main_input_filename is not set. */
+
+static void
+init_annobin_current_filename (void)
+{
+  /* Spare bytes reserved for the "_<sec>_<usec>" suffix: two underscores,
+     two fields of eight hex digits, and the terminating NUL. */
+  enum { TIME_SUFFIX_ROOM = 20 };
+  char * name;
+  unsigned i;
+
+  if (annobin_current_filename != NULL
+      || main_input_filename == NULL)
+    return;
+
+  name = (char *) lbasename (main_input_filename);
+  if (global_file_name_symbols)
+    name = strcpy ((char *) xmalloc (strlen (name) + TIME_SUFFIX_ROOM), name);
+  else
+    name = xstrdup (name);
+
+  /* Convert any non-symbolic characters into underscores. */
+  for (i = strlen (name); i--;)
+    {
+      char c = name[i];
+
+      if (! ISALNUM (c) && c != '_' && c != '.' && c != '$')
+        name[i] = '_';
+      else if (i == 0 && ISDIGIT (c))
+        name[i] = '_';
+    }
+
+  /* Publish the name before anything below can call annobin_inform ().
+     annobin_inform () calls back into this function for as long as the
+     name is NULL, so reporting a gettimeofday () failure before this
+     assignment would recurse without end. */
+  annobin_current_filename = name;
+
+  if (global_file_name_symbols)
+    {
+      /* A program can have multiple source files with the same name.
+         Or indeed the same source file can be included multiple times.
+         Or a library can be built from a sources which include file names
+         that match application file names. Whatever the reason, we need
+         to be ensure that we generate unique global symbol names. So we
+         append the time to the symbol name. This will of course break
+         the functionality of build-ids. That is why this option is off
+         by default. */
+      struct timeval tv;
+
+      if (gettimeofday (& tv, NULL))
+        {
+          annobin_inform (0, "ICE: unable to get time of day.");
+          tv.tv_sec = tv.tv_usec = 0;
+        }
+      /* %8.8lx is only a minimum width, so each field is masked to 32 bits
+         and the write is bounded: the suffix can never outgrow the spare
+         room allocated above. */
+      snprintf (name + strlen (name), TIME_SUFFIX_ROOM,
+                "_%8.8lx_%8.8lx",
+                (unsigned long) tv.tv_sec & 0xffffffffUL,
+                (unsigned long) tv.tv_usec & 0xffffffffUL);
+    }
+}
+
+/* Print a diagnostic on stderr, prefixed with "annobin: " and, when one
+   is available, the current file name. Messages with a LEVEL greater
+   than zero are only shown when LEVEL does not exceed verbose_level.
+   A newline is appended to the formatted message. */
+
+void
+annobin_inform (unsigned level, const char * format, ...)
+{
+  va_list args;
+
+  if (level > 0 && level > verbose_level)
+    return;
+
+  /* Resolve the file name first, so that any diagnostic it emits itself
+     is not interleaved with this message's prefix. */
+  if (annobin_current_filename == NULL)
+    init_annobin_current_filename ();
+
+  fflush (stdout);
+  fprintf (stderr, "annobin: ");
+  if (annobin_current_filename)
+    fprintf (stderr, "%s: ", annobin_current_filename);
+  va_start (args, format);
+  vfprintf (stderr, format, args);
+  va_end (args);
+  putc ('\n', stderr);
+}
+
+/* Write one ELF note to asm_out_file as assembler directives.
+
+   NAME/NAMESZ give the note's name field; when NAME_IS_STRING it is
+   emitted with .asciz, otherwise byte by byte. NAME_DESCRIPTION is only
+   used in the assembler comment. DESC/DESCSZ give the description
+   field; when DESC_IS_STRING, DESC is the name of a symbol and a
+   reference to it is emitted, otherwise DESC is emitted as raw bytes.
+   Both fields are padded to a multiple of four bytes. FUNC and OPEN
+   notes are wrapped in a .pushsection/.popsection pair. */
+
+void
+annobin_output_note (const void * name, unsigned namesz, bool name_is_string,
+                     const char * name_description,
+                     const void * desc, unsigned descsz, bool desc_is_string,
+                     unsigned type)
+{
+  unsigned i;
+
+  if (type == NT_GNU_BUILD_ATTRIBUTE_FUNC
+      || type == NT_GNU_BUILD_ATTRIBUTE_OPEN)
+    {
+      fprintf (asm_out_file, "\t.pushsection %s\n", GNU_BUILD_ATTRS_SECTION_NAME);
+    }
+
+  if (name == NULL)
+    {
+      if (namesz)
+        annobin_inform (0, "ICE: null name with non-zero size");
+      fprintf (asm_out_file, "\t.dc.l 0\t\t%s no name\n", ASM_COMMENT_START);
+    }
+  else if (name_is_string)
+    {
+      /* NAMESZ counts the terminating NUL. */
+      if (strlen ((const char *) name) != namesz - 1)
+        annobin_inform (0, "ICE: name string '%s' does not match name size %u",
+                        (const char *) name, namesz);
+      fprintf (asm_out_file, "\t.dc.l %u \t%s namesz = strlen (%s)\n", namesz, ASM_COMMENT_START, (const char *) name);
+    }
+  else
+    fprintf (asm_out_file, "\t.dc.l %u\t\t%s size of name\n", namesz, ASM_COMMENT_START);
+
+  if (desc == NULL)
+    {
+      if (descsz)
+        annobin_inform (0, "ICE: null desc with non-zero size");
+      fprintf (asm_out_file, "\t.dc.l 0\t\t%s no description\n", ASM_COMMENT_START);
+    }
+  else if (desc_is_string)
+    {
+      if (descsz != (annobin_is_64bit ? 8 : 4))
+        annobin_inform (0, "ICE: description string size (%u) not sizeof address 8/4", descsz);
+      fprintf (asm_out_file, "\t.dc.l %u\t\t%s descsz = sizeof (address)\n", descsz, ASM_COMMENT_START);
+    }
+  else
+    fprintf (asm_out_file, "\t.dc.l %u\t\t%s size of description\n", descsz, ASM_COMMENT_START);
+
+  fprintf (asm_out_file, "\t.dc.l %#x\t%s type = %s\n", type, ASM_COMMENT_START,
+           type == NT_GNU_BUILD_ATTRIBUTE_OPEN ? "OPEN" :
+           type == NT_GNU_BUILD_ATTRIBUTE_FUNC ? "FUNC" :
+           type == NT_GNU_PROPERTY_TYPE_0 ? "PROPERTY_TYPE_0" : "*UNKNOWN*");
+
+  if (name)
+    {
+      if (name_is_string)
+        {
+          fprintf (asm_out_file, "\t.asciz \"%s\"", (const char *) name);
+        }
+      else
+        {
+          fprintf (asm_out_file, "\t.dc.b");
+          for (i = 0; i < namesz; i++)
+            fprintf (asm_out_file, " %#x%c",
+                     ((const unsigned char *) name)[i],
+                     i < (namesz - 1) ? ',' : ' ');
+        }
+
+      fprintf (asm_out_file, "\t%s name (%s)\n",
+               ASM_COMMENT_START, name_description);
+
+      if (namesz % 4)
+        {
+          fprintf (asm_out_file, "\t.dc.b");
+          while (namesz % 4)
+            {
+              namesz++;
+              fprintf (asm_out_file, " 0%c", namesz % 4 ? ',' : ' ');
+            }
+          fprintf (asm_out_file, "\t%s Padding\n", ASM_COMMENT_START);
+        }
+    }
+
+  if (desc)
+    {
+      if (desc_is_string)
+        {
+          /* The DESCRIPTION string is the name of a symbol. We want to produce
+             a reference to this symbol of the appropriate size for the target
+             architecture. */
+          if (annobin_is_64bit)
+            fprintf (asm_out_file, "\t.quad %s", (const char *) desc);
+          else
+            fprintf (asm_out_file, "\t.dc.l %s", (const char *) desc);
+          fprintf (asm_out_file, "\t%s description (symbol name)\n", ASM_COMMENT_START);
+        }
+      else
+        {
+          fprintf (asm_out_file, "\t.dc.b");
+
+          /* Eight bytes per .dc.b directive. */
+          for (i = 0; i < descsz; i++)
+            {
+              fprintf (asm_out_file, " %#x", ((const unsigned char *) desc)[i]);
+              if (i == (descsz - 1))
+                fprintf (asm_out_file, "\t%s description\n", ASM_COMMENT_START);
+              else if ((i % 8) == 7)
+                fprintf (asm_out_file, "\t%s description\n\t.dc.b", ASM_COMMENT_START);
+              else
+                fprintf (asm_out_file, ",");
+            }
+
+          if (descsz % 4)
+            {
+              fprintf (asm_out_file, "\t.dc.b");
+              while (descsz % 4)
+                {
+                  descsz++;
+                  fprintf (asm_out_file, " 0%c", descsz % 4 ? ',' : ' ');
+                }
+              fprintf (asm_out_file, "\t%s Padding\n", ASM_COMMENT_START);
+            }
+        }
+    }
+
+  if (type == NT_GNU_BUILD_ATTRIBUTE_FUNC
+      || type == NT_GNU_BUILD_ATTRIBUTE_OPEN)
+    {
+      fprintf (asm_out_file, "\t.popsection\n");
+      fflush (asm_out_file);
+    }
+
+  fprintf (asm_out_file, "\n");
+}
+
+/* Emit a boolean note: the name is the true/false type character
+   followed by BOOL_TYPE. DESCRIPTION, if not NULL, is the symbol the
+   note refers to. */
+
+void
+annobin_output_bool_note (const char bool_type,
+                          const bool bool_value,
+                          const char * name_description,
+                          const char * description,
+                          unsigned note_type)
+{
+  /* Two characters plus the terminating NUL. */
+  char buffer [3];
+
+  snprintf (buffer, sizeof buffer, "%c%c",
+            bool_value ? GNU_BUILD_ATTRIBUTE_TYPE_BOOL_TRUE : GNU_BUILD_ATTRIBUTE_TYPE_BOOL_FALSE,
+            bool_type);
+
+  /* Include the NUL byte at the end of the name "string".
+     This is required by the ELF spec. */
+  annobin_output_note (buffer, 3, false, name_description,
+                       description, description == NULL ? 0 : (annobin_is_64bit ? 8 : 4),
+                       description != NULL, note_type);
+}
+
+/* Emit a string note: the name is the string type character, then
+   STRING_TYPE, then STRING itself. DESCRIPTION, if not NULL, is the
+   symbol the note refers to. */
+
+void
+annobin_output_string_note (const char string_type,
+                            const char * string,
+                            const char * name_description,
+                            const char * description,
+                            unsigned note_type)
+{
+  char buffer [1024];
+  int len;
+
+  len = snprintf (buffer, sizeof buffer, "%c%c%s", GNU_BUILD_ATTRIBUTE_TYPE_STRING,
+                  string_type, string);
+  /* The note is still emitted, but a shortened string is reported rather
+     than recorded silently. */
+  if (len < 0 || (size_t) len >= sizeof buffer)
+    annobin_inform (0, "ICE: string for note %s truncated", name_description);
+
+  annobin_output_note (buffer, strlen (buffer) + 1, true, name_description,
+                       description, description == NULL ? 0 : (annobin_is_64bit ? 8 : 4),
+                       description != NULL, note_type);
+}
+
+/* Emit a numeric note: the name is the numeric type character, then
+   NUMERIC_TYPE, then VALUE stored least significant byte first and
+   followed by a NUL byte. DESCRIPTION, if not NULL, is the symbol the
+   note refers to. */
+
+void
+annobin_output_numeric_note (const char numeric_type,
+                             unsigned long value,
+                             const char * name_description,
+                             const char * description,
+                             unsigned note_type)
+{
+  unsigned i;
+  char buffer [32];
+
+  sprintf (buffer, "%c%c", GNU_BUILD_ATTRIBUTE_TYPE_NUMERIC, numeric_type);
+
+  if (value == 0)
+    {
+      /* We need to record *two* zero bytes for a zero value. One for
+         the value itself and one as a NUL terminator, since this is a
+         name field... */
+      buffer [2] = buffer [3] = 0;
+      i = 3;
+    }
+  else
+    {
+      for (i = 2; i < sizeof buffer; i++)
+        {
+          buffer[i] = value & 0xff;
+          /* Note - The name field in ELF Notes must be NUL terminated, even if,
+             like here, it is not really being used as a name. Hence the test
+             for value being zero is performed here, rather than after the shift. */
+          if (value == 0)
+            break;
+          value >>= 8;
+        }
+    }
+
+  /* If the value needs more than 8 bytes, consumers are unlikely to be able
+     to handle it. */
+  if (i > 10)
+    annobin_inform (0, "ICE: Numeric value for %s too big to fit into 8 bytes\n", name_description);
+  if (value)
+    annobin_inform (0, "ICE: Unable to record numeric value in note %s\n", name_description);
+
+  /* As in the bool and string variants, the description is only flagged
+     as a symbol name when there is one. */
+  annobin_output_note (buffer, i + 1, false, name_description,
+                       description, description == NULL ? 0 : (annobin_is_64bit ? 8 : 4),
+                       description != NULL, note_type);
+}
+
+/* Encode the PIC/PIE flags as one number, from 0 (neither flag_pic nor
+   flag_pie set) up to 4 (flag_pie greater than one). flag_pie is
+   examined before flag_pic. */
+
+static int
+compute_pic_option (void)
+{
+  if (flag_pie > 1)
+    return 4;
+  if (flag_pie)
+    return 3;
+  if (flag_pic > 1)
+    return 2;
+  if (flag_pic)
+    return 1;
+  return 0;
+}
+
+/* Compute a numeric value representing the settings/levels of
+   the -O and -g options, and whether -Wall has been used. This
+   is to help verify the recommended hardening options for binaries.
+ The format of the number is as follows: + + bits 0 - 2 : debug type (from enum debug_info_type) + bit 3 : with GNU extensions + bits 4 - 5 : debug level (from enum debug_info_levels) + bits 6 - 8 : DWARF version level + bits 9 - 10 : optimization level + bit 11 : -Os + bit 12 : -Ofast + bit 13 : -Og + bit 14 : -Wall. */ + +static unsigned int +compute_GOWall_options (void) +{ + unsigned int val, i; + + /* FIXME: Keep in sync with changes to gcc/flag-types.h:enum debug_info_type. */ + if (write_symbols > VMS_AND_DWARF2_DEBUG) + { + annobin_inform (0, "ICE: unknown debug info type %d\n", write_symbols); + val = 0; + } + else + val = write_symbols; + + if (use_gnu_debug_info_extensions) + val |= (1 << 3); + + if (debug_info_level > DINFO_LEVEL_VERBOSE) + annobin_inform (0, "ICE: unknown debug info level %d\n", debug_info_level); + else + val |= (debug_info_level << 4); + + if (dwarf_version < 0 || dwarf_version > 7) + annobin_inform (0, "ICE: unknown dwarf version level %d\n", dwarf_version); + else + val |= (dwarf_version << 6); + + if (optimize > 3) + val |= (3 << 9); + else + val |= (optimize << 9); + + /* FIXME: It should not be possible to enable more than one of -Os/-Of/-Og, + so the tests below could be simplified. */ + if (optimize_size) + val |= (1 << 11); + if (optimize_fast) + val |= (1 << 12); + if (optimize_debug) + val |= (1 << 13); + + /* Unfortunately -Wall is not recorded by gcc. So we have to scan the + command line... 
*/ + for (i = 0; i < save_decoded_options_count; i++) + { + if (save_decoded_options[i].opt_index == OPT_Wall) + { + val |= (1 << 14); + break; + } + } + + return val; +} + +static void +record_GOW_settings (unsigned int gow, bool local) +{ + char buffer [128]; + unsigned i; + + (void) sprintf (buffer, "%cGOW", GNU_BUILD_ATTRIBUTE_TYPE_NUMERIC); + + for (i = 5; i < sizeof buffer; i++) + { + buffer[i] = gow & 0xff; + /* Note - The name field in ELF Notes must be NUL terminated, even if, + like here, it is not really being used as a name. Hence the test + for value being zero is performed here, rather than after the shift. */ + if (gow == 0) + break; + gow >>= 8; + } + + if (local) + { + annobin_inform (1, "Record a change in -g/-O/-Wall status for %s", current_function_name ()); + annobin_output_note (buffer, i + 1, false, "numeric: -g/-O/-Wall", + current_function_name (), annobin_is_64bit ? 8 : 4, true, + NT_GNU_BUILD_ATTRIBUTE_FUNC); + } + else + { + annobin_inform (1, "Record status of -g/-O/-Wall"); + annobin_output_note (buffer, i + 1, false, "numeric: -g/-O/-Wall", + NULL, 0, false, NT_GNU_BUILD_ATTRIBUTE_OPEN); + } +} + +static void +annobin_create_function_notes (void * gcc_data, void * user_data) +{ + if (! 
annobin_enable_static_notes) + return; + + annobin_target_specific_function_notes (); + + if (global_stack_prot_option != flag_stack_protect) + { + annobin_inform (1, "Recording change in stack protection status for %s (from %d to %d)", + current_function_name (), global_stack_prot_option, flag_stack_protect); + + annobin_output_numeric_note (GNU_BUILD_ATTRIBUTE_STACK_PROT, flag_stack_protect, + "numeric: -fstack-protector status", + current_function_name (), NT_GNU_BUILD_ATTRIBUTE_FUNC); + } + + if (global_pic_option != compute_pic_option ()) + { + annobin_inform (1, "Recording change in PIC status for %s", current_function_name ()); + annobin_output_numeric_note (GNU_BUILD_ATTRIBUTE_PIC, compute_pic_option (), + "numeric: pic type", current_function_name (), + NT_GNU_BUILD_ATTRIBUTE_FUNC); + } + + if (global_GOWall_options != compute_GOWall_options ()) + record_GOW_settings (compute_GOWall_options (), true); + + if (global_short_enums != flag_short_enums) + { + annobin_inform (1, "Recording change in enum size for %s", current_function_name ()); + annobin_output_bool_note (GNU_BUILD_ATTRIBUTE_SHORT_ENUM, flag_short_enums, + flag_short_enums ? 
"bool: short-enums: on" : "bool: short-enums: off", + current_function_name (), NT_GNU_BUILD_ATTRIBUTE_FUNC); + } + + + if (annobin_enable_stack_size_notes && flag_stack_usage_info) + { + if ((unsigned long) current_function_static_stack_size > stack_threshold) + { + annobin_inform (1, "Recording stack usage of %lu for %s", + current_function_static_stack_size, current_function_name ()); + + annobin_output_numeric_note (GNU_BUILD_ATTRIBUTE_STACK_SIZE, current_function_static_stack_size, + "numeric: stack-size", current_function_name (), + NT_GNU_BUILD_ATTRIBUTE_FUNC); + } + + annobin_total_static_stack_usage += current_function_static_stack_size; + + if ((unsigned long) current_function_static_stack_size > annobin_max_stack_size) + annobin_max_stack_size = current_function_static_stack_size; + } +} + +static void +record_fortify_level (int level) +{ + char buffer [128]; + unsigned len = sprintf (buffer, "%cFORTIFY", GNU_BUILD_ATTRIBUTE_TYPE_NUMERIC); + + buffer[++len] = level; + buffer[++len] = 0; + annobin_output_note (buffer, len + 1, false, "FORTIFY SOURCE level", + NULL, 0, false, NT_GNU_BUILD_ATTRIBUTE_OPEN); + annobin_inform (1, "Record a FORTIFY SOURCE level of %d", level); +} + +static void +annobin_create_global_notes (void * gcc_data, void * user_data) +{ + int i; + char buffer [1024]; /* FIXME: Is this enough ? */ + + if (! annobin_enable_static_notes) + return; + + /* Record global information. + Note - we do this here, rather than in plugin_init() as some + information, PIC status or POINTER_SIZE, may not be initialised + until after the target backend has had a chance to process its + command line options, and this happens *after* plugin_init. */ + + /* Compute the default data size. 
*/ + switch (POINTER_SIZE) + { + case 16: + case 32: + annobin_is_64bit = false; break; + case 64: + annobin_is_64bit = true; break; + default: + annobin_inform (0, _("Unknown target pointer size: %d"), POINTER_SIZE); + } + + if (annobin_enable_stack_size_notes) + /* We must set this flag in order to obtain per-function stack usage info. */ + flag_stack_usage_info = 1; + + global_stack_prot_option = flag_stack_protect; + global_pic_option = compute_pic_option (); + global_short_enums = flag_short_enums; + global_GOWall_options = compute_GOWall_options (); + + /* Output a file name symbol to be referenced by the notes... */ + if (annobin_current_filename == NULL) + init_annobin_current_filename (); + if (annobin_current_filename == NULL) + { + annobin_inform (0, "ICE: Could not find output filename"); + /* We need a filename, so invent one. */ + annobin_current_filename = (char *) "unknown_source"; + } + if (global_file_name_symbols) + fprintf (asm_out_file, ".global %s\n", annobin_current_filename); + fprintf (asm_out_file, ".type %s STT_OBJECT\n", annobin_current_filename); + fprintf (asm_out_file, "%s:\n", annobin_current_filename); + + /* Create the static notes section. */ +#ifdef OLD_GAS + /* GAS prior to version 2.27 did not support setting section flags via a numeric value. */ + fprintf (asm_out_file, "\t.pushsection %s, \"\", %%note\n", + GNU_BUILD_ATTRS_SECTION_NAME); +#else + fprintf (asm_out_file, "\t.pushsection %s, \"%#x\", %%note\n", + GNU_BUILD_ATTRS_SECTION_NAME, SHF_GNU_BUILD_NOTE); +#endif + fprintf (asm_out_file, "\t.balign 4\n"); + + /* Output the version of the specification supported. */ + sprintf (buffer, "%dp%d", SPEC_VERSION, annobin_version); + annobin_output_string_note (GNU_BUILD_ATTRIBUTE_VERSION, buffer, + "string: version", annobin_current_filename, NT_GNU_BUILD_ATTRIBUTE_OPEN); + + /* Record the version of the compiler. 
*/ + annobin_output_string_note (GNU_BUILD_ATTRIBUTE_TOOL, compiler_version, + "string: build-tool", NULL, NT_GNU_BUILD_ATTRIBUTE_OPEN); + + /* Record optimization level, -W setting and -g setting */ + record_GOW_settings (global_GOWall_options, false); + + /* Record -fstack-protector option. */ + annobin_output_numeric_note (GNU_BUILD_ATTRIBUTE_STACK_PROT, global_stack_prot_option, + "numeric: -fstack-protector status", + NULL, NT_GNU_BUILD_ATTRIBUTE_OPEN); + + /* Look for -D _FORTIFY_SOURCE= on the original gcc command line. + Scan backwards so that we record the last version of the option, + should multiple versions be set. */ + for (i = save_decoded_options_count; i--;) + { + if (save_decoded_options[i].opt_index == OPT_D + && save_decoded_options[i].arg != NULL + && strncmp (save_decoded_options[i].arg, "_FORTIFY_SOURCE=", strlen ("_FORTIFY_SOURCE=")) == 0) + { + int level = atoi (save_decoded_options[i].arg + strlen ("_FORTIFY_SOURCE=")); + + /* Out-of-range levels are reported and then recorded as 0. */ + if (level < 0 || level > 3) + { + annobin_inform (0, "Unexpected value for FORTIFY SOURCE: %s", + save_decoded_options[i].arg); + level = 0; + } + record_fortify_level (level); + break; + } + else if (save_decoded_options[i].opt_index == OPT_fpreprocessed) + { + /* Preprocessed sources *might* have had -D_FORTIFY_SOURCE= + applied, but we cannot tell from here. Well not without a + deep inspection of the preprocessed sources. So instead we + record a level of -1 to let the user know that we do not know. + Note: preprocessed sources includes the use of --save-temps. */ + record_fortify_level (-1); + break; + } + } + /* The loop leaves I at -1 only when it ran to completion without a match. */ + if (i < 0) + record_fortify_level (0); + + /* Record the PIC status. */ + annobin_output_numeric_note (GNU_BUILD_ATTRIBUTE_PIC, global_pic_option, + "numeric: PIC", NULL, NT_GNU_BUILD_ATTRIBUTE_OPEN); + + /* Record enum size. */ + annobin_output_bool_note (GNU_BUILD_ATTRIBUTE_SHORT_ENUM, global_short_enums, + global_short_enums ?
"bool: short-enums: on" : "bool: short-enums: off", + NULL, NT_GNU_BUILD_ATTRIBUTE_OPEN); + + /* Record target specific notes. */ + annobin_record_global_target_notes (); + + fprintf (asm_out_file, "\t.popsection\n"); + fflush (asm_out_file); +} + +static void +annobin_create_loader_notes (void * gcc_data, void * user_data) +{ + if (! annobin_enable_dynamic_notes) + return; + + if (annobin_enable_stack_size_notes && annobin_total_static_stack_usage) + { + annobin_inform (1, "Recording total static usage of %ld", annobin_total_static_stack_usage); + + fprintf (asm_out_file, "\t.pushsection %s\n", GNU_BUILD_ATTRS_SECTION_NAME); + annobin_output_numeric_note (GNU_BUILD_ATTRIBUTE_STACK_SIZE, annobin_total_static_stack_usage, + "numeric: stack-size", NULL, NT_GNU_BUILD_ATTRIBUTE_OPEN); + fprintf (asm_out_file, "\t.popsection\n"); + } + + annobin_target_specific_loader_notes (); +} + +static bool +parse_args (unsigned argc, struct plugin_argument * argv) +{ + while (argc--) + { + char * key = argv[argc].key; + + while (*key == '-') + ++ key; + + /* These options allow the plugin to be enabled/disabled by a build + system without having to change the option that loads the plugin + itself. 
*/ + if (strcmp (key, "disable") == 0) + enabled = false; + + else if (strcmp (key, "enable") == 0) + enabled = true; + + else if (strcmp (key, "help") == 0) + annobin_inform (0, help_string); + + else if (strcmp (key, "version") == 0) + annobin_inform (0, version_string); + + else if (strcmp (key, "verbose") == 0) + verbose_level ++; + + else if (strcmp (key, "global-file-syms") == 0) + global_file_name_symbols = true; + else if (strcmp (key, "no-global-file-syms") == 0) + global_file_name_symbols = false; + + else if (strcmp (key, "stack-size-notes") == 0) + annobin_enable_stack_size_notes = true; + else if (strcmp (key, "no-stack-size-notes") == 0) + annobin_enable_stack_size_notes = false; + + else if (strcmp (key, "dynamic-notes") == 0) + annobin_enable_dynamic_notes = true; + else if (strcmp (key, "no-dynamic-notes") == 0) + annobin_enable_dynamic_notes = false; + + else if (strcmp (key, "static-notes") == 0) + annobin_enable_static_notes = true; + else if (strcmp (key, "no-static-notes") == 0) + annobin_enable_static_notes = false; + + else if (strcmp (key, "stack-threshold") == 0) + { + stack_threshold = strtoul (argv[argc].value, NULL, 0); + if (stack_threshold == 0) + stack_threshold = DEFAULT_THRESHOLD; + } + + else + { + annobin_inform (0, "unrecognised option: %s", argv[argc].key); + return false; + } + } + + return true; +} + +int +plugin_init (struct plugin_name_args * plugin_info, + struct plugin_gcc_version * version) +{ +#if 0 + if (!plugin_default_version_check (version, & gcc_version)) + { + if (strcmp (version->basever, gcc_version.basever)) + annobin_inform (0, _("ICE: plugin built for compiler version (%s) but run with compiler version (%s)"), + version->basever, gcc_version.basever); + else if (strcmp (version->datestamp, gcc_version.datestamp)) + annobin_inform (0, _("ICE: plugin datestamp (%s) different from compiler datestamp (%s)"), + version->datestamp, gcc_version.datestamp); + else if (strcmp (version->devphase, gcc_version.devphase)) 
+ annobin_inform (0, _("ICE: plugin built for development phase (%s) not (%s)"), + version->devphase, gcc_version.devphase); + else if (strcmp (version->revision, gcc_version.revision)) + annobin_inform (0, _("ICE: plugin built for compiler revision (%s) not (%s)"), + version->revision, gcc_version.revision); + else + { + const char * plugin_target; + const char * gcc_target; + const char * plugin_target_end; + const char * gcc_target_end; + + /* The entire configuration string can be very verbose, + so try to catch the case of compiler and plugin being + built for different targets and tell the user just that. */ + plugin_target = strstr (version->configuration_arguments, "target="); + gcc_target = strstr (gcc_version.configuration_arguments, "target="); + if (plugin_target) + { + plugin_target += 7; /* strlen ("target=") */ + plugin_target_end = strchr (plugin_target, ' '); + } + else + { + /* The end pointer must be derived from plugin_target itself; + gcc_target may be NULL or point into an unrelated string. */ + plugin_target = "native"; + plugin_target_end = plugin_target + 6; /* strlen ("native") */ + } + if (gcc_target) + { + gcc_target += 7; + gcc_target_end = strchr (gcc_target, ' '); + } + else + { + gcc_target = "native"; + gcc_target_end = gcc_target + 6; + } + + if (plugin_target_end && gcc_target_end + && strncmp (plugin_target, gcc_target, plugin_target_end - plugin_target)) + { + /* A "%.*s" precision must be an int, not a pointer difference. */ + annobin_inform (0, _("ICE: plugin run on a %.*s compiler but built on a %.*s compiler"), + (int) (plugin_target_end - plugin_target), plugin_target, + (int) (gcc_target_end - gcc_target), gcc_target); + } + else + { + annobin_inform (0, _("ICE: plugin run on a compiler configured as (%s) not (%s)"), + version->configuration_arguments, gcc_version.configuration_arguments); + } + } + return 1; + } +#endif + + if (! parse_args (plugin_info->argc, plugin_info->argv)) + { + annobin_inform (1, _("failed to parse arguments to plugin")); + return 1; + } + + if (! enabled) + return 0; + + if (! annobin_enable_dynamic_notes && !
annobin_enable_static_notes) + { + annobin_inform (1, _("nothing to be done")); + return 0; + } + + /* Record global compiler options. */ + compiler_version = (char *) xmalloc (strlen (version->basever) + strlen (version->datestamp) + 6); + sprintf (compiler_version, "gcc %s %s", version->basever, version->datestamp); + + annobin_save_target_specific_information (); + +#if 0 + register_callback (plugin_info->base_name, + PLUGIN_INFO, + NULL, + & annobin_info); + + register_callback ("Generate global annotations", + PLUGIN_START_UNIT, + annobin_create_global_notes, + NULL); + + register_callback ("Generate per-function annotations", + PLUGIN_ALL_PASSES_END, + annobin_create_function_notes, + NULL); + + register_callback ("Generate final annotations", + PLUGIN_FINISH_UNIT, + annobin_create_loader_notes, + NULL); +#endif + return 0; +} diff --git a/gcc/annobin.h b/gcc/annobin.h new file mode 100644 index 0000000..5f8bd64 --- /dev/null +++ b/gcc/annobin.h @@ -0,0 +1,99 @@ +/* annobin - Header file for the gcc plugin for annotating binary files. + Copyright (c) 2017 Red Hat. + Created by Nick Clifton. + + This is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + It is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. */ + +/* What a mess. All of this is so that we can include gcc-plugin.h. */ + +#include +#undef PACKAGE_NAME +#undef PACKAGE_STRING +#undef PACKAGE_TARNAME +#undef PACKAGE_VERSION +#include +#undef PACKAGE_NAME +#undef PACKAGE_STRING +#undef PACKAGE_TARNAME +#undef PACKAGE_VERSION +#include + +/* These are necessary so that we can call examine the target's options. 
*/ +#include +extern struct plugin_gcc_version gcc_version ATTRIBUTE_UNUSED; +#include +#include +#include +#include +#include +#include + +#include + +#if 0 /* This would be the correct thing to do if elf/common.h did not conflict with elf.h. */ +#include "elf/common.h" +#else +#define SHF_GNU_BUILD_NOTE (1 << 20) /* Section contains GNU BUILD ATTRIBUTE notes. */ +#define NT_GNU_PROPERTY_TYPE_0 5 /* Generated by gcc. */ + +#define NT_GNU_BUILD_ATTRIBUTE_OPEN 0x100 +#define NT_GNU_BUILD_ATTRIBUTE_FUNC 0x101 + +#define GNU_BUILD_ATTRIBUTE_TYPE_NUMERIC '*' +#define GNU_BUILD_ATTRIBUTE_TYPE_STRING '$' +#define GNU_BUILD_ATTRIBUTE_TYPE_BOOL_TRUE '+' +#define GNU_BUILD_ATTRIBUTE_TYPE_BOOL_FALSE '!' + +#define GNU_BUILD_ATTRIBUTE_VERSION 1 +#define GNU_BUILD_ATTRIBUTE_STACK_PROT 2 +#define GNU_BUILD_ATTRIBUTE_RELRO 3 +#define GNU_BUILD_ATTRIBUTE_STACK_SIZE 4 +#define GNU_BUILD_ATTRIBUTE_TOOL 5 +#define GNU_BUILD_ATTRIBUTE_ABI 6 +#define GNU_BUILD_ATTRIBUTE_PIC 7 +#define GNU_BUILD_ATTRIBUTE_SHORT_ENUM 8 + +#define NOTE_GNU_PROPERTY_SECTION_NAME ".note.gnu.property" +#define GNU_BUILD_ATTRS_SECTION_NAME ".gnu.build.attributes" + +/* Values used in GNU .note.gnu.property notes (NT_GNU_PROPERTY_TYPE_0). */ +#define GNU_PROPERTY_STACK_SIZE 1 +#define GNU_PROPERTY_NO_COPY_ON_PROTECTED 2 +#endif /* Copy of elf/common.h */ + +/* Called during plugin_init(). */ +extern void annobin_save_target_specific_information (void); + +/* Called during PLUGIN_START_UNIT. + Should only produce notes for the static tools, ie + notes in the .gnu.build.attributes section. */ +extern void annobin_record_global_target_notes (void); + +/* Called during PLUGIN_ALL_PASSES_END. + Should only produce notes for the static tools, ie + notes in the .gnu.build.attributes section. */ +extern void annobin_target_specific_function_notes (void); + +/* Called during PLUGIN_FINISH_UNIT. + Should only produce notes for the dynamic loader, ie + notes in the .note.gnu.property section. 
*/ +extern void annobin_target_specific_loader_notes (void); + +extern void annobin_inform (unsigned, const char *, ...); +extern void annobin_output_note (const void *, unsigned, bool, const char *, const void *, unsigned, bool, unsigned); +extern void annobin_output_bool_note (const char , const bool, const char *, const char *, unsigned); +extern void annobin_output_string_note (const char, const char *, const char *, const char *, unsigned); +extern void annobin_output_numeric_note (const char, unsigned long, const char *, const char *, unsigned); + +extern bool annobin_is_64bit; +extern bool annobin_enable_stack_size_notes; +extern unsigned long annobin_total_static_stack_usage; +extern unsigned long annobin_max_stack_size; diff --git a/gcc/checkers.c b/gcc/checkers.c new file mode 100644 index 0000000..be47d85 --- /dev/null +++ b/gcc/checkers.c @@ -0,0 +1,734 @@ +/* Running 3rd-party code analysis tools. + Copyright (C) 2017 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "options.h" +#include "diagnostic.h" +#include "selftest.h" +#include "firehose.h" +#include "json.h" +#include +#include "checkers.h" +#include "annobin.h" + +/* FIXME. 
*/ + +static void +print_any_trace (const firehose::issue &issue) +{ + if (!issue.m_trace) + return; + if (0) + inform (UNKNOWN_LOCATION, "got trace"); + + /* Filter out any states within the trace that don't have text. */ + issue.m_trace->filter (); + + /* If we're just left with a single state that duplicates what we + already printed for the issue, don't bother printing it. */ + if (issue.m_trace->is_redundant_p (issue)) + return; + + int i; + firehose::state *s; + int num_states = issue.m_trace->m_states.length (); + FOR_EACH_VEC_ELT (issue.m_trace->m_states, i, s) + { + if (s->m_notes) + inform (s->m_location, "state %i of %i: %s", i + 1, num_states, + s->m_notes); + else + inform (s->m_location, "state %i of %i", i + 1, num_states); + } +} + +/* Report DIAGNOSTIC at RICHLOC through global_dc, using GMSGID and AP as + the format string and its arguments. The diagnostic kind is the one + already stored in DIAGNOSTIC. All four pointers must be non-NULL. + Return the result of diagnostic_report_diagnostic. */ + +bool +diagnostic_at_rich_loc_va (rich_location *richloc, + diagnostic_info *diagnostic, + const char *gmsgid, + va_list *ap) +{ + gcc_assert (richloc); + gcc_assert (diagnostic); + gcc_assert (gmsgid); + gcc_assert (ap); + + diagnostic_t kind = diagnostic->kind; +#if 0 + if (kind == DK_PERMERROR) + { + diagnostic_set_info (diagnostic, gmsgid, ap, richloc, + permissive_error_kind (global_dc)); + diagnostic.option_index = permissive_error_option (global_dc); + } + else +#endif + { + diagnostic_set_info (diagnostic, gmsgid, ap, richloc, kind); +#if 0 + if (kind == DK_WARNING || kind == DK_PEDWARN) + diagnostic.option_index = opt; +#endif + } + return diagnostic_report_diagnostic (global_dc, diagnostic); +} + +/* Variadic wrapper around diagnostic_at_rich_loc_va: report DIAGNOSTIC + at LOC, formatting GMSGID with the trailing arguments. The result of + the underlying call is discarded. */ + +void +diagnostic_at (location_t loc, diagnostic_info *diagnostic, + const char *gmsgid, ...) +{ + va_list ap; + va_start (ap, gmsgid); + rich_location richloc (line_table, loc); + diagnostic_at_rich_loc_va (&richloc, diagnostic, gmsgid, &ap); + va_end (ap); +} + +/* Emit a diagnostic for ISSUE. */ + +static void +handle_issue (const firehose::analysis &analysis, + const firehose::issue &issue) +{ + // FIXME: generate "In function", perhaps based on blt_node ?
+ // FIXME: ^^^ better to get it from the JSON + diagnostic_info diagnostic; + diagnostic.kind = DK_ERROR; + diagnostic.external_tool = analysis.m_metadata.m_generator.m_name; + diagnostic.external_test_id = issue.m_testid; + diagnostic_at (issue.m_location, &diagnostic, "%s", + issue.m_message); + print_any_trace (issue); +} + +/* Emit a diagnostic for INFO. */ + +static void +handle_info (const firehose::analysis &analysis, + const firehose::info &info) +{ + diagnostic_info diagnostic; + diagnostic.kind = DK_NOTE; + diagnostic.external_tool = analysis.m_metadata.m_generator.m_name; + diagnostic.external_test_id = info.m_infoid; + diagnostic_at (info.m_location, &diagnostic, "%s", + info.m_message); +} + +/* Emit a diagnostic for FAILURE. */ + +static void +handle_failure (const firehose::analysis &analysis, + const firehose::failure &failure) +{ + diagnostic_info diagnostic; + diagnostic.kind = DK_ERROR; + diagnostic.external_tool = analysis.m_metadata.m_generator.m_name; + diagnostic.external_test_id = failure.m_failureid; + diagnostic_at (failure.m_location, &diagnostic, "%s", + failure.m_message); +} + +/* FIXME: taken from jit-playback.c. */ + +/* A subclass of auto_vec that frees all of its elements on + deletion. */ + +class auto_argvec : public auto_vec +{ + public: + ~auto_argvec (); +}; + +/* auto_argvec's dtor, freeing all contained strings, automatically + chaining up to ~auto_vec , which frees the internal buffer. */ + +auto_argvec::~auto_argvec () +{ + int i; + char *str; + FOR_EACH_VEC_ELT (*this, i, str) + free (str); +} + +/* FIXME. */ + +struct thread_result +{ + thread_result (char *utf8_buffer, char *err) + : m_utf8_buffer (utf8_buffer), m_err (err) {} + + ~thread_result () { free (m_utf8_buffer); free (m_err); } + + char *m_utf8_buffer; + char *m_err; +}; + +/* A particular checker to run. + A "checker" is an executable which takes GCC-style command-line + arguments and writes a Firehose JSON file to stdout. 
*/ + +class checker +{ + public: + checker (); + ~checker (); + + static checker *from_json (const json::value *jv, char *&out_err); + + void start (); + void finish (); + void run_single_threaded (); + + const char *get_executable () const { return m_executable; } + + const char *get_output () const { return m_utf8_buffer; } + const json::value *get_json_output () const { return m_jv; } + + private: + static void *run_checker_thread (void *ptr); + thread_result *run_in_thread (); + char *capture_stdout (char *&out_err); + void make_args (auto_argvec &out) const; + void handle_json (); + + char *m_executable; + pthread_t m_tid; + + char *m_utf8_buffer; + json::value *m_jv; +}; + +/* A policy, listing which checkers to run. */ + +class policy +{ + public: + ~policy (); + + bool read_from_file (const char *path, char *&out_err); + + auto_vec m_checkers; +}; + +/* checker's ctor: start with no executable, no captured output and no + parsed JSON. m_tid is left unset until start is called. */ + +checker::checker () : m_executable (NULL), m_utf8_buffer (NULL), m_jv (NULL) +{ +} + +/* checker's dtor: release the executable name, the captured output + and the parsed JSON value. */ + +checker::~checker () +{ + free (m_executable); + free (m_utf8_buffer); + delete m_jv; +} + +/* Create a checker from JV, taking a copy of its "executable" string. + Return NULL if get_string_by_key fails (it is passed OUT_ERR, + presumably to describe the problem). + The caller is responsible for deleting the result. */ + +checker * +checker::from_json (const json::value *jv, char *&out_err) +{ + checker *ch = new checker (); + + const char *executable; + if (!jv->get_string_by_key ("executable", executable, out_err)) + { + delete ch; + return NULL; + } + ch->m_executable = xstrdup (executable); + + // FIXME: languages + + return ch; +} + +/* Thread entry point handed to pthread_create by checker::start. + PTR is the checker to run; the thread_result it produces is returned + as the thread's exit value, for collection by checker::finish. */ +// This is run within the per-checker thread + +void * +checker::run_checker_thread (void *ptr) +{ + checker *ch = static_cast (ptr); + return ch->run_in_thread (); +} + +/* Create a thread for this checker, calling run_in_thread within it. + This is run on the main thread, called by checkers_start. */ + +void +checker::start () +{ + pthread_create (&m_tid, + NULL, + run_checker_thread, + this); +} + +/* Wait for this checker's thread to finish, and call + handle_json on the UTF-8 JSON result. + + This is run on the main thread, called by checkers_finish.
*/ + +void +checker::finish () +{ + /* Wait for the thread to finish. */ + void *retval; + if (!pthread_join (m_tid, &retval)) + ; // FIXME: error-handling + thread_result *result = static_cast (retval); + + /* Process the output. */ + if (!result->m_utf8_buffer) + { + error_at (UNKNOWN_LOCATION, "error invoking checker %qs: %qs", + m_executable, result->m_err); + delete result; + return; + } + + m_utf8_buffer = result->m_utf8_buffer; + result->m_utf8_buffer = NULL; + handle_json (); + delete result; +} + +/* This is the "main" routine of the per-checker thread when + run in multithreaded mode. + Attempt to run the checker in a subprocess, and return the stdout + and any error messages. */ + +thread_result * +checker::run_in_thread () +{ + char *err = NULL; + char *utf8_buffer = capture_stdout (err); + + return new thread_result (utf8_buffer, err); +} + +/* This is the single-threaded way to invoke a checker. + Run the checker in a subprocess, capture its stdout as UTF-8 JSON, + and call handle_json on it. */ + +void +checker::run_single_threaded () +{ + char *err = NULL; + m_utf8_buffer = capture_stdout (err); + if (!m_utf8_buffer) + { + error_at (UNKNOWN_LOCATION, "error invoking checker %qs: %qs", + m_executable, err); + free (err); + return; + } + handle_json (); +} + +/* Run the checker, capturing its stdout. + + Return a buffer containing the captured stdout, which must be freed + by the caller. + + This can be run either on the main thread, or within the + per-checker thread. */ + +// FIXME: this assumes that pex is thread-safe; is it? + +char * +checker::capture_stdout (char *&out_err) +{ + auto_argvec argvec; + + make_args (argvec); + + /* pex argv arrays are NULL-terminated. 
*/ + argvec.safe_push (NULL); + + struct pex_obj *obj; + const char *errmsg; + int exit_status = 0; + int err = 0; + + obj = pex_init (0, progname, NULL); + + errmsg = pex_run (obj, + PEX_SEARCH | PEX_USE_PIPES, /* int flags, */ + m_executable, /* const char *executable, */ + const_cast (argvec.address ()), /* argv, */ + NULL, /* const char *outname */ + NULL, /* const char *errname */ + &err); /* int *err*/ + if (errmsg) + { + /* pex_run failed; report its message without trying to read output. */ + out_err = xstrdup (errmsg); + pex_free (obj); + return NULL; + } + + /* Read the checker's stdout to EOF before asking for its exit status: + pex_get_status waits for the child, and a child that fills the pipe + would otherwise block on write while we block on wait. */ + FILE *outf = pex_read_output (obj, 0); + if (!outf) + { + out_err = xstrdup ("unable to read stdout"); + pex_free (obj); + return NULL; + } + /* "outf" is owned by "obj". */ + + // FIXME: use something other than a selftest API for this! + char *utf8_buffer = selftest::read_file (SELFTEST_LOCATION, outf, + "stdout from checker"); + + /* pex_get_status returns zero on failure. */ + bool got_status = pex_get_status (obj, 1, &exit_status) != 0; + pex_free (obj); + + /* From here on every failure path must release UTF8_BUFFER, since the + caller only takes ownership of a non-NULL return value. */ + if (!got_status) + { + out_err = xstrdup ("pex_get_status failed"); + free (utf8_buffer); + return NULL; + } + if (exit_status || err) + { + out_err = xasprintf ("exit_status: %i err: %i", + exit_status, err); + free (utf8_buffer); + return NULL; + } + + return utf8_buffer; +} + +/* Subroutine of checker::capture_stdout. + This can be run either on the main thread, or within the + per-checker thread. */ + +void +checker::make_args (auto_argvec &out) const +{ +#define ADD_ARG(arg) out.safe_push (xstrdup (arg)) + + ADD_ARG (m_executable); + ADD_ARG ("-B."); // FIXME + // FIXME: supply at least -I and -D args + ADD_ARG ("-c"); + ADD_ARG (main_input_filename); + +#undef ADD_ARG +} + +/* Given UTF8_BUFFER, a non-NULL UTF-8-encoded buffer of JSON output + from a tool, in Firehose JSON format, emit the results through GCC's + diagnostic subsystem. + This must be run on the main thread (firehose::analysis::from_json can + touch the global line_table, and error_at and the other diagnostic + emission requires the main thread). */ + +void +checker::handle_json () +{ + /* Attempt to parse the buffer as UTF-8-encoded JSON.
*/ + char *err = NULL; + m_jv = json::parse_utf8_string (m_utf8_buffer, &err); + if (!m_jv) + { + gcc_assert (err); + error_at (UNKNOWN_LOCATION, + "unable to parse tool output as UTF-8 JSON: %s", err); + free (err); + return; + } + + gcc_assert (err == NULL); + + if (0) + { + m_jv->dump(stderr); + fprintf (stderr, "\n"); + } + + /* Attempt to parse the JSON values into Firehose objects. */ + firehose::analysis analysis; + analysis.from_json (m_jv, err); + if (err) + { + error_at (UNKNOWN_LOCATION, "error parsing JSON output: %qs", err); + free (err); + return; + } + + gcc_assert (err == NULL); + + /* Emit the results as GCC diagnostics. */ + int i; + firehose::result *result; + FOR_EACH_VEC_ELT (analysis.m_results, i, result) + { + switch (result->get_kind ()) + { + case firehose::result::FIREHOSE_ISSUE: + handle_issue (analysis, *static_cast (result)); + break; + case firehose::result::FIREHOSE_INFO: + handle_info (analysis, *static_cast (result)); + break; + case firehose::result::FIREHOSE_FAILURE: + handle_failure (analysis, *static_cast (result)); + break; + default: + gcc_unreachable (); + } + } +} + +/* policy's dtor: delete every checker held in m_checkers. */ + +policy::~policy () +{ + int i; + checker *checker; + FOR_EACH_VEC_ELT (m_checkers, i, checker) + delete checker; +} + +/* Load policy from the JSON file at PATH. + If successful, return true. + Otherwise, return false, writing to out_err (the caller + must free the string). */ + +bool +policy::read_from_file (const char *path, char *&out_err) +{ + // FIXME: this shouldn't be just in the selftests + char *utf8_buffer = selftest::read_file (SELFTEST_LOCATION, path); + // FIXME: error-checking + + /* Attempt to parse the buffer as UTF-8-encoded JSON. */ + json::value *jv = json::parse_utf8_string (utf8_buffer, &out_err); + if (!jv) + { + free (utf8_buffer); + return false; + } + + /* Convert to a policy object.
*/ + const json::array *arr = jv->as_array (); + if (!arr) + { + out_err = xstrdup ("not an array"); + delete jv; + return false; + } + + for (unsigned i = 0; i < arr->get_length (); i++) + { + checker *ch = checker::from_json (arr->get (i), out_err); + if (!ch) + { + delete jv; + return false; + } + m_checkers.safe_push (ch); + } + + delete jv; + return true; +} + +/* Interface for use by toplev.c */ + +static bool use_threads = true; // FIXME; move to class policy? +static policy *the_policy = NULL; + +/* Called near the beginning of toplev.c. + + Load a policy file from PATH. + If using threads, invoke the checkers specified by the policy, + each with their own thread reading the stdout from the checker. */ + +void +checkers_start (const char *path) +{ + the_policy = new policy (); + + /* Try to load a policy file. */ + char *err = NULL; + if (!the_policy->read_from_file (path, err)) + { + error_at (UNKNOWN_LOCATION, + "unable to load checker policy %qs: %qs", + path, err); + free (err); + return; + } + + /* If using threads, start the checkers specified by the policy now, + each on their own thread. */ + if (use_threads) + { + int i; + checker *ch; + FOR_EACH_VEC_ELT (the_policy->m_checkers, i, ch) + ch->start (); + } +} + +class note_buffer : public auto_vec +{ + public: + void push_string (const char *str) + { + while (char ch = *str++) + safe_push (ch); + } +}; + +/* Called near the end of toplev.c. + + If using threads, wait for each checker thread to finish, and + process the results. + Otherwise, run each checker now in the main thread, sequentially, + processing the results. */ + +void +checkers_finish () +{ + int i; + checker *ch; + + /* This should have been created in checkers_start. 
*/ + gcc_assert (the_policy); + + if (use_threads) + { + FOR_EACH_VEC_ELT (the_policy->m_checkers, i, ch) + ch->finish (); + } + else + { + FOR_EACH_VEC_ELT (the_policy->m_checkers, i, ch) + ch->run_single_threaded (); + } + + /* Watermark the binary with the analysis results/metadata. */ + + /* annobin_output_string_note imposes a limit of 1024 bytes; avoid this + by hand-rolling our own implementation on top of annobin_output_note. */ + note_buffer buffer; + buffer.safe_push (GNU_BUILD_ATTRIBUTE_TYPE_STRING); + + /* annobin_output_note uses strlen on the name, so we can't embed '\0' in + the name as described in: + https://fedoraproject.org/wiki/Toolchain/Watermark#Proposed_Specification_for_non-loaded_notes + For now, use a numeric value: + */ + buffer.safe_push (9); + + /* Append a UTF-8 JSON string consisting of an array of all of the checker + results. */ +#if 0 + buffer.push_string ("["); + FOR_EACH_VEC_ELT (the_policy->m_checkers, i, ch) + { + if (i) + buffer.push_string (", "); + buffer.push_string (ch->get_output ()); + } + buffer.push_string ("]"); +#else + /* FIXME: annobin_output_note does no escaping on "NAME" and so can't + handle newlines in the data (it generates bogus asm files). + Workaround this by building one big JSON value containing the array + of results. Dump it ourselves (which adds no newlines); the array is only needed to produce that string, so it is deleted straight afterwards rather than leaked. */ + { + json::array *all_results = new json::array (); + FOR_EACH_VEC_ELT (the_policy->m_checkers, i, ch) + all_results->append (ch->get_json_output ()->clone ()); + char *all_results_str = all_results->to_str (); + delete all_results; + /* Bother; we also need to escape quote characters, and backslashes. */ + for (const char *p = all_results_str; *p; p++) + { + if (*p == '"' || *p == '\\') + buffer.safe_push ('\\'); + buffer.safe_push (*p); + } + + free (all_results_str); + } +#endif + + /* 0-terminate the buffer. */ + buffer.safe_push ('\0'); + + /* FIXME: ultimately will probably want to gzip the contents.
*/ + + annobin_output_note (&buffer[0], buffer.length (), true, + "name_description dummy value", /* name_description */ + NULL, 0, false, /* bool desc_is_string,*/ + NT_GNU_BUILD_ATTRIBUTE_OPEN); + + delete the_policy; + the_policy = NULL; +} + +#if CHECKING_P + +namespace selftest { + +/* Selftests. */ + +/* Verify that we can load a policy file. */ + +static void +test_policy_parsing () +{ + char *filename = locate_file ("checker-policy/test-policy.json"); + char *err = NULL; + policy p; + bool success = p.policy::read_from_file (filename, err); + ASSERT_TRUE (success); + ASSERT_NE (NULL, err); + free (filename); + + ASSERT_EQ (4, p.m_checkers.length ()); + ASSERT_STREQ ("../../src/checkers/clang_analyzer.py", + p.m_checkers[0]->get_executable ()); +} + +/* Run all of the selftests within this file. */ + +void +checkers_c_tests () +{ + test_policy_parsing (); +} + +} // namespace selftest + +#endif /* #if CHECKING_P */ diff --git a/gcc/checkers.h b/gcc/checkers.h new file mode 100644 index 0000000..f023871 --- /dev/null +++ b/gcc/checkers.h @@ -0,0 +1,26 @@ +/* Running 3rd-party code analysis tools. + Copyright (C) 2017 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef GCC_CHECKERS_H +#define GCC_CHECKERS_H + +extern void checkers_start (const char *path); +extern void checkers_finish (); + +#endif /* GCC_CHECKERS_H. 
*/ diff --git a/gcc/common.opt b/gcc/common.opt index 1330555..07a4738 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -2108,6 +2108,10 @@ frounding-math Common Report Var(flag_rounding_math) Optimization SetByCombined Disable optimizations that assume default FP rounding behavior. +frun-analyzers= +Common Joined Var(flag_run_analyzers) +FIXME. + fsched-interblock Common Report Var(flag_schedule_interblock) Init(1) Optimization Enable scheduling across basic blocks. diff --git a/gcc/diagnostic.c b/gcc/diagnostic.c index 1585196..65f97a8 100644 --- a/gcc/diagnostic.c +++ b/gcc/diagnostic.c @@ -67,7 +67,17 @@ const char *progname; /* A diagnostic_context surrogate for stderr. */ static diagnostic_context global_diagnostic_context; diagnostic_context *global_dc = &global_diagnostic_context; + + +/* FIXME. */ + +diagnostic_info::diagnostic_info () +: message (), richloc (NULL), x_data (NULL), kind (DK_UNSPECIFIED), + option_index (0), external_tool (NULL), external_test_id (NULL) +{ +} + /* Return a malloc'd string containing MSG formatted a la printf. The caller is responsible for freeing the memory. 
*/ char * @@ -843,6 +853,28 @@ print_option_information (diagnostic_context *context, const diagnostic_info *diagnostic, diagnostic_t orig_diag_kind) { + pretty_printer *pp = context->printer; + const char *cs = colorize_start (pp_show_color (pp), + diagnostic_kind_color[diagnostic->kind]); + const char *ce = colorize_stop (pp_show_color (pp)); + + if (diagnostic->external_tool) + { + pp_string (pp, " ["); + pp_string (pp, cs); + pp_string (pp, diagnostic->external_tool); + pp_string (pp, ce); + if (diagnostic->external_test_id) + { + pp_character (pp, ':'); + pp_string (pp, cs); + pp_string (pp, diagnostic->external_test_id); + pp_string (pp, ce); + } + pp_character (pp, ']'); + return; + } + char *option_text; option_text = context->option_name (context, diagnostic->option_index, @@ -850,12 +882,10 @@ print_option_information (diagnostic_context *context, if (option_text) { - pretty_printer *pp = context->printer; pp_string (pp, " ["); - pp_string (pp, colorize_start (pp_show_color (pp), - diagnostic_kind_color[diagnostic->kind])); + pp_string (pp, cs); pp_string (pp, option_text); - pp_string (pp, colorize_stop (pp_show_color (pp))); + pp_string (pp, ce); pp_character (pp, ']'); free (option_text); } @@ -875,6 +905,8 @@ diagnostic_report_diagnostic (diagnostic_context *context, location_t location = diagnostic_location (diagnostic); diagnostic_t orig_diag_kind = diagnostic->kind; + gcc_assert (diagnostic->kind != DK_UNSPECIFIED); + /* Give preference to being able to inhibit warnings, before they get reclassified to something else. */ if ((diagnostic->kind == DK_WARNING || diagnostic->kind == DK_PEDWARN) diff --git a/gcc/diagnostic.h b/gcc/diagnostic.h index dbd1703..0127a6c 100644 --- a/gcc/diagnostic.h +++ b/gcc/diagnostic.h @@ -29,6 +29,8 @@ along with GCC; see the file COPYING3. If not see list in diagnostic.def. */ struct diagnostic_info { + diagnostic_info (); + /* Text to be formatted. 
*/ text_info message; @@ -41,6 +43,9 @@ struct diagnostic_info diagnostic_t kind; /* Which OPT_* directly controls this diagnostic. */ int option_index; + + const char *external_tool; + const char *external_test_id; }; /* Each time a diagnostic's classification is changed with a pragma, diff --git a/gcc/firehose.c b/gcc/firehose.c new file mode 100644 index 0000000..844e132 --- /dev/null +++ b/gcc/firehose.c @@ -0,0 +1,679 @@ +/* Serialization format for checker results. + Copyright (C) 2017 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "firehose.h" +#include "selftest.h" +#include "selftest-input.h" + +namespace firehose { + +// FIXME: move this to line-map.c + +static location_t +get_location (const char *file, int line, int column) +{ + for (unsigned int i = 0; i < LINEMAPS_ORDINARY_USED (line_table); i++) + { + line_map_ordinary *ord_map = LINEMAPS_ORDINARY_MAP_AT (line_table, i); + if (0 == strcmp (file, ord_map->to_file)) + { + location_t loc + = linemap_position_for_line_and_column (line_table, ord_map, + line, column); + /* Check that it's a valid location within ord_map. 
*/ + if (i + 1 < LINEMAPS_ORDINARY_USED (line_table)) + { + line_map_ordinary *next_ord_map + = LINEMAPS_ORDINARY_MAP_AT (line_table, i + 1); + if (loc >= next_ord_map->start_location) + continue; + } + + return loc; + } + } + + // FIXME: create an ordmap when this is the case. + return UNKNOWN_LOCATION; + + // FIXME: there could be multiple ord_maps for FILE. +} + +/* Write to OUT_VALUE the location within GIVENPATH given by the "line" and "column" keys of JV. Return false, writing to OUT_ERR, if either key cannot be read as a number. */ + +static bool +get_location_from_point (const char *givenpath, const json::value *jv, + location_t &out_value, char *&out_err) +{ + int line; + if (!jv->get_int_by_key ("line", line, out_err)) + return false; + + int column; + if (!jv->get_int_by_key ("column", column, out_err)) + return false; + + out_value = get_location (givenpath, line, column); + return true; +} + +/* Write to OUT_VALUE a location within GIVENPATH built from the "start" and "end" points of JV. Return false, writing to OUT_ERR, if either point is missing or malformed. */ + +static bool +get_location_from_range (const char *givenpath, const json::value *jv, + location_t &out_value, char *&out_err) +{ + const json::value *jv_start; + if (!jv->get_value_by_key ("start", jv_start, out_err)) + return false; + + location_t start; + if (!get_location_from_point (givenpath, jv_start, + start, out_err)) + return false; + + const json::value *jv_end; + if (!jv->get_value_by_key ("end", jv_end, out_err)) + return false; + location_t end; + if (!get_location_from_point (givenpath, jv_end, + end, out_err)) + return false; + + out_value = make_location (start, start, end); + return true; +} + +/* Write to OUT_VALUE the location described by the value of key NAME within JV: its "file"/"givenpath", plus either a "point" or a "range_". OUT_VALUE is left untouched if neither is present. Return false, writing to OUT_ERR, on failure.
*/ + +static bool +get_location_by_key (const json::value *jv, const char *name, + location_t &out_value, char *&out_err) +{ + const json::value *location; + if (!jv->get_value_by_key (name, location, out_err)) + return false; + + const json::value *file; + if (!location->get_value_by_key ("file", file, out_err)) + return false; + const char *givenpath; + if (!file->get_string_by_key ("givenpath", givenpath, out_err)) + return false; + + const json::value *point = location->as_object ()->get_if_nonnull ("point"); + if (point) + { + if (!get_location_from_point (givenpath, point, out_value, out_err)) + return false; + } + else + { + const json::value *range + = location->as_object ()->get_if_nonnull ("range_"); + + if (range) + { + if (!get_location_from_range (givenpath, range, out_value, + out_err)) + return false; + } + } + + // ignore "function" for now + return true; +} + +/* state's ctor: start with no location and no notes. */ + +state::state () : m_location (UNKNOWN_LOCATION), m_notes (NULL) +{ +} + +/* state's dtor: free the notes string, if any. */ + +state::~state () +{ + free (m_notes); +} + +/* Create a state from JV, reading its "location" and any "notes" text. Return NULL, writing to OUT_ERR, on failure; otherwise the caller owns the result. */ + +state * +state::from_json (const json::value *jv, char *&out_err) +{ + state *s = new state (); + + /* Extract the state's location to m_location. */ + if (!get_location_by_key (jv, "location", s->m_location, out_err)) + { + delete s; + return NULL; + } + + /* Get any notes. */ + json::value *notes = jv->as_object ()->get_if_nonnull ("notes"); + if (notes) + { + const char *text; + if (!notes->get_string_by_key ("text", text, out_err)) + { + delete s; + return NULL; + } + s->m_notes = xstrdup (text); + } + + return s; +} + +/* trace's dtor: delete each state held in m_states. */ + +trace::~trace () +{ + int i; + state *state; + FOR_EACH_VEC_ELT (m_states, i, state) + delete state; +} + +/* Create a trace from the "states" array of JV. Return NULL, writing to OUT_ERR, if the array is missing or any state fails to parse.
*/ + +trace * +trace::from_json (const json::value *jv, char *&out_err) +{ + const json::array *states; + if (!jv->get_array_by_key ("states", states, out_err)) + return NULL; + + trace *t = new trace (); + for (unsigned idx = 0; idx < states->get_length (); idx++) + { + const json::value *item = states->get (idx); + if (0) + { + fprintf (stderr, "got state %i: ", idx); + item->dump (stderr); + fprintf (stderr, "\n"); + } + firehose::state *state = state::from_json (item, out_err); + if (!state) + { + delete t; + return NULL; + } + t->m_states.safe_push (state); + } + + return t; +} + +/* Filter out the states to just those with notes, deleting the others. */ + +void +trace::filter () +{ + unsigned idx = 0; + while (idx < m_states.length ()) + { + if (m_states[idx]->m_notes == NULL) + { + delete m_states[idx]; + m_states.ordered_remove (idx); + } + else + idx++; + } +} + +/* Determine if THIS trace is merely a single state that duplicates + the information within ISSUE. An empty trace also counts as redundant. */ + +bool +trace::is_redundant_p (const issue& issue) const +{ + if (m_states.length () > 1) + return false; + if (m_states.length () < 1) + return true; + + state *s0 = m_states[0]; + + if (s0->m_location != issue.m_location) + return false; + if (s0->m_notes) + if (0 != strcmp (s0->m_notes, issue.m_message)) + return false; + + /* Single state, with same location, and same message as ISSUE. */ + return true; +} + +/* result's ctor: start with no message and no location. */ + +result::result () +: m_message (NULL), m_location (UNKNOWN_LOCATION) +{ +} + +/* result's dtor: free the message string, if any. */ + +result::~result () +{ + free (m_message); +} + +/* Create the result subclass named by the "type" key of JV ("Issue", "Info" or "Failure"), then fill in its message and location. Return NULL, writing to OUT_ERR, on failure.
*/ + +result * +result::from_json (const json::value *jv, char *&out_err) +{ + const char *type; + if (!jv->get_string_by_key ("type", type, out_err)) + return NULL; + result *result = NULL; + if (0 == strcmp (type, "Issue")) + result = issue::from_json (jv, out_err); + else if (0 == strcmp (type, "Info")) + result = info::from_json (jv, out_err); + else if (0 == strcmp (type, "Failure")) + result = failure::from_json (jv, out_err); + else + { + out_err = xstrdup ("unrecognized type of result"); + return NULL; + } + + /* If the subclass parser failed, it has already written its + own message to OUT_ERR; propagate that message rather than + overwriting (and leaking) it with a misleading + "unrecognized type" one. */ + if (!result) + return NULL; + + /* Extract the result's message's text to m_message. */ + const json::value *message; + if (!jv->get_value_by_key ("message", message, out_err)) + { + delete result; + return NULL; + } + const char *message_text; + if (!message->get_string_by_key ("text", message_text, out_err)) + { + delete result; + return NULL; + } + result->m_message = xstrdup (message_text); + + /* Extract the result's location to m_location. */ + if (!get_location_by_key (jv, "location", result->m_location, out_err)) + { + delete result; + return NULL; + } + + return result; +} + +/* issue's ctor: start with no testid and no trace. */ + +issue::issue () : result (), m_testid (NULL), m_trace (NULL) +{ +} + +/* issue's dtor: free the testid and delete the trace, if any. */ + +issue::~issue () +{ + free (m_testid); + delete m_trace; +} + +/* Create an issue from JV, reading its optional "testid" and "trace". Return NULL, writing to OUT_ERR, on failure. */ + +issue * +issue::from_json (const json::value *jv, char *&out_err) +{ + issue *r = new issue (); + + /* Get any testid. */ + const char *testid_text = NULL; + if (!jv->get_optional_string_by_key ("testid", testid_text, out_err)) + { + delete r; + return NULL; + } + if (testid_text) + r->m_testid = xstrdup (testid_text); + + /* Get any trace as m_trace. */ + const json::value *trace = jv->as_object ()->get_if_nonnull ("trace"); + if (trace) + { + r->m_trace = trace::from_json (trace, out_err); + if (!r->m_trace) + { + delete r; + return NULL; + } + } + + return r; +} + +/* info's ctor: start with no infoid. */ + +info::info () : result (), m_infoid (NULL) +{ +} + +/* info's dtor: free the infoid, if any.
*/ + +info::~info () +{ + free (m_infoid); +} + +/* Create an info from JV, reading its optional "infoid". Return NULL, writing to OUT_ERR, on failure. */ + +info * +info::from_json (const json::value *jv, char *&out_err) +{ + info *r = new info (); + + /* Get any infoid. */ + const char *infoid_text = NULL; + if (!jv->get_optional_string_by_key ("infoid", infoid_text, out_err)) + { + delete r; + return NULL; + } + if (infoid_text) + r->m_infoid = xstrdup (infoid_text); + + return r; +} + +/* failure's ctor: start with no failureid. */ + +failure::failure () : result (), m_failureid (NULL) +{ +} + +/* failure's dtor: free the failureid, if any. */ + +failure::~failure () +{ + free (m_failureid); +} + +/* Create a failure from JV, reading its optional "failureid". Return NULL, writing to OUT_ERR, on failure. */ + +failure * +failure::from_json (const json::value *jv, char *&out_err) +{ + failure *r = new failure (); + + /* Get any failureid. */ + const char *failureid_text = NULL; + if (!jv->get_optional_string_by_key ("failureid", failureid_text, out_err)) + { + delete r; + return NULL; + } + if (failureid_text) + r->m_failureid = xstrdup (failureid_text); + + return r; +} + +/* generator's ctor: start with no name and no version. */ + +generator::generator () +: m_name (NULL), m_version (NULL) +{ +} + +/* generator's dtor: free the name and version strings, if any. */ + +generator::~generator () +{ + free (m_name); + free (m_version); +} + +/* Populate this generator from JV: "name" is required, "version" is optional. Return false, writing to OUT_ERR, on failure. */ + +bool +generator::from_json (const json::value *jv, char *&out_err) +{ + const char *name; + if (!jv->get_string_by_key ("name", name, out_err)) + return false; + m_name = xstrdup (name); + + const char *version = NULL; + if (!jv->get_optional_string_by_key ("version", version, out_err)) + return false; + if (version) + m_version = xstrdup (version); + + return true; +} + +/* Populate this metadata from the required "generator" key of JV. Return false, writing to OUT_ERR, on failure. */ + +bool +metadata::from_json (const json::value *jv, char *&out_err) +{ + const json::value *jv_generator = NULL; + if (!jv->get_value_by_key ("generator", jv_generator, out_err)) + return false; + if (!m_generator.from_json (jv_generator, out_err)) + return false; + + return true; +} + +/* analysis's dtor: delete each result held in m_results. */ + +analysis::~analysis () +{ + int i; + result *result; + FOR_EACH_VEC_ELT (m_results, i, result) + delete result; +} + +/* Populate this analysis from JV: its "metadata", then each element of its "results" array. Return false, writing to OUT_ERR, on failure; results parsed before the failure stay in m_results.
*/ + +bool +analysis::from_json (const json::value *jv, char *&out_err) +{ + const json::value *jv_metadata = NULL; + if (!jv->get_value_by_key ("metadata", jv_metadata, out_err)) + return false; + if (!m_metadata.from_json (jv_metadata, out_err)) + return false; + + const json::array *results; + if (!jv->get_array_by_key ("results", results, out_err)) + return false; + + for (unsigned i = 0; i < results->get_length (); i++) + { + json::value *item = results->get (i); + //error ("%s", item->to_str ()); + result *r = result::from_json (item, out_err); + if (!r) + return false; + m_results.safe_push (r); + } + + // FIXME: custom fields + // FIXME: selftests for all of this + + return true; +} + +} // namespace firehose + + +#if CHECKING_P + +namespace selftest { + +/* Selftests. */ + +/* Given JSONFILE, a path relative to SRCDIR/gcc/testsuite/selftests, + load the json Firehose file there, populating OUT. + Fail if any errors occur. */ + +static void +get_analysis (firehose::analysis &out, const char *jsonfile) +{ + char *filename = locate_file (jsonfile); + char *buffer = selftest::read_file (SELFTEST_LOCATION, filename); + ASSERT_TRUE (buffer != NULL); + free (filename); + + char *err = NULL; + json::value *jv = json::parse_utf8_string (buffer, &err); + free (buffer); + ASSERT_TRUE (err == NULL); + ASSERT_TRUE (jv != NULL); + + //jv->dump(stderr); + out.from_json (jv, err); + ASSERT_TRUE (err == NULL); + delete jv; +} + +/* Verify that we can parse sample clang-analyzer output: one issue with a three-state trace that becomes redundant once filtered.
*/ + +static void +test_parsing_clang_analyzer () +{ + firehose::analysis analysis; + get_analysis (analysis, "checker-output/test-clang-analyzer.json"); + + ASSERT_STREQ ("clang-analyzer", analysis.m_metadata.m_generator.m_name); + ASSERT_EQ (NULL, analysis.m_metadata.m_generator.m_version); + + ASSERT_EQ (1, analysis.m_results.length ()); + firehose::result *r = analysis.m_results[0]; + ASSERT_EQ (r->get_kind (), firehose::result::FIREHOSE_ISSUE); + + firehose::issue *issue = (firehose::issue *)r; + ASSERT_STREQ ("Address of stack memory associated with" + " local variable 'tmp' returned to caller", + issue->m_message); + ASSERT_EQ (NULL, issue->m_testid); + + //ASSERT_LOCEQ ("../../src/bogus.c", 5, 3, issue->m_location); + + ASSERT_TRUE (issue->m_trace != NULL); + ASSERT_EQ (3, issue->m_trace->m_states.length ()); + firehose::state *state0 = issue->m_trace->m_states[0]; + //ASSERT_LOCEQ ("../../src/bogus.c", 3, 3, state0->m_location); + ASSERT_EQ (NULL, state0->m_notes); + + firehose::state *state1 = issue->m_trace->m_states[1]; + //ASSERT_LOCEQ ("../../src/bogus.c", 5, 3, state1->m_location); + ASSERT_EQ (NULL, state1->m_notes); + + firehose::state *state2 = issue->m_trace->m_states[2]; + //ASSERT_LOCEQ ("../../src/bogus.c", 5, 3, state2->m_location); + ASSERT_STREQ ("Address of stack memory associated with" + " local variable 'tmp' returned to caller", + state2->m_notes); + ASSERT_FALSE (issue->m_trace->is_redundant_p (*issue)); + + /* Verify filtering out non-textual states from the trace. */ + issue->m_trace->filter (); + ASSERT_EQ (1, issue->m_trace->m_states.length ()); + + /* Verify that the filtered trace is redundant. */ + ASSERT_TRUE (issue->m_trace->is_redundant_p (*issue)); +} + +/* Verify that we can parse sample cppcheck output: one issue with a testid and no trace.
*/ + +static void +test_parsing_cppcheck () +{ + firehose::analysis analysis; + get_analysis (analysis, "checker-output/test-cppcheck.json"); + + ASSERT_STREQ ("cppcheck", analysis.m_metadata.m_generator.m_name); + ASSERT_STREQ ("1.63", analysis.m_metadata.m_generator.m_version); + + ASSERT_EQ (1, analysis.m_results.length ()); + firehose::result *r = analysis.m_results[0]; + ASSERT_EQ (r->get_kind (), firehose::result::FIREHOSE_ISSUE); + + firehose::issue *issue = (firehose::issue *)r; + ASSERT_STREQ ("Memory leak: ptr_1", issue->m_message); + ASSERT_STREQ ("memleak", issue->m_testid); + + /* ASSERT_LOCEQ ("../../src/test-sources/conditional-leak.c", 11, 0, + issue->m_location); */ + + ASSERT_TRUE (issue->m_trace == NULL); +} + +/* Verify that we can parse a sample report of a failure of the analyzer itself. */ + +static void +test_parsing_failure () +{ + firehose::analysis analysis; + get_analysis (analysis, "checker-output/test-failure.json"); + + ASSERT_STREQ ("always-fails", analysis.m_metadata.m_generator.m_name); + + ASSERT_EQ (1, analysis.m_results.length ()); + firehose::result *r = analysis.m_results[0]; + ASSERT_EQ (r->get_kind (), firehose::result::FIREHOSE_FAILURE); + + firehose::failure *failure = (firehose::failure *)r; + ASSERT_STREQ ("Exception running always-fails: [Errno 2]" + " No such file or directory:" + " '/this/executable/does/not/exist'", failure->m_message); + ASSERT_STREQ ("exception", failure->m_failureid); +} + +/* Run all of the selftests within this file. */ + +void +firehose_c_tests () +{ + test_parsing_clang_analyzer (); + test_parsing_cppcheck (); + //test_parsing_info (); + test_parsing_failure (); +} + +} // namespace selftest + +#endif /* #if CHECKING_P */ diff --git a/gcc/firehose.h b/gcc/firehose.h new file mode 100644 index 0000000..d732fb1 --- /dev/null +++ b/gcc/firehose.h @@ -0,0 +1,199 @@ +/* Serialization format for checker results. + Copyright (C) 2017 Free Software Foundation, Inc. + +This file is part of GCC.
+ +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef GCC_FIREHOSE_H +#define GCC_FIREHOSE_H + +/* "Firehose" is a serialization format for results from code + analysis tools: + + http://firehose.readthedocs.io/en/latest/index.html + + (along with a Python module for working with the format). + + This file implements a set of C++ classes modeling the format, + with support for populating them from a JSON dump, so that we + can lossly serialize diagnostics and other static analysis results. */ + +#include "json.h" + +namespace firehose { + +/* Forward decls. */ + +struct issue; + +/* A state within a firehose::trace. */ + +struct state +{ + state (); + ~state (); + + static state *from_json (const json::value *jv, char *&out_err); + + location_t m_location; + char *m_notes; +}; + +/* An optional list of events within an issue that describe the circumstances + leading up to a problem. */ + +struct trace +{ + ~trace (); + + static trace *from_json (const json::value *jv, char *&out_err); + + void filter (); + + /* If we're just left with a single state that duplicate what we + already printed for the issue, don't bother printing it. */ + bool is_redundant_p (const issue& issue) const; + + auto_vec m_states; +}; + +/* firehose::result is a base class. + + There are three subclasses: + + - a firehose::issue represents a report from an analyzer about a possible + problem with the software under test. 
+ - a firehose::info represents additional kinds of information generated by + an analyzer that isn't a problem per-se e.g. code metrics, licensing info, + etc. + - a firehose::failure represents a report about a failure of the analyzer + itself (e.g. if the analyzer crashed). */ + +struct result +{ + enum kind + { + FIREHOSE_ISSUE, + FIREHOSE_INFO, + FIREHOSE_FAILURE + }; + + result (); + virtual ~result (); + + static result *from_json (const json::value *jv, char *&out_err); + + virtual enum kind get_kind () const = 0; + + char *m_message; + location_t m_location; +}; + +/* An issue represents a report from an analyzer about a possible problem + with the software under test. */ + +struct issue : public result +{ + issue (); + ~issue (); + + static issue *from_json (const json::value *jv, char *&out_err); + enum kind get_kind () const FINAL OVERRIDE { return FIREHOSE_ISSUE; } + + char *m_testid; + trace *m_trace; +}; + +/* An info represents additional kinds of information generated by an analyzer + that isn't a problem per-se e.g. code metrics, licensing info, + cross-referencing information, etc. */ + +struct info : public result +{ + info (); + ~info (); + + static info *from_json (const json::value *jv, char *&out_err); + enum kind get_kind () const FINAL OVERRIDE { return FIREHOSE_INFO; } + + char *m_infoid; +}; + +/* A failure represents a report about a failure of the analyzer itself + (e.g. if the analyzer crashed). + + If any of these are present then we don't have full coverage. + + For some analyzers this is an all-or-nothing affair: we either get + issues reported, or a failure happens (e.g. a segfault of the + analysis tool). + + Other analyzers may be more fine-grained: able to report some + issues, but choke on some subset of the code under analysis. + For example cpychecker runs once per function, and any unhandled + Python exceptions only affect one function. 
*/ + +struct failure : public result +{ + failure (); + ~failure (); + + static failure *from_json (const json::value *jv, char *&out_err); + enum kind get_kind () const FINAL OVERRIDE { return FIREHOSE_FAILURE; } + + char *m_failureid; +}; + +/* A class describing a static analyzer, for use within firehose::metadata. */ + +struct generator +{ + generator (); + ~generator (); + + bool from_json (const json::value *jv, char *&out_err); + + char *m_name; + char *m_version; +}; + +/* The firehose::metadata class contains metadata about a static analyzer + invocation. */ + +struct metadata +{ + bool from_json (const json::value *jv, char *&out_err); + + generator m_generator; +}; + +/* The firehose::analysis class represents one invocation of a code analysis + tool. */ + +struct analysis +{ + ~analysis (); + + bool from_json (const json::value *jv, char *&out_err); + + metadata m_metadata; + auto_vec m_results; + //custom_fields *m_custom_fields; +}; + +} // namespace firehose + +#endif /* GCC_FIREHOSE_H */ diff --git a/gcc/input.c b/gcc/input.c index 8071810..28db43e 100644 --- a/gcc/input.c +++ b/gcc/input.c @@ -23,6 +23,7 @@ along with GCC; see the file COPYING3. If not see #include "intl.h" #include "diagnostic-core.h" #include "selftest.h" +#include "selftest-input.h" #include "cpplib.h" #ifndef HAVE_ICONV @@ -1579,21 +1580,23 @@ test_should_have_column_data_p () } /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN - on LOC. */ + on LOC. Use REPORT_LOC as the effective location when reporting + any issues. 
*/ -static void -assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum, +void +assert_loceq (const location &report_loc, + const char *exp_filename, int exp_linenum, int exp_colnum, location_t loc) { - ASSERT_STREQ (exp_filename, LOCATION_FILE (loc)); - ASSERT_EQ (exp_linenum, LOCATION_LINE (loc)); + ASSERT_STREQ_AT (report_loc, exp_filename, LOCATION_FILE (loc)); + ASSERT_EQ_AT (report_loc, exp_linenum, LOCATION_LINE (loc)); /* If location_t values are sufficiently high, then column numbers will be unavailable and LOCATION_COLUMN (loc) will be 0. When close to the threshold, column numbers *may* be present: if the final linemap before the threshold contains a line that straddles the threshold, locations in that line have column information. */ if (should_have_column_data_p (loc)) - ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc)); + ASSERT_EQ_AT (report_loc, exp_colnum, LOCATION_COLUMN (loc)); } /* Various selftests involve constructing a line table and one or more @@ -1727,23 +1730,23 @@ test_accessing_ordinary_linemaps (const line_table_case &case_) linemap_add (line_table, LC_LEAVE, false, NULL, 0); /* Verify that we can recover the location info. */ - assert_loceq ("foo.c", 1, 1, loc_a); - assert_loceq ("foo.c", 1, 23, loc_b); - assert_loceq ("foo.c", 2, 1, loc_c); - assert_loceq ("foo.c", 2, 17, loc_d); - assert_loceq ("foo.c", 3, 700, loc_e); - assert_loceq ("foo.c", 4, 100, loc_back_to_short); + ASSERT_LOCEQ ("foo.c", 1, 1, loc_a); + ASSERT_LOCEQ ("foo.c", 1, 23, loc_b); + ASSERT_LOCEQ ("foo.c", 2, 1, loc_c); + ASSERT_LOCEQ ("foo.c", 2, 17, loc_d); + ASSERT_LOCEQ ("foo.c", 3, 700, loc_e); + ASSERT_LOCEQ ("foo.c", 4, 100, loc_back_to_short); /* In the very wide line, the initial location should be fully tracked. */ - assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line); + ASSERT_LOCEQ ("foo.c", 5, 2000, loc_start_of_very_long_line); /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should be disabled. 
*/ - assert_loceq ("foo.c", 5, 0, loc_too_wide); - assert_loceq ("foo.c", 5, 0, loc_too_wide_2); + ASSERT_LOCEQ ("foo.c", 5, 0, loc_too_wide); + ASSERT_LOCEQ ("foo.c", 5, 0, loc_too_wide_2); /*...and column-tracking should be re-enabled for subsequent lines. */ - assert_loceq ("foo.c", 6, 10, loc_sane_again); + ASSERT_LOCEQ ("foo.c", 6, 10, loc_sane_again); - assert_loceq ("bar.c", 1, 150, loc_f); + ASSERT_LOCEQ ("bar.c", 1, 150, loc_f); ASSERT_FALSE (is_location_from_builtin_token (loc_a)); ASSERT_TRUE (pure_location_p (line_table, loc_a)); @@ -1773,7 +1776,7 @@ test_unknown_location () static void test_builtins () { - assert_loceq (_(""), 0, 0, BUILTINS_LOCATION); + ASSERT_LOCEQ (_(""), 0, 0, BUILTINS_LOCATION); ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION); } diff --git a/gcc/json.c b/gcc/json.c new file mode 100644 index 0000000..c2dc565 --- /dev/null +++ b/gcc/json.c @@ -0,0 +1,2007 @@ +/* JSON parsing + Copyright (C) 2017 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include // for selftests +#include "json.h" +#include "pretty-print.h" +#include "math.h" +#include "selftest.h" + +using namespace json; + +/* class json::value. */ + +/* Generate a char * for this json::value tree. + The returned value must be freed by the caller. 
*/ + +char * +value::to_str () const +{ + pretty_printer pp; + print (&pp); + return xstrdup (pp_formatted_text (&pp)); +} + +/* Dump this json::value tree to OUTF. + No formatting is done. There are no guarantees about the order + in which the key/value pairs of json::objects are printed. */ + +void +value::dump (FILE *outf) const +{ + pretty_printer pp; + pp_buffer (&pp)->stream = outf; + print (&pp); + pp_flush (&pp); +} + +/* If this json::value is a json::object, return it, + otherwise return NULL. */ + +const object * +value::as_object () const +{ + if (get_kind () != JSON_OBJECT) + return NULL; + return static_cast (this); +} + +/* If this json::value is a json::array, return it, + otherwise return NULL. */ + +const array * +value::as_array () const +{ + if (get_kind () != JSON_ARRAY) + return NULL; + return static_cast (this); +} + +/* If this json::value is a json::number, return it, + otherwise return NULL. */ + +const number * +value::as_number () const +{ + if (get_kind () != JSON_NUMBER) + return NULL; + return static_cast (this); +} + +/* If this json::value is a json::string, return it, + otherwise return NULL. */ + +const string * +value::as_string () const +{ + if (get_kind () != JSON_STRING) + return NULL; + return static_cast (this); +} + +/* Attempt to get the value of a key/value pair from this value + as if THIS value were an object. + + If THIS is not a json::object, return write an error message to OUT_ERR + (which must be freed by the caller) and return false. + + Otherwise write the value ptr (possibly NULL) to OUT_VALUE and + return true. */ + +bool +value::get_optional_value_by_key (const char *name, const value *&out_value, + char *&out_err) const +{ + const json::object *obj = as_object (); + if (!obj) + { + out_err = xstrdup ("not an object"); + return false; + } + out_value = obj->get (name); + return true; +} + +/* Attempt to get a string value of a key/value pair from this value + as if THIS value were an object. 
+ + If THIS is a json::object, and KEY is either not present, is a string, + or is the "null" JSON literal, then return true, and write to OUT_VALUE. + If a string, then the ptr is written to OUT_VALUE, otherwise NULL + is written to OUT_VALUE. + + If THIS is not a json::object, or KEY is not a string/"null", + return false and write an error message to OUT_ERR + (which must be freed by the caller). */ + +bool +value::get_optional_string_by_key (const char *name, const char *&out_value, + char *&out_err) const +{ + const json::value *v; + if (!get_optional_value_by_key (name, v, out_err)) + return false; + if (v && v->get_kind () != JSON_NULL) + { + const json::string *s = v->as_string (); + if (!s) + { + out_err = xasprintf ("not a string: \"%s\"", name); + return false; + } + out_value = s->get_string (); + return true; + } + else + { + out_value = NULL; + return true; + } +} + +/* Attempt to get lookup the value of a key/value pair from this value + as if this value were an object. + + To succeed, THIS must be a json::object, and it must have a key named + NAME. + + On success, return true and write the value to OUT_VALUE. + On failure, return false and write an error message to OUT_ERR + (which must be freed by the caller). */ + +bool +value::get_value_by_key (const char *name, const value *&out_value, + char *&out_err) const +{ + const json::object *obj = as_object (); + if (!obj) + { + out_err = xstrdup ("not an object"); + return false; + } + const json::value *v = obj->get (name); + if (!v) + { + out_err = xasprintf ("missing attribute: \"%s\"", name); + return false; + } + out_value = v; + return true; +} + +/* As value::get_value_by_key, but the value must be a number; + if successful, write it as an int to OUT_VALUE. 
*/ + +bool +value::get_int_by_key (const char *name, int &out_value, char *&out_err) const +{ + const json::value *v; + if (!get_value_by_key (name, v, out_err)) + return false; + const json::number *n = v->as_number (); + if (!n) + { + out_err = xasprintf ("not a number: \"%s\"", name); + return false; + } + out_value = n->get (); + return true; +} + +/* As value::get_value_by_key, but the value must be a string; + if successful, write it as const char * to OUT_VALUE. */ + +bool +value::get_string_by_key (const char *name, const char *&out_value, + char *&out_err) const +{ + const json::value *v; + if (!get_value_by_key (name, v, out_err)) + return false; + const json::string *s = v->as_string (); + if (!s) + { + out_err = xasprintf ("not a string: \"%s\"", name); + return false; + } + out_value = s->get_string (); + return true; +} + +/* As value::get_value_by_key, but the value must be an array; + if successful, write it as a json::array * to OUT_VALUE. */ + +bool +value::get_array_by_key (const char *name, const array *&out_value, + char *&out_err) const +{ + const json::value *v; + if (!get_value_by_key (name, v, out_err)) + return false; + const json::array *arr = v->as_array (); + if (!arr) + { + out_err = xasprintf ("not an array: \"%s\"", name); + return false; + } + out_value = arr; + return true; +} + +/* class json::object, a subclass of json::value, representing + an unordered collection of key/value pairs. */ + +/* json:object's dtor. */ + +object::~object () +{ + for (map_t::iterator it = m_map.begin (); it != m_map.end (); ++it) + { + free (const_cast ((*it).first)); + delete ((*it).second); + } +} + +/* Implementation of json::value::print for json::object. */ + +void +object::print (pretty_printer *pp) const +{ + /* Note that the order is not guaranteed. 
*/ + pp_character (pp, '{'); + for (map_t::iterator it = m_map.begin (); it != m_map.end (); ++it) + { + if (it != m_map.begin ()) + pp_string (pp, ", "); + const char *key = const_cast ((*it).first); + value *value = (*it).second; + pp_printf (pp, "\"%s\": ", key); // FIXME: escaping? + value->print (pp); + } + pp_character (pp, '}'); +} + +/* Implementation of json::value::clone for json::object. */ + +value * +object::clone () const +{ + object *other = new object (); + for (map_t::iterator it = m_map.begin (); it != m_map.end (); ++it) + { + const char *key = const_cast ((*it).first); + value *value = (*it).second; + other->set (key, value->clone ()); + } + return other; +} + +/* Get the json::value * for KEY, or NULL if the key is not present. */ + +value * +object::get (const char *key) const +{ + value **slot = const_cast (this)->m_map.get (key); + if (slot) + return *slot; + return NULL; +} + +/* As object::get (KEY), but return NULL if the value of the key + is the "null" JSON literal. */ + +value * +object::get_if_nonnull (const char *key) const +{ + value *result = get (key); + if (!result) + return NULL; + if (result->get_kind () == JSON_NULL) + return NULL; + return result; +} + +/* Set the json::value * for KEY, taking ownership of VALUE + (and taking a copy of KEY if necessary). */ + +void +object::set (const char *key, value *v) +{ + value **ptr = m_map.get (key); + if (ptr) + { + /* If the key is already present, delete the existing value + and overwrite it. */ + delete *ptr; + *ptr = v; + } + else + /* If the key wasn't already present, take a copy of the key, + and store the value. */ + m_map.put (xstrdup (key), v); +} + +/* class json::array, a subclass of json::value, representing + an ordered collection of values. */ + +/* json::array's dtor. */ + +array::~array () +{ + unsigned i; + value *v; + FOR_EACH_VEC_ELT (m_elements, i, v) + delete v; +} + +/* Implementation of json::value::print for json::array. 
*/
+
+void
+array::print (pretty_printer *pp) const
+{
+  pp_character (pp, '[');
+  unsigned i;
+  value *v;
+  FOR_EACH_VEC_ELT (m_elements, i, v)
+    {
+      /* Separator before every element except the first.  */
+      if (i)
+        pp_string (pp, ", ");
+      v->print (pp);
+    }
+  pp_character (pp, ']');
+}
+
+/* Implementation of json::value::clone for json::array.
+   Performs a deep copy: every element is cloned.  */
+
+value *
+array::clone () const
+{
+  array *other = new array ();
+  unsigned i;
+  value *v;
+  FOR_EACH_VEC_ELT (m_elements, i, v)
+    other->append (v->clone ());
+  return other;
+}
+
+/* class json::number, a subclass of json::value, wrapping a double.  */
+
+/* Implementation of json::value::print for json::number.
+   The value is formatted with "%g", i.e. with at most six significant
+   digits.  */
+
+void
+number::print (pretty_printer *pp) const
+{
+  char tmp[1024];
+  snprintf (tmp, sizeof (tmp), "%g", m_value);
+  pp_string (pp, tmp);
+}
+
+/* Implementation of json::value::clone for json::number.  */
+
+value *
+number::clone () const
+{
+  return new number (m_value);
+}
+
+/* class json::string, a subclass of json::value.  */
+
+/* Implementation of json::value::print for json::string.
+   Print the string surrounded by double quotes, escaping '"', '\\' and
+   control characters so that the output is a valid JSON string
+   (RFC 7159 section 7).  All other bytes (including the bytes of
+   multi-byte UTF-8 sequences) are emitted unchanged.  */
+
+void
+string::print (pretty_printer *pp) const
+{
+  pp_character (pp, '"');
+  for (const char *ptr = m_utf8; *ptr; ptr++)
+    {
+      char ch = *ptr;
+      switch (ch)
+        {
+        case '"':
+          pp_string (pp, "\\\"");
+          break;
+        case '\\':
+          /* A backslash must be escaped as two backslashes.  */
+          pp_string (pp, "\\\\");
+          break;
+        case '\b':
+          pp_string (pp, "\\b");
+          break;
+        case '\f':
+          pp_string (pp, "\\f");
+          break;
+        case '\n':
+          pp_string (pp, "\\n");
+          break;
+        case '\r':
+          pp_string (pp, "\\r");
+          break;
+        case '\t':
+          pp_string (pp, "\\t");
+          break;
+
+        default:
+          /* Remaining control characters have no short escape and must
+             not appear raw in a JSON string; use the \uXXXX form.
+             Compare as unsigned char so that UTF-8 bytes >= 0x80 are
+             not mistaken for control characters where char is signed.  */
+          if ((unsigned char) ch < 0x20)
+            {
+              char esc[8];
+              snprintf (esc, sizeof (esc), "\\u%04x",
+                        (unsigned) (unsigned char) ch);
+              pp_string (pp, esc);
+            }
+          else
+            pp_character (pp, ch);
+          break;
+        }
+    }
+  pp_character (pp, '"');
+}
+
+/* Implementation of json::value::clone for json::string.  */
+
+value *
+string::clone () const
+{
+  return new string (m_utf8);
+}
+
+/* class json::literal, a subclass of json::value.  */
+
+/* Implementation of json::value::print for json::literal.
*/
+
+void
+literal::print (pretty_printer *pp) const
+{
+  switch (m_kind)
+    {
+    case JSON_TRUE:
+      pp_string (pp, "true");
+      break;
+    case JSON_FALSE:
+      pp_string (pp, "false");
+      break;
+    case JSON_NULL:
+      pp_string (pp, "null");
+      break;
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Implementation of json::value::clone for json::literal.  */
+
+value *
+literal::clone () const
+{
+  return new literal (m_kind);
+}
+
+
+/* Declarations relating to parsing JSON, all within an
+   anonymous namespace.  */
+
+namespace {
+
+/* A typedef representing a single unicode character.  */
+
+typedef unsigned unichar;
+
+/* An enum for discriminating different kinds of JSON token.
+   The enumerators are used as indices into token_id_name below, so the
+   two must be kept in the same order.  */
+
+enum token_id
+{
+  TOK_ERROR,
+
+  TOK_EOF,
+
+  /* Punctuation.  */
+  TOK_OPEN_SQUARE,
+  TOK_OPEN_CURLY,
+  TOK_CLOSE_SQUARE,
+  TOK_CLOSE_CURLY,
+  TOK_COLON,
+  TOK_COMMA,
+
+  /* Literal names.  */
+  TOK_TRUE,
+  TOK_FALSE,
+  TOK_NULL,
+
+  TOK_STRING,
+  TOK_NUMBER
+};
+
+/* Human-readable descriptions of enum token_id, indexed by token_id.
+   The table is read-only, so both the strings and the array itself
+   are const.  */
+
+static const char * const token_id_name[] = {
+  "error",
+  "EOF",
+  "'['",
+  "'{'",
+  "']'",
+  "'}'",
+  "':'",
+  "','",
+  "'true'",
+  "'false'",
+  "'null'",
+  "string",
+  "number"
+};
+
+/* Tokens within the JSON lexer.  */
+
+struct token
+{
+  /* The kind of token.  */
+  enum token_id id;
+
+  /* The location of this token within the unicode
+     character stream.  */
+  int index;
+
+  union
+  {
+    /* Value for TOK_ERROR and TOK_STRING.  Heap-allocated; freed by
+       lexer::consume.  */
+    char *string;
+
+    /* Value for TOK_NUMBER.  */
+    double number;
+  } u;
+};
+
+/* A class for lexing JSON.
*/ + +class lexer +{ + public: + lexer (); + ~lexer (); + bool add_utf8 (size_t length, const char *utf8_buf, char **err_out); + + const token *peek (); + void consume (); + + private: + bool get_char (unichar &out); + void unget_char (); + static void dump_token (FILE *outf, const token *tok); + void lex_token (token *out); + void lex_string (token *out); + void lex_number (token *out, unichar first_char); + bool rest_of_literal (const char *suffix); + + private: + auto_vec m_buffer; + int m_next_char_idx; + + static const int MAX_TOKENS = 1; + token m_next_tokens[MAX_TOKENS]; + int m_num_next_tokens; +}; + +/* A class for parsing JSON. */ + +class parser +{ + public: + parser (char **err_out); + bool add_utf8 (size_t length, const char *utf8_buf, char **err_out); + value *parse_value (int depth); + object *parse_object (int depth); + array *parse_array (int depth); + + bool seen_error_p () const { return *m_err_out; } + void require_eof (); + + private: + void require (enum token_id tok_id); + void error_at (int, const char *, ...); + + private: + lexer m_lexer; + char **m_err_out; +}; + +} // anonymous namespace for parsing implementation + +/* Parser implementation. */ + +/* lexer's ctor. */ + +lexer::lexer () +: m_buffer (), m_next_char_idx (0), m_num_next_tokens (0) +{ +} + +/* lexer's dtor. */ + +lexer::~lexer () +{ + while (m_num_next_tokens > 0) + consume (); +} + +/* Peek the next token. */ + +const token * +lexer::peek () +{ + if (m_num_next_tokens == 0) + { + lex_token (&m_next_tokens[0]); + m_num_next_tokens++; + } + return &m_next_tokens[0]; +} + +/* Consume the next token. 
*/
+
+void
+lexer::consume ()
+{
+  /* Make sure there is a token to consume.  */
+  if (m_num_next_tokens == 0)
+    peek ();
+
+  gcc_assert (m_num_next_tokens > 0);
+  gcc_assert (m_num_next_tokens <= MAX_TOKENS);
+
+  /* Disabled debugging aid.  */
+  if (0)
+    {
+      fprintf (stderr, "consuming token: ");
+      dump_token (stderr, &m_next_tokens[0]);
+      fprintf (stderr, "\n");
+    }
+
+  /* TOK_ERROR and TOK_STRING own a heap-allocated string.  */
+  if (m_next_tokens[0].id == TOK_ERROR
+      || m_next_tokens[0].id == TOK_STRING)
+    free (m_next_tokens[0].u.string);
+
+  /* Shift any remaining lookahead tokens down by one.  */
+  m_num_next_tokens--;
+  memmove (&m_next_tokens[0], &m_next_tokens[1],
+           sizeof (token) * m_num_next_tokens);
+}
+
+/* Add LENGTH bytes of UTF-8 encoded text from UTF8_BUF to this lexer's
+   buffer, decoding it into unicode code points.
+
+   Return true on success.  On ill-formed input, return false and write
+   an xstrdup'ed error message to *ERR_OUT; code points decoded before
+   the error remain in the buffer.  */
+
+bool
+lexer::add_utf8 (size_t length, const char *utf8_buf, char **err_out)
+{
+  /* FIXME: adapted from charset.c:one_utf8_to_cppchar.  */
+  /* Indexed by (sequence length - 1): MASKS selects the payload bits of
+     the leading byte, PATNS is the expected value of its remaining
+     (prefix) bits.  */
+  static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01 };
+  static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+
+  const uchar *inbuf = (const unsigned char *) (utf8_buf);
+  const uchar **inbufp = &inbuf;
+  size_t *inbytesleftp = &length;
+
+  /* *INBYTESLEFTP aliases LENGTH, so this loop runs until all input
+     bytes have been decoded.  */
+  while (length > 0)
+    {
+      unichar c;
+      /* Note: this shadows the outer INBUF; the outer one is only
+         advanced through *INBUFP.  */
+      const uchar *inbuf = *inbufp;
+      size_t nbytes, i;
+
+      c = *inbuf;
+      /* Fast path: a single-byte (ASCII) character.  */
+      if (c < 0x80)
+        {
+          m_buffer.safe_push (c);
+          *inbytesleftp -= 1;
+          *inbufp += 1;
+          continue;
+        }
+
+      /* The number of leading 1-bits in the first byte indicates how many
+         bytes follow.  */
+      for (nbytes = 2; nbytes < 7; nbytes++)
+        if ((c & ~masks[nbytes-1]) == patns[nbytes-1])
+          goto found;
+      *err_out = xstrdup ("ill-formed UTF-8 sequence");
+      return false;
+    found:
+
+      /* The sequence must not run past the end of the input.  */
+      if (*inbytesleftp < nbytes)
+        {
+          *err_out = xstrdup ("ill-formed UTF-8 sequence");
+          return false;
+        }
+
+      /* Accumulate the payload bits: those of the leading byte, then six
+         bits from each continuation byte (which must be 10xxxxxx).  */
+      c = (c & masks[nbytes-1]);
+      inbuf++;
+      for (i = 1; i < nbytes; i++)
+        {
+          unichar n = *inbuf++;
+          if ((n & 0xC0) != 0x80)
+            {
+              *err_out = xstrdup ("ill-formed UTF-8 sequence");
+              return false;
+            }
+          c = ((c << 6) + (n & 0x3F));
+        }
+
+      /* Make sure the shortest possible encoding was used.  */
+      if ((   c <=      0x7F && nbytes > 1)
+          || (c <=     0x7FF && nbytes > 2)
+          || (c <=    0xFFFF && nbytes > 3)
+          || (c <=  0x1FFFFF && nbytes > 4)
+          || (c <= 0x3FFFFFF && nbytes > 5))
+        {
+          *err_out = xstrdup ("ill-formed UTF-8:"
+                              " shortest possible encoding not used");
+          return false;
+        }
+
+      /* Make sure the character is valid: reject values above
+         0x7FFFFFFF and the surrogate range U+D800..U+DFFF.  */
+      if (c > 0x7FFFFFFF || (c >= 0xD800 && c <= 0xDFFF))
+        {
+          *err_out = xstrdup ("ill-formed UTF-8: invalid character");
+          return false;
+        }
+
+      m_buffer.safe_push (c);
+      *inbufp = inbuf;
+      *inbytesleftp -= nbytes;
+    }
+  return true;
+}
+
+/* Attempt to get the next unicode character from this lexer's buffer.
+   If successful, write it to OUT and return true.
+   Otherwise, return false.  */
+
+bool
+lexer::get_char (unichar &out)
+{
+  if (m_next_char_idx >= (int)m_buffer.length ())
+    return false;
+
+  out = m_buffer[m_next_char_idx++];
+  return true;
+}
+
+/* Push back the character most recently read by get_char, by moving
+   the read index back by one.  No bounds check is done here.  */
+
+void
+lexer::unget_char ()
+{
+  --m_next_char_idx;
+}
+
+/* Print a textual representation of TOK to OUTF.
+   This is intended for debugging the lexer and parser,
+   rather than for user-facing output.
*/
+
+void
+lexer::dump_token (FILE *outf, const token *tok)
+{
+  switch (tok->id)
+    {
+    case TOK_ERROR:
+      fprintf (outf, "TOK_ERROR (\"%s\")", tok->u.string);
+      break;
+
+    case TOK_EOF:
+      fprintf (outf, "TOK_EOF");
+      break;
+
+    case TOK_OPEN_SQUARE:
+      fprintf (outf, "TOK_OPEN_SQUARE");
+      break;
+
+    case TOK_OPEN_CURLY:
+      fprintf (outf, "TOK_OPEN_CURLY");
+      break;
+
+    case TOK_CLOSE_SQUARE:
+      fprintf (outf, "TOK_CLOSE_SQUARE");
+      break;
+
+    case TOK_CLOSE_CURLY:
+      fprintf (outf, "TOK_CLOSE_CURLY");
+      break;
+
+    case TOK_COLON:
+      fprintf (outf, "TOK_COLON");
+      break;
+
+    case TOK_COMMA:
+      fprintf (outf, "TOK_COMMA");
+      break;
+
+    case TOK_TRUE:
+      fprintf (outf, "TOK_TRUE");
+      break;
+
+    case TOK_FALSE:
+      fprintf (outf, "TOK_FALSE");
+      break;
+
+    case TOK_NULL:
+      fprintf (outf, "TOK_NULL");
+      break;
+
+    case TOK_STRING:
+      fprintf (outf, "TOK_STRING (\"%s\")", tok->u.string);
+      break;
+
+    case TOK_NUMBER:
+      fprintf (outf, "TOK_NUMBER (%f)", tok->u.number);
+      break;
+
+    default:
+      gcc_unreachable ();
+      break;
+    }
+}
+
+/* Attempt to lex the input buffer, writing the next token to OUT.
+   On errors, TOK_ERROR (or TOK_EOF) is written to OUT.
+
+   OUT->index is set to the position of the token's first character.
+   For TOK_ERROR and TOK_STRING, OUT->u.string is heap-allocated (it is
+   freed by lexer::consume).  */
+
+void
+lexer::lex_token (token *out)
+{
+  /* Skip to next non-whitespace char.  */
+  unichar next_char;
+  while (1)
+    {
+      out->index = m_next_char_idx;
+      if (!get_char (next_char))
+        {
+          out->id = TOK_EOF;
+          return;
+        }
+      if (next_char != ' '
+          && next_char != '\t'
+          && next_char != '\n'
+          && next_char != '\r')
+        break;
+    }
+
+  switch (next_char)
+    {
+    case '[':
+      out->id = TOK_OPEN_SQUARE;
+      break;
+
+    case '{':
+      out->id = TOK_OPEN_CURLY;
+      break;
+
+    case ']':
+      out->id = TOK_CLOSE_SQUARE;
+      break;
+
+    case '}':
+      out->id = TOK_CLOSE_CURLY;
+      break;
+
+    case ':':
+      out->id = TOK_COLON;
+      break;
+
+    case ',':
+      out->id = TOK_COMMA;
+      break;
+
+    case '"':
+      lex_string (out);
+      break;
+
+    case '-':
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+    case '8':
+    case '9':
+      lex_number (out, next_char);
+      break;
+
+    case 't':
+      /* Handle literal "true".  */
+      if (rest_of_literal ("rue"))
+        {
+          out->id = TOK_TRUE;
+          break;
+        }
+      else
+        goto err;
+
+    case 'f':
+      /* Handle literal "false".  */
+      if (rest_of_literal ("alse"))
+        {
+          out->id = TOK_FALSE;
+          break;
+        }
+      else
+        goto err;
+
+    case 'n':
+      /* Handle literal "null".  */
+      if (rest_of_literal ("ull"))
+        {
+          out->id = TOK_NULL;
+          break;
+        }
+      else
+        goto err;
+
+    err:
+    default:
+      out->id = TOK_ERROR;
+      /* NEXT_CHAR is a full unicode code point; "%c" can only represent
+         a single byte, so print non-ASCII code points in U+XXXX form
+         rather than as a truncated (and possibly invalid) byte.  */
+      if (next_char < 0x80)
+        out->u.string = xasprintf ("unexpected character: %c",
+                                   (int) next_char);
+      else
+        out->u.string = xasprintf ("unexpected character: U+%04X",
+                                   next_char);
+      break;
+    }
+}
+
+/* Having consumed an open-quote character from the lexer's buffer, attempt
+   to lex the rest of a JSON string, writing the result to OUT (or TOK_ERROR)
+   if an error occurred.
+   (ECMA-404 section 9; RFC 7159 section 7).
*/ + +void +lexer::lex_string (token *out) +{ + auto_vec content; + bool still_going = true; + while (still_going) + { + unichar uc; + if (!get_char (uc)) + { + out->id = TOK_ERROR; + out->u.string = xstrdup ("EOF within string"); + return; + } + switch (uc) + { + case '"': + still_going = false; + break; + case '\\': + { + unichar next_char; + if (!get_char (next_char)) + { + out->id = TOK_ERROR; + out->u.string = xstrdup ("EOF within string");; + return; + } + switch (next_char) + { + case '"': + case '\\': + case '/': + content.safe_push (next_char); + break; + + case 'b': + content.safe_push ('\b'); + break; + + case 'f': + content.safe_push ('\f'); + break; + + case 'n': + content.safe_push ('\n'); + break; + + case 'r': + content.safe_push ('\r'); + break; + + case 't': + content.safe_push ('\t'); + break; + + case 'u': + { + unichar result = 0; + for (int i = 0; i < 4; i++) + { + unichar hexdigit; + if (!get_char (hexdigit)) + { + out->id = TOK_ERROR; + out->u.string = xstrdup ("EOF within string"); + return; + } + result <<= 4; + if (hexdigit >= '0' && hexdigit <= '9') + result += hexdigit - '0'; + else if (hexdigit >= 'a' && hexdigit <= 'f') + result += (hexdigit - 'a') + 10; + else if (hexdigit >= 'A' && hexdigit <= 'F') + result += (hexdigit - 'A') + 10; + else + { + out->id = TOK_ERROR; + out->u.string = xstrdup ("bogus hex char"); + return; + } + } + content.safe_push (result); + } + break; + + // FIXME: implement other chars + + default: + out->id = TOK_ERROR; + out->u.string = xstrdup ("unrecognized escape char"); + return; + } + } + break; + + default: + /* Reject unescaped control characters U+0000 through U+001F + (ECMA-404 section 9 para 1; RFC 7159 section 7 para 1). */ + if (uc <= 0x1f) + { + out->id = TOK_ERROR; + out->u.string = xstrdup ("unescaped control char"); + return; + } + + /* Otherwise, add regular unicode code point. 
*/ + content.safe_push (uc); + break; + } + } + + out->id = TOK_STRING; + + auto_vec utf8_buf; + // FIXME: adapted from libcpp/charset.c:one_cppchar_to_utf8 + for (unsigned i = 0; i < content.length (); i++) + { + static const uchar masks[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; + static const uchar limits[6] = { 0x80, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE }; + size_t nbytes; + uchar buf[6], *p = &buf[6]; + unichar c = content[i]; + + nbytes = 1; + if (c < 0x80) + *--p = c; + else + { + do + { + *--p = ((c & 0x3F) | 0x80); + c >>= 6; + nbytes++; + } + while (c >= 0x3F || (c & limits[nbytes-1])); + *--p = (c | masks[nbytes-1]); + } + + while (p < &buf[6]) + utf8_buf.safe_push (*p++); + } + + out->u.string = XNEWVEC (char, utf8_buf.length () + 1); + for (unsigned i = 0; i < utf8_buf.length (); i++) + out->u.string[i] = utf8_buf[i]; + out->u.string[utf8_buf.length ()] = '\0'; + + // FIXME: leaks? have a json_context do the allocation +} + +/* Having consumed FIRST_CHAR, an initial digit or '-' character from + the lexer's buffer attempt to lex the rest of a JSON number, writing + the result to OUT (or TOK_ERROR) if an error occurred. + (ECMA-404 section 8; RFC 7159 section 6). */ + +void +lexer::lex_number (token *out, unichar first_char) +{ + bool negate = false; + double value = 0.0; + if (first_char == '-') + { + negate = true; + if (!get_char (first_char)) + { + out->id = TOK_ERROR; + out->u.string = xstrdup ("expected digit"); + return; + } + } + + if (first_char == '0') + value = 0.0; + else if (!ISDIGIT (first_char)) + { + out->id = TOK_ERROR; + out->u.string = xstrdup ("expected digit"); + return; + } + else + { + /* Got a nonzero digit; expect zero or more digits. */ + value = first_char - '0'; + while (1) + { + unichar uc; + if (!get_char (uc)) + break; + if (ISDIGIT (uc)) + { + value *= 10; + value += uc -'0'; + continue; + } + else + { + unget_char (); + break; + } + } + } + + /* Optional '.', followed by one or more decimals. 
*/ + unichar next_char; + if (get_char (next_char)) + { + if (next_char == '.') + { + /* Parse decimal digits. */ + bool had_digit = false; + // FIXME: does this lose too much precision? + double digit_factor = 0.1; + while (get_char (next_char)) + { + if (!ISDIGIT (next_char)) + { + unget_char (); + break; + } + value += (next_char - '0') * digit_factor; + digit_factor *= 0.1; + had_digit = true; + } + if (!had_digit) + { + out->id = TOK_ERROR; + out->u.string = xstrdup ("expected digit"); + return; + } + } + else + unget_char (); + } + + /* Parse 'e' and 'E'. */ + unichar exponent_char; + if (get_char (exponent_char)) + { + if (exponent_char == 'e' || exponent_char == 'E') + { + /* Optional +/-. */ + unichar sign_char; + int exponent = 0; + bool negate_exponent = false; + bool had_exponent_digit = false; + if (!get_char (sign_char)) + { + out->id = TOK_ERROR; + out->u.string = xstrdup ("EOF within exponent"); + return; + } + if (sign_char == '-') + negate_exponent = true; + else if (sign_char == '+') + ; + else if (ISDIGIT (sign_char)) + { + exponent = sign_char - '0'; + had_exponent_digit = true; + } + else + { + out->id = TOK_ERROR; + out->u.string + = xstrdup ("expected '-','+' or digit within exponent"); + return; + } + + /* One or more digits (we might have seen the digit above, + though). */ + while (1) + { + unichar uc; + if (!get_char (uc)) + break; + if (ISDIGIT (uc)) + { + exponent *= 10; + exponent += uc -'0'; + had_exponent_digit = true; + continue; + } + else + { + unget_char (); + break; + } + } + if (!had_exponent_digit) + { + out->id = TOK_ERROR; + out->u.string = xstrdup ("expected digit within exponent"); + return; + } + if (negate_exponent) + exponent = -exponent; + /* FIXME: better way to do this? */ + value = value * pow (10, exponent); + } + else + unget_char (); + } + + if (negate) + value = -value; + + out->id = TOK_NUMBER; + out->u.number = value; +} + +/* Determine if the next characters to be lexed match SUFFIX. 
+   SUFFIX must be pure ASCII.
+   If so, consume the characters and return true.
+   Otherwise, return false (and consume nothing).  */
+
+bool
+lexer::rest_of_literal (const char *suffix)
+{
+  int suffix_idx = 0;
+  int buf_idx = m_next_char_idx;
+  while (1)
+    {
+      /* Reached the end of SUFFIX: everything matched.  */
+      if (suffix[suffix_idx] == '\0')
+        {
+          m_next_char_idx += suffix_idx;
+          return true;
+        }
+      if (buf_idx >= (int)m_buffer.length ())
+        return false;
+      /* This assumes that suffix is ASCII.  */
+      if (m_buffer[buf_idx] != (unichar)suffix[suffix_idx])
+        return false;
+      buf_idx++;
+      suffix_idx++;
+    }
+}
+
+/* parser's ctor.  ERR_OUT must be non-NULL and must point to a NULL
+   pointer; the first error message (if any) is stored through it.  */
+
+parser::parser (char **err_out)
+: m_lexer (), m_err_out (err_out)
+{
+  gcc_assert (err_out);
+  gcc_assert (*err_out == NULL);
+  *err_out = NULL;
+}
+
+/* Add LENGTH bytes of UTF-8 encoded text from UTF8_BUF to this parser's
+   lexer's buffer.  Returns the result of lexer::add_utf8.  */
+
+bool
+parser::add_utf8 (size_t length, const char *utf8_buf, char **err_out)
+{
+  return m_lexer.add_utf8 (length, utf8_buf, err_out);
+}
+
+/* Parse a JSON value (object, array, number, string, or literal).
+   (ECMA-404 section 5; RFC 7159 section 3).
+
+   DEPTH is the current nesting depth.  Returns a new value owned by the
+   caller, or NULL after recording an error (nesting too deep, or an
+   unexpected token).  */
+
+value *
+parser::parse_value (int depth)
+{
+  const token *tok = m_lexer.peek ();
+
+  /* Avoid stack overflow with deeply-nested inputs; RFC 7159 section 9
+     states: "An implementation may set limits on the maximum depth
+     of nesting.".
+
+     Ideally we'd avoid this limit (e.g. by rewriting parse_value,
+     parse_object, and parse_array into a single function with a vec of
+     state).  */
+  const int MAX_DEPTH = 100;
+  if (depth >= MAX_DEPTH)
+    {
+      error_at (tok->index, "maximum nesting depth exceeded: %i", MAX_DEPTH);
+      return NULL;
+    }
+
+  switch (tok->id)
+    {
+    case TOK_OPEN_CURLY:
+      return parse_object (depth);
+
+    case TOK_STRING:
+      {
+        /* The new value is created before the token is consumed,
+           because consuming frees the token's string.  */
+        string *result = new string (tok->u.string);
+        m_lexer.consume ();
+        return result;
+      }
+
+    case TOK_OPEN_SQUARE:
+      return parse_array (depth);
+
+    case TOK_NUMBER:
+      {
+        number *result = new number (tok->u.number);
+        m_lexer.consume ();
+        return result;
+      }
+
+    case TOK_TRUE:
+      {
+        literal *result = new literal (JSON_TRUE);
+        m_lexer.consume ();
+        return result;
+      }
+
+    case TOK_FALSE:
+      {
+        literal *result = new literal (JSON_FALSE);
+        m_lexer.consume ();
+        return result;
+      }
+
+    case TOK_NULL:
+      {
+        literal *result = new literal (JSON_NULL);
+        m_lexer.consume ();
+        return result;
+      }
+
+    default:
+      /* The offending token is left unconsumed.  */
+      error_at (tok->index, "unexpected token: %s", token_id_name[tok->id]);
+      return NULL;
+    }
+}
+
+/* Parse a JSON object.
+   (ECMA-404 section 6; RFC 7159 section 4).
+
+   Always returns a new object; on error it holds whatever members were
+   parsed before the error, and the error is recorded via error_at.  */
+
+object *
+parser::parse_object (int depth)
+{
+  require (TOK_OPEN_CURLY);
+
+  object *result = new object ();
+
+  const token *tok = m_lexer.peek ();
+  /* Empty object.  */
+  if (tok->id == TOK_CLOSE_CURLY)
+    {
+      require (TOK_CLOSE_CURLY);
+      return result;
+    }
+  if (tok->id != TOK_STRING)
+    {
+      error_at (tok->index, "expected string for object key");
+      return result;
+    }
+  /* One iteration per "key": value member.  */
+  while (!seen_error_p ())
+    {
+      tok = m_lexer.peek ();
+      if (tok->id != TOK_STRING)
+        {
+          error_at (tok->index, "expected string for object key");
+          return result;
+        }
+      /* Copy the key: consuming the token frees its string.  */
+      char *key = xstrdup (tok->u.string);
+      m_lexer.consume ();
+
+      require (TOK_COLON);
+
+      value *v = parse_value (depth + 1);
+      if (!v)
+        {
+          free (key);
+          return result;
+        }
+      /* We don't enforce uniqueness for keys.  */
+      result->set (key, v);
+      free (key);
+
+      /* A comma means another member follows; anything else must be
+         the closing brace.  */
+      tok = m_lexer.peek ();
+      if (tok->id == TOK_COMMA)
+        {
+          m_lexer.consume ();
+          continue;
+        }
+      else
+        {
+          require (TOK_CLOSE_CURLY);
+          break;
+        }
+    }
+  return result;
+}
+
+/* Parse a JSON array.
+   (ECMA-404 section 7; RFC 7159 section 5).
+
+   Always returns a new array; on error it holds whatever elements were
+   parsed before the error, and the error is recorded via error_at.  */
+
+array *
+parser::parse_array (int depth)
+{
+  require (TOK_OPEN_SQUARE);
+
+  array *result = new array ();
+
+  const token *tok = m_lexer.peek ();
+  /* Empty array.  */
+  if (tok->id == TOK_CLOSE_SQUARE)
+    {
+      m_lexer.consume ();
+      return result;
+    }
+
+  /* One iteration per element.  */
+  while (!seen_error_p ())
+    {
+      value *v = parse_value (depth + 1);
+      if (!v)
+        return result;
+
+      result->append (v);
+
+      /* A comma means another element follows; anything else must be
+         the closing bracket.  */
+      tok = m_lexer.peek ();
+      if (tok->id == TOK_COMMA)
+        {
+          m_lexer.consume ();
+          continue;
+        }
+      else
+        {
+          require (TOK_CLOSE_SQUARE);
+          break;
+        }
+    }
+
+  return result;
+}
+
+/* Require an EOF, or fail if there is surplus input.  */
+
+void
+parser::require_eof ()
+{
+  require (TOK_EOF);
+}
+
+/* Consume the next token, issuing an error if it is not of kind TOK_ID.
+   The token is consumed whether or not it matched.  */
+
+void
+parser::require (enum token_id tok_id)
+{
+  const token *tok = m_lexer.peek ();
+  if (tok->id != tok_id)
+    {
+      /* For a bad token, include the lexer's own error message.  */
+      if (tok->id == TOK_ERROR)
+        error_at (tok->index, "expected %s; got bad token: %s",
+                  token_id_name[tok_id], tok->u.string);
+      else
+        error_at (tok->index, "expected %s; got %s", token_id_name[tok_id],
+                  token_id_name[tok->id]);
+    }
+  m_lexer.consume ();
+}
+
+/* Issue a parsing error.  If this is the first error that has occurred on
+   the parser, store it within the parser's m_err_out (the buffer will
+   eventually need to be freed by the caller of the parser).
+   Otherwise the error is discarded.
+
+   INDEX is the position of the error within the character stream; the
+   stored message has the form "error at index INDEX: MESSAGE".
+
+   TODO: maybe provide a callback so that client code can print all errors?  */
+
+void
+parser::error_at (int index, const char *fmt, ...)
+{
+  va_list ap;
+  va_start (ap, fmt);
+  char *formatted = xvasprintf (fmt, ap);
+  va_end (ap);
+
+  char *msg_with_index = xasprintf ("error at index %i: %s",
+                                    index, formatted);
+  free (formatted);
+
+  /* Disabled debugging aid.  */
+  if (0)
+    fprintf (stderr, "%s\n", msg_with_index);
+  /* Only the first error is kept.  */
+  if (*m_err_out == NULL)
+    *m_err_out = msg_with_index;
+  else
+    free (msg_with_index);
+}
+
+/* Attempt to parse the UTF-8 encoded buffer at UTF8_BUF
+   of the given LENGTH.
+   If successful, return a non-NULL json::value *.
+   If there was a problem, return NULL and write an error
+   message to ERR_OUT, which must be freed by the caller.
+
+   ERR_OUT must be non-NULL and *ERR_OUT must be NULL on entry.  */
+
+value *
+json::parse_utf8_string (size_t length, const char *utf8_buf,
+                         char **err_out)
+{
+  gcc_assert (err_out);
+  gcc_assert (*err_out == NULL);
+
+  parser p (err_out);
+  if (!p.add_utf8 (length, utf8_buf, err_out))
+    return NULL;
+  value *result = p.parse_value (0);
+  /* Anything after the top-level value is an error.  */
+  if (!p.seen_error_p ())
+    p.require_eof ();
+  if (p.seen_error_p ())
+    {
+      gcc_assert (*err_out);
+      /* Discard any partially-built result.  */
+      delete result;
+      return NULL;
+    }
+  return result;
+}
+
+/* Attempt to parse the nil-terminated UTF-8 encoded buffer at
+   UTF8_BUF.
+   If successful, return a non-NULL json::value *.
+   If there was a problem, return NULL and write an error
+   message to ERR_OUT, which must be freed by the caller.  */
+
+value *
+json::parse_utf8_string (const char *utf8, char **err_out)
+{
+  return parse_utf8_string (strlen (utf8), utf8, err_out);
+}
+
+
+#if CHECKING_P
+
+namespace selftest {
+
+/* Selftests.  */
+
+/* Verify that JV->to_str () equals EXPECTED_JSON.  */
+
+static void
+assert_to_str_eq (const char *expected_json, json::value *jv)
+{
+  char *json = jv->to_str ();
+  ASSERT_STREQ (expected_json, json);
+  free (json);
+}
+
+/* Verify that JSON strings (plain, with escaped quotes, non-ASCII, and
+   \u-escaped) are parsed, printed and cloned correctly.
*/
+
+static void
+test_parse_string ()
+{
+  char *err = NULL;
+  json::value *jv = parse_utf8_string ("\"foo\"", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_STRING, jv->get_kind ());
+  ASSERT_STREQ ("foo", ((json::string *)jv)->get_string ());
+  assert_to_str_eq ("\"foo\"", jv);
+
+  json::value *clone = jv->clone ();
+  ASSERT_EQ (JSON_STRING, clone->get_kind ());
+  ASSERT_STREQ ("foo", ((json::string *)clone)->get_string ());
+  assert_to_str_eq ("\"foo\"", clone);
+  delete clone;
+  delete jv;
+
+  /* Escaped quotes must be unescaped when parsed, and re-escaped when
+     printed.  */
+  const char *contains_quotes = "\"before \\\"quoted\\\" after\"";
+  jv = parse_utf8_string (contains_quotes, &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_STRING, jv->get_kind ());
+  ASSERT_STREQ ("before \"quoted\" after",
+                ((json::string *)jv)->get_string ());
+  assert_to_str_eq (contains_quotes, jv);
+  delete jv;
+
+  /* Test of non-ASCII input.  This string is the Japanese word "mojibake",
+     written as C octal-escaped UTF-8.  */
+  const char *mojibake = (/* Opening quote.  */
+                          "\""
+                          /* U+6587 CJK UNIFIED IDEOGRAPH-6587
+                             UTF-8: 0xE6 0x96 0x87
+                             C octal escaped UTF-8: \346\226\207.  */
+                          "\346\226\207"
+                          /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
+                             UTF-8: 0xE5 0xAD 0x97
+                             C octal escaped UTF-8: \345\255\227.  */
+                          "\345\255\227"
+                          /* U+5316 CJK UNIFIED IDEOGRAPH-5316
+                             UTF-8: 0xE5 0x8C 0x96
+                             C octal escaped UTF-8: \345\214\226.  */
+                          "\345\214\226"
+                          /* U+3051 HIRAGANA LETTER KE
+                             UTF-8: 0xE3 0x81 0x91
+                             C octal escaped UTF-8: \343\201\221.  */
+                          "\343\201\221"
+                          /* Closing quote.  */
+                          "\"");
+  jv = parse_utf8_string (mojibake, &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_STRING, jv->get_kind ());
+  /* Result of get_string should be UTF-8 encoded, without quotes.  */
+  ASSERT_STREQ ("\346\226\207" "\345\255\227" "\345\214\226" "\343\201\221",
+                ((json::string *)jv)->get_string ());
+  /* Result of dump should be UTF-8 encoded, with quotes.  */
+  assert_to_str_eq (mojibake, jv);
+  delete jv;
+
+  /* Test of \u-escaped unicode.  This is "mojibake" again, as above.  */
+  const char *escaped_unicode = "\"\\u6587\\u5b57\\u5316\\u3051\"";
+  jv = parse_utf8_string (escaped_unicode, &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_STRING, jv->get_kind ());
+  /* Result of get_string should be UTF-8 encoded, without quotes.  */
+  ASSERT_STREQ ("\346\226\207" "\345\255\227" "\345\214\226" "\343\201\221",
+                ((json::string *)jv)->get_string ());
+  /* Result of dump should be UTF-8 encoded, with quotes.  */
+  assert_to_str_eq (mojibake, jv);
+  delete jv;
+}
+
+/* Verify that JSON numbers (integers, negatives, decimals and
+   exponents) are parsed, printed and cloned correctly.  */
+
+static void
+test_parse_number ()
+{
+  json::value *jv, *clone;
+
+  char *err = NULL;
+  jv = parse_utf8_string ("42", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
+  ASSERT_EQ (42.0, ((json::number *)jv)->get ());
+  assert_to_str_eq ("42", jv);
+  clone = jv->clone ();
+  ASSERT_EQ (JSON_NUMBER, clone->get_kind ());
+  delete clone;
+  delete jv;
+
+  /* Negative number.  */
+  jv = parse_utf8_string ("-17", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
+  ASSERT_EQ (-17.0, ((json::number *)jv)->get ());
+  assert_to_str_eq ("-17", jv);
+  delete jv;
+
+  /* Decimal.  */
+  jv = parse_utf8_string ("3.141", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
+  ASSERT_EQ (3.141, ((json::number *)jv)->get ());
+  assert_to_str_eq ("3.141", jv);
+  delete jv;
+
+  /* Exponents.  */
+  jv = parse_utf8_string ("3.141e+0", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
+  ASSERT_EQ (3.141, ((json::number *)jv)->get ());
+  assert_to_str_eq ("3.141", jv);
+  delete jv;
+
+  jv = parse_utf8_string ("42e2", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
+  ASSERT_EQ (4200, ((json::number *)jv)->get ());
+  assert_to_str_eq ("4200", jv);
+  delete jv;
+
+  jv = parse_utf8_string ("42e-1", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
+  ASSERT_EQ (4.2, ((json::number *)jv)->get ());
+  assert_to_str_eq ("4.2", jv);
+  delete jv;
+
+}
+
+/* Verify that a JSON array of numbers is parsed, printed and cloned
+   correctly.  */
+
+static void
+test_parse_array ()
+{
+  json::value *jv, *clone;
+
+  char *err = NULL;
+  jv = parse_utf8_string ("[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_ARRAY, jv->get_kind ());
+  json::array *arr = static_cast <json::array *> (jv);
+  ASSERT_EQ (10, arr->get_length ());
+  for (int i = 0; i < 10; i++)
+    {
+      json::value *element = arr->get (i);
+      ASSERT_EQ (JSON_NUMBER, element->get_kind ());
+      ASSERT_EQ (i, ((json::number *)element)->get ());
+    }
+  assert_to_str_eq ("[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", jv);
+
+  /* The clone must have the same elements as the original.  */
+  clone = jv->clone ();
+  ASSERT_EQ (JSON_ARRAY, clone->get_kind ());
+  arr = static_cast <json::array *> (clone);
+  ASSERT_EQ (10, arr->get_length ());
+  for (int i = 0; i < 10; i++)
+    {
+      json::value *element = arr->get (i);
+      ASSERT_EQ (JSON_NUMBER, element->get_kind ());
+      ASSERT_EQ (i, ((json::number *)element)->get ());
+    }
+  assert_to_str_eq ("[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", clone);
+  delete clone;
+
+  delete jv;
+}
+
+/* Verify that a JSON object containing a string and a nested array is
+   parsed and cloned correctly.
*/
+
+static void
+test_parse_object ()
+{
+  char *err = NULL;
+  json::value *jv
+    = parse_utf8_string ("{\"foo\": \"bar\", \"baz\": [42, null]}", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_TRUE (jv != NULL);
+  ASSERT_EQ (JSON_OBJECT, jv->get_kind ());
+  json::object *jo = static_cast <json::object *> (jv);
+
+  /* "foo" maps to the string "bar".  */
+  json::value *foo_value = jo->get ("foo");
+  ASSERT_TRUE (foo_value != NULL);
+  ASSERT_EQ (JSON_STRING, foo_value->get_kind ());
+  ASSERT_STREQ ("bar", ((json::string *)foo_value)->get_string ());
+
+  /* "baz" maps to the two-element array [42, null].  */
+  json::value *baz_value = jo->get ("baz");
+  ASSERT_TRUE (baz_value != NULL);
+  ASSERT_EQ (JSON_ARRAY, baz_value->get_kind ());
+
+  json::array *baz_array = (json::array *)baz_value;
+  ASSERT_EQ (2, baz_array->get_length ());
+  ASSERT_EQ (42, baz_array->get (0)->as_number ()->get ());
+  ASSERT_EQ (JSON_NULL, baz_array->get (1)->get_kind ());
+
+  // TODO: error-handling
+  // TODO: partial document
+
+  /* We can't use assert_to_str_eq since ordering is not guaranteed.  */
+
+  json::value *clone = jv->clone ();
+  ASSERT_EQ (JSON_OBJECT, clone->get_kind ());
+  ASSERT_EQ (JSON_STRING, clone->as_object ()->get ("foo")->get_kind ());
+  delete clone;
+
+  delete jv;
+}
+
+/* Verify that the literals "true", "false" and "null" are parsed,
+   dumped, and are clonable.  
*/
+
+static void
+test_parse_literals ()
+{
+  json::value *jv, *clone;
+  char *err = NULL;
+  jv = parse_utf8_string ("true", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_TRUE (jv != NULL);
+  ASSERT_EQ (JSON_TRUE, jv->get_kind ());
+  assert_to_str_eq ("true", jv);
+  clone = jv->clone ();
+  ASSERT_EQ (JSON_TRUE, clone->get_kind ());
+  delete clone;
+  delete jv;
+
+  jv = parse_utf8_string ("false", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_TRUE (jv != NULL);
+  ASSERT_EQ (JSON_FALSE, jv->get_kind ());
+  assert_to_str_eq ("false", jv);
+  clone = jv->clone ();
+  ASSERT_EQ (JSON_FALSE, clone->get_kind ());
+  delete clone;
+  delete jv;
+
+  jv = parse_utf8_string ("null", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_TRUE (jv != NULL);
+  ASSERT_EQ (JSON_NULL, jv->get_kind ());
+  assert_to_str_eq ("null", jv);
+  clone = jv->clone ();
+  ASSERT_EQ (JSON_NULL, clone->get_kind ());
+  delete clone;
+  delete jv;
+}
+
+/* Verify that a JSON-RPC 2.0 request object can be parsed without error.  */
+
+static void
+test_parse_jsonrpc ()
+{
+  char *err = NULL;
+  const char *request
+    = ("{\"jsonrpc\": \"2.0\", \"method\": \"subtract\","
+       " \"params\": [42, 23], \"id\": 1}");
+  json::value *jv = parse_utf8_string (request, &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_TRUE (jv != NULL);
+  delete jv;
+}
+
+/* Verify that the empty object "{}" is parsed and dumped.  */
+
+static void
+test_parse_empty_object ()
+{
+  char *err = NULL;
+  json::value *jv = parse_utf8_string ("{}", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_TRUE (jv != NULL);
+  ASSERT_EQ (JSON_OBJECT, jv->get_kind ());
+  assert_to_str_eq ("{}", jv);
+  delete jv;
+}
+
+/* Verify that parsing the empty string fails: no value is returned, and
+   the error message reports an unexpected EOF at index 0.  */
+
+static void
+test_error_empty_string ()
+{
+  char *err = NULL;
+  json::value *jv = parse_utf8_string ("", &err);
+  ASSERT_STREQ ("error at index 0: unexpected token: EOF", err);
+  ASSERT_TRUE (jv == NULL);
+  free (err);
+}
+
+/* Verify that a missing comma between array elements is rejected, with
+   an error message giving the index of the unexpected token.  */
+
+static void
+test_error_missing_comma ()
+{
+  char *err = NULL;
+  /* Index ruler for the string below:
+                       01234567.  */
+  const char *json = "[0, 1 2]";
+  json::value *jv = parse_utf8_string (json, &err);
+  ASSERT_STREQ ("error at index 6: expected ']'; got number",
+                err);
+  // FIXME: unittest the lexer?
+  ASSERT_TRUE (jv == NULL);
+  free (err);
+}
+
+/* Filter callback for scandir: return nonzero if the name of ENTRY
+   ends with ".json", zero otherwise.  */
+
+static int
+is_json_file (const struct dirent *entry)
+{
+  // FIXME: should also check that ENTRY is a regular file.
+  const char *suffix = ".json";
+  size_t name_len = strlen (entry->d_name);
+  size_t suffix_len = strlen (suffix);
+  /* Compare only the tail of the name, so that e.g. "foo.json.orig"
+     is rejected.  */
+  if (name_len < suffix_len)
+    return 0;
+  return 0 == strcmp (entry->d_name + name_len - suffix_len, suffix);
+}
+
+/* Read and parse the file FILENAME within directory PATH, and check the
+   outcome against the expectation encoded in the first character of
+   FILENAME: 'y' must parse, 'n' must be rejected with an error message,
+   and 'i' may do either.  If VERBOSE, log the filename, content and any
+   error message to stderr.  */
+
+static void
+test_file (const char *path, const char *filename, bool verbose)
+{
+  if (verbose)
+    fprintf (stderr, "%s\n", filename);
+
+  /* This selftest routine can't yet cope with embedded NIL characters,
+     since it calls selftest::read_file and then parses up to the first
+     embedded NIL.  Skip such tests for now.  */
+  if (0 == strcmp (filename, "n_multidigit_number_then_00.json"))
+    return;
+
+  const char *full_path = ACONCAT ((path, "/", filename, NULL)); // FIXME
+
+  char *content = read_file (SELFTEST_LOCATION, full_path);
+  if (verbose)
+    fprintf (stderr, " content: %s\n", content);
+
+  char *err = NULL;
+  json::value *jv = parse_utf8_string (content, &err);
+
+  if (verbose && err)
+    fprintf (stderr, " err: %s\n", err);
+
+  if (filename[0] == 'y')
+    {
+      /* We expect filenames starting with "y_" to be successfully parsed.  */
+      ASSERT_NE (NULL, jv);
+      ASSERT_EQ (NULL, err);
+
+      /* Verify that we can dump the value.  */
+      char *json = jv->to_str ();
+      free (json);
+
+      delete jv;
+    }
+  else if (filename[0] == 'n')
+    {
+      /* We expect filenames starting with "n_" to be rejected, with an
+         error message.  */
+      ASSERT_EQ (NULL, jv);
+      ASSERT_NE (NULL, err);
+    }
+  else if (filename[0] == 'i')
+    {
+      /* Filenames that start with "i_" have implementation-defined
+         results; we may or may not cope with them.  */
+      delete jv;
+    }
+  free (err);
+
+  free (content);
+}
+
+/* Run test_file on every ".json" file found by scanning the
+   JSONTestSuite "test_parsing" directory, passing VERBOSE through.  
*/
+
+static void
+test_seriot_testsuite (bool verbose)
+{
+  struct dirent **json_files;
+  /* The path is relative, so it is resolved against the current working
+     directory of the process running the selftests.  */
+  const char *path = "../../JSONTestSuite/test_parsing"; // FIXME
+  int num_entries = scandir (path, &json_files, is_json_file, alphasort);
+  if (num_entries < 0)
+    {
+      perror ("scandir");
+      fail (SELFTEST_LOCATION, "scandir failed");
+    }
+
+  /* Each entry, and then the array holding them, is freed here once
+     the corresponding file has been tested.  */
+  for (int i = 0; i < num_entries; i++)
+    {
+      test_file (path, json_files[i]->d_name, verbose);
+      free (json_files[i]);
+    }
+  free (json_files);
+}
+
+/* Run all of the selftests within this file.  */
+
+void
+json_c_tests ()
+{
+  test_parse_string ();
+  test_parse_number ();
+  test_parse_array ();
+  test_parse_object ();
+  test_parse_literals ();
+  test_parse_jsonrpc ();
+  test_parse_empty_object ();
+  test_error_empty_string ();
+  test_error_missing_comma ();
+
+  /* FIXME: tests for roundtripping (noting that we don't preserve
+     object key ordering).  */
+
+  /* FIXME: cloning.  */
+  test_seriot_testsuite (false);
+}
+
+} // namespace selftest
+
+#endif /* #if CHECKING_P */
diff --git a/gcc/json.h b/gcc/json.h
new file mode 100644
index 0000000..aedf84a
--- /dev/null
+++ b/gcc/json.h
@@ -0,0 +1,214 @@
+/* JSON parsing
+   Copyright (C) 2017 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_JSON_H
+#define GCC_JSON_H
+
+/* Implementation of JSON, a lightweight data-interchange format.
+
+   See http://www.json.org/
+   and http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
+   and https://tools.ietf.org/html/rfc7159
+
+   Supports parsing text into a DOM-like tree of json::value *, dumping
+   json::value * to text.  */
+
+namespace json
+{
+
+/* Forward decls of json::value and its subclasses (using indentation
+   to denote inheritance).  */
+
+class value;
+  class object;
+  class array;
+  class number;
+  class string;
+  class literal;
+
+/* An enum for discriminating the subclasses of json::value.  */
+
+enum kind
+{
+  /* class json::object.  */
+  JSON_OBJECT,
+
+  /* class json::array.  */
+  JSON_ARRAY,
+
+  /* class json::number.  */
+  JSON_NUMBER,
+
+  /* class json::string.  */
+  JSON_STRING,
+
+  /* class json::literal uses these three values to identify the
+     particular literal.  */
+  JSON_TRUE,
+  JSON_FALSE,
+  JSON_NULL
+};
+
+/* Base class of JSON value.  */
+
+class value
+{
+ public:
+  virtual ~value () {}
+  virtual enum kind get_kind () const = 0;
+  virtual void print (pretty_printer *pp) const = 0;
+
+  /* Create a deep copy of the value, returning a value which must be
+     deleted by the caller.  */
+  virtual value *clone () const = 0;
+
+  char *to_str () const;
+  void dump (FILE *) const;
+
+  /* Methods for dynamically casting a value to one of the subclasses,
+     returning NULL if the value is of the wrong kind.  */
+  const object *as_object () const;
+  const array *as_array () const;
+  const number *as_number () const;
+  const string *as_string () const;
+
+  /* Convenience accessors for attempting to perform key/value lookups
+     on this value as if it were a json::object.
+
+     On success, return true and write the value to OUT_VALUE.
+     On failure, return false and write an error message to OUT_ERR
+     (which must be freed by the caller).  */
+  bool get_value_by_key (const char *name, const value *&out_value,
+                         char *&out_err) const;
+  bool get_int_by_key (const char *name, int &out_value, char *&out_err) const;
+  bool get_string_by_key (const char *name, const char *&out_value,
+                          char *&out_err) const;
+  bool get_array_by_key (const char *name, const array *&out_value,
+                         char *&out_err) const;
+
+  /* As above, but the key is optional.  THIS must still be an object,
+     though.  */
+  bool get_optional_value_by_key (const char *name, const value *&out_value,
+                                  char *&out_err) const;
+  bool get_optional_string_by_key (const char *name, const char *&out_value,
+                                   char *&out_err) const;
+};
+
+/* Subclass of value for objects: an unordered collection of
+   key/value pairs.  */
+
+class object : public value
+{
+ public:
+  ~object ();
+
+  enum kind get_kind () const FINAL OVERRIDE { return JSON_OBJECT; }
+  void print (pretty_printer *pp) const FINAL OVERRIDE;
+  value *clone () const FINAL OVERRIDE;
+
+  value *get (const char *key) const;
+  value *get_if_nonnull (const char *key) const;
+
+  void set (const char *key, value *v);
+
+ private:
+  typedef hash_map <char *, value *,
+    simple_hashmap_traits<nofree_string_hash, value *> > map_t;
+  map_t m_map;
+};
+
+/* Subclass of value for arrays.  */
+
+class array : public value
+{
+ public:
+  ~array ();
+
+  enum kind get_kind () const FINAL OVERRIDE { return JSON_ARRAY; }
+  void print (pretty_printer *pp) const FINAL OVERRIDE;
+  value *clone () const FINAL OVERRIDE;
+
+  unsigned get_length () const { return m_elements.length (); }
+  value *get (int idx) const { return m_elements[idx]; }
+  void append (value *v) { m_elements.safe_push (v); }
+
+ private:
+  auto_vec<value *> m_elements;
+};
+
+/* Subclass of value for numbers.  
*/
+
+class number : public value
+{
+ public:
+  number (double value) : m_value (value) {}
+
+  enum kind get_kind () const FINAL OVERRIDE { return JSON_NUMBER; }
+  void print (pretty_printer *pp) const FINAL OVERRIDE;
+  value *clone () const FINAL OVERRIDE;
+
+  double get () const { return m_value; }
+
+ private:
+  double m_value;
+};
+
+/* Subclass of value for strings.  The constructor takes a copy of its
+   argument (via xstrdup), which the destructor frees.
+
+   NOTE(review): no copy constructor or assignment operator is declared,
+   so copying a json::string by value would leave two objects freeing
+   the same buffer; confirm that instances are only copied via clone.  */
+
+class string : public value
+{
+ public:
+  string (const char *utf8) : m_utf8 (xstrdup (utf8)) {}
+  ~string () { free (m_utf8); }
+
+  enum kind get_kind () const FINAL OVERRIDE { return JSON_STRING; }
+  void print (pretty_printer *pp) const FINAL OVERRIDE;
+  value *clone () const FINAL OVERRIDE;
+
+  const char *get_string () const { return m_utf8; }
+
+ private:
+  char *m_utf8;
+};
+
+/* Subclass of value for the three JSON literals "true", "false",
+   and "null".  get_kind returns the kind passed to the constructor.  */
+
+class literal : public value
+{
+ public:
+  literal (enum kind kind) : m_kind (kind) {}
+
+  enum kind get_kind () const FINAL OVERRIDE { return m_kind; }
+  void print (pretty_printer *pp) const FINAL OVERRIDE;
+  value *clone () const FINAL OVERRIDE;
+
+ private:
+  enum kind m_kind;
+};
+
+/* Declarations for parsing JSON to a json::value * tree.
+
+   As exercised by the selftests in json.c: on success the result is a
+   value that the caller deletes, with *ERR_OUT left untouched; on
+   failure the result is NULL and *ERR_OUT is set to an error message
+   that the caller frees.  */
+
+extern value *parse_utf8_string (size_t length, const char *utf8_buf,
+                                 char **err_out);
+extern value *parse_utf8_string (const char *utf8, char **err_out);
+
+} // namespace json
+
+#endif  /* GCC_JSON_H  */
diff --git a/gcc/selftest-input.h b/gcc/selftest-input.h
new file mode 100644
index 0000000..d56af36
--- /dev/null
+++ b/gcc/selftest-input.h
@@ -0,0 +1,54 @@
+/* Support for selftests of location handling.
+   Copyright (C) 2016-2017 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_SELFTEST_INPUT_H
+#define GCC_SELFTEST_INPUT_H
+
+/* The selftest code should entirely disappear in a production
+   configuration, hence we guard all of it with #if CHECKING_P.  */
+
+#if CHECKING_P
+
+namespace selftest {
+
+/* input.c.  */
+
+/* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
+   on LOC.  Use REPORT_LOC as the effective location when reporting
+   any issues.  */
+
+extern void assert_loceq (const location &report_loc,
+                          const char *exp_filename, int exp_linenum,
+                          int exp_colnum, location_t loc);
+
+/* Evaluate EXP_FILENAME, EXP_LINENUM, EXP_COLNUM, and LOC.
+   Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
+   on LOC.  */
+
+#define ASSERT_LOCEQ(EXP_FILENAME, EXP_LINENUM, EXP_COLNUM, LOC) \
+  SELFTEST_BEGIN_STMT \
+  ::selftest::assert_loceq (SELFTEST_LOCATION, (EXP_FILENAME), \
+                            (EXP_LINENUM), (EXP_COLNUM), (LOC)); \
+  SELFTEST_END_STMT
+
+} /* end of namespace selftest.  */
+
+#endif /* #if CHECKING_P */
+
+#endif /* GCC_SELFTEST_INPUT_H */
diff --git a/gcc/selftest-run-tests.c b/gcc/selftest-run-tests.c
index f62bc72..4f72e6c 100644
--- a/gcc/selftest-run-tests.c
+++ b/gcc/selftest-run-tests.c
@@ -66,6 +66,7 @@ selftest::run_tests ()
   sreal_c_tests ();
   fibonacci_heap_c_tests ();
   typed_splay_tree_c_tests ();
+  json_c_tests ();
 
   /* Mid-level data structures.  */
   input_c_tests ();
@@ -73,6 +74,7 @@ selftest::run_tests ()
   gimple_c_tests ();
   rtl_tests_c_tests ();
   read_rtl_function_c_tests ();
+  firehose_c_tests ();
 
   /* Higher-level tests, or for components that other selftests don't
      rely on.  
*/ diff --git a/gcc/selftest.c b/gcc/selftest.c index b41b9f5..26a6749 100644 --- a/gcc/selftest.c +++ b/gcc/selftest.c @@ -162,7 +162,19 @@ read_file (const location &loc, const char *path) FILE *f_in = fopen (path, "r"); if (!f_in) fail_formatted (loc, "unable to open file: %s", path); + char *result = read_file (loc, f_in, path); + fclose (f_in); + return result; +} + +/* Read all of F_IN into memory, returning a 0-terminated buffer + that must be freed by the caller. F_IN is *not* closed. + Fail (and abort) if there are any problems, with LOC as the reported + location of the failure, using DESC as a description of the file. */ +char * +read_file (const location &loc, FILE *f_in, const char *desc) +{ /* Read content, allocating FIXME. */ char *result = NULL; size_t total_sz = 0; @@ -186,13 +198,18 @@ read_file (const location &loc, const char *path) } if (!feof (f_in)) - fail_formatted (loc, "error reading from %s: %s", path, + fail_formatted (loc, "error reading from %s: %s", desc, xstrerror (errno)); - fclose (f_in); - /* 0-terminate the buffer. */ + if (total_sz == 0) + { + size_t new_alloc_sz = alloc_sz ? alloc_sz * 2: total_sz + 1; + result = (char *)xrealloc (result, new_alloc_sz); + alloc_sz = new_alloc_sz; + } gcc_assert (total_sz < alloc_sz); + gcc_assert (result); result[total_sz] = '\0'; return result; @@ -296,6 +313,17 @@ test_read_file () free (buf); } +/* Verify that read_file can cope with an empty file. */ + +static void +test_read_empty_file () +{ + temp_source_file t (SELFTEST_LOCATION, "empty.txt", ""); + char *buf = read_file (SELFTEST_LOCATION, t.get_filename ()); + ASSERT_STREQ ("", buf); + free (buf); +} + /* Verify locate_file (and read_file). 
*/ static void @@ -317,6 +345,7 @@ selftest_c_tests () test_assertions (); test_named_temp_file (); test_read_file (); + test_read_empty_file (); test_locate_file (); } diff --git a/gcc/selftest.h b/gcc/selftest.h index dad53e9..08ed5e3 100644 --- a/gcc/selftest.h +++ b/gcc/selftest.h @@ -153,6 +153,10 @@ for_each_line_table_case (void (*testcase) (const line_table_case &)); extern char *read_file (const location &loc, const char *path); +/* FIXME. */ + +extern char *read_file (const location &loc, FILE *infile, const char *desc); + /* A helper function for writing tests that interact with the garbage collector. */ @@ -177,12 +181,14 @@ extern void edit_context_c_tests (); extern void et_forest_c_tests (); extern void fold_const_c_tests (); extern void fibonacci_heap_c_tests (); +extern void firehose_c_tests (); extern void function_tests_c_tests (); extern void gimple_c_tests (); extern void ggc_tests_c_tests (); extern void hash_map_tests_c_tests (); extern void hash_set_tests_c_tests (); extern void input_c_tests (); +extern void json_c_tests (); extern void pretty_print_c_tests (); extern void read_rtl_function_c_tests (); extern void rtl_tests_c_tests (); diff --git a/gcc/testsuite/selftests/checker-output/test-clang-analyzer.json b/gcc/testsuite/selftests/checker-output/test-clang-analyzer.json new file mode 100644 index 0000000..eda9abc --- /dev/null +++ b/gcc/testsuite/selftests/checker-output/test-clang-analyzer.json @@ -0,0 +1,122 @@ +{ + "customfields": { + "scan-build-invocation": "scan-build -v -plist --use-analyzer /usr/bin/clang -o /tmp/tmp8ytuRj gcc -B. 
-c ../../src/bogus.c", + "returncode": 0, + "stdout": "scan-build: Using '/usr/bin/clang' for static analysis\nscan-build: Emitting reports for this run to '/tmp/tmp8ytuRj/2017-05-24-001755-39710-1'.\nscan-build: Analysis run complete.\nscan-build: Analysis results (plist files) deposited in '/tmp/tmp8ytuRj/2017-05-24-001755-39710-1'\n", + "stderr": "../../src/bogus.c: In function \u2018test\u2019:\n../../src/bogus.c:5:10: warning: function returns address of local variable [-Wreturn-local-addr]\n return tmp;\n ^~~\n../../src/bogus.c:5:3: warning: Address of stack memory associated with local variable 'tmp' returned to caller\n return tmp;\n ^~~~~~~~~~\n1 warning generated.\n", + "plistpath": "/tmp/tmp8ytuRj/2017-05-24-001755-39710-1/report-DEoPmt.plist" + }, + "results": [ + { + "severity": null, + "trace": { + "states": [ + { + "notes": null, + "location": { + "function": { + "name": "" + }, + "range_": { + "start": { + "column": 3, + "line": 3 + }, + "end": { + "column": 6, + "line": 3 + } + }, + "file": { + "abspath": null, + "givenpath": "../../src/bogus.c", + "hash_": null + }, + "point": null + } + }, + { + "notes": null, + "location": { + "function": { + "name": "" + }, + "range_": { + "start": { + "column": 3, + "line": 5 + }, + "end": { + "column": 8, + "line": 5 + } + }, + "file": { + "abspath": null, + "givenpath": "../../src/bogus.c", + "hash_": null + }, + "point": null + } + }, + { + "notes": { + "text": "Address of stack memory associated with local variable 'tmp' returned to caller" + }, + "location": { + "function": { + "name": "" + }, + "range_": null, + "file": { + "abspath": null, + "givenpath": "../../src/bogus.c", + "hash_": null + }, + "point": { + "column": 3, + "line": 5 + } + } + } + ] + }, + "type": "Issue", + "notes": null, + "testid": null, + "message": { + "text": "Address of stack memory associated with local variable 'tmp' returned to caller" + }, + "cwe": null, + "customfields": null, + "location": { + "function": null, + "range_": 
null, + "file": { + "abspath": null, + "givenpath": "../../src/bogus.c", + "hash_": null + }, + "point": { + "column": 3, + "line": 5 + } + } + } + ], + "metadata": { + "stats": { + "wallclocktime": 0.22788214683532715 + }, + "sut": null, + "file_": { + "abspath": "/home/david/coding-3/gcc-git-static-analysis/build/gcc/../../src/bogus.c", + "givenpath": "../../src/bogus.c", + "hash_": null + }, + "generator": { + "version": null, + "name": "clang-analyzer" + } + } +} \ No newline at end of file diff --git a/gcc/testsuite/selftests/checker-output/test-cppcheck.json b/gcc/testsuite/selftests/checker-output/test-cppcheck.json new file mode 100644 index 0000000..c9651ee --- /dev/null +++ b/gcc/testsuite/selftests/checker-output/test-cppcheck.json @@ -0,0 +1,50 @@ +{ + "customfields": { + "cppcheck-invocation": "cppcheck --xml --xml-version=2 ../../src/test-sources/conditional-leak.c", + "returncode": 0, + "stdout": "Checking ../../src/test-sources/conditional-leak.c...\n", + "stderr": "\n\n \n \n \n \n \n \n\n" + }, + "results": [ + { + "severity": "error", + "trace": null, + "type": "Issue", + "notes": null, + "testid": "memleak", + "message": { + "text": "Memory leak: ptr_1" + }, + "cwe": null, + "customfields": null, + "location": { + "function": null, + "range_": null, + "file": { + "abspath": null, + "givenpath": "../../src/test-sources/conditional-leak.c", + "hash_": null + }, + "point": { + "column": 0, + "line": 11 + } + } + } + ], + "metadata": { + "stats": { + "wallclocktime": 0.006749868392944336 + }, + "sut": null, + "file_": { + "abspath": "/home/david/coding-3/gcc-git-static-analysis/build/gcc/../../src/test-sources/conditional-leak.c", + "givenpath": "../../src/test-sources/conditional-leak.c", + "hash_": null + }, + "generator": { + "version": "1.63", + "name": "cppcheck" + } + } +} \ No newline at end of file diff --git a/gcc/testsuite/selftests/checker-output/test-failure.json b/gcc/testsuite/selftests/checker-output/test-failure.json new file mode 
100644 index 0000000..fd07cab --- /dev/null +++ b/gcc/testsuite/selftests/checker-output/test-failure.json @@ -0,0 +1,38 @@ +{ + "customfields": { + "traceback": "Traceback (most recent call last):\n File \"/home/david/coding-3/gcc-git-static-analysis/src/checkers/checker.py\", line 142, in checked_invoke\n analysis = self.raw_invoke(gccinv, sourcefile)\n File \"./checkers/always_fails.py\", line 40, in raw_invoke\n return self._run_subprocess(sourcefile, args)\n File \"/home/david/coding-3/gcc-git-static-analysis/src/checkers/checker.py\", line 213, in _run_subprocess\n stdout=PIPE, stderr=PIPE, env=env)\n File \"/usr/lib64/python2.7/site-packages/subprocess32.py\", line 812, in __init__\n restore_signals, start_new_session)\n File \"/usr/lib64/python2.7/site-packages/subprocess32.py\", line 1557, in _execute_child\n raise child_exception_type(errno_num, err_msg)\nOSError: [Errno 2] No such file or directory: '/this/executable/does/not/exist'\n" + }, + "results": [ + { + "type": "Failure", + "message": { + "text": "Exception running always-fails: [Errno 2] No such file or directory: '/this/executable/does/not/exist'" + }, + "failureid": "exception", + "location": { + "function": null, + "range_": null, + "file": { + "abspath": null, + "givenpath": "checkers/test-sources/harmless.c", + "hash_": null + }, + "point": null + }, + "customfields": null + } + ], + "metadata": { + "stats": null, + "sut": null, + "file_": { + "abspath": "/home/david/coding-3/gcc-git-static-analysis/src/checkers/test-sources/harmless.c", + "givenpath": "checkers/test-sources/harmless.c", + "hash_": null + }, + "generator": { + "version": null, + "name": "always-fails" + } + } +} \ No newline at end of file diff --git a/gcc/testsuite/selftests/checker-policy/test-policy.json b/gcc/testsuite/selftests/checker-policy/test-policy.json new file mode 100644 index 0000000..90532b2 --- /dev/null +++ b/gcc/testsuite/selftests/checker-policy/test-policy.json @@ -0,0 +1,7 @@ +[{ "executable": 
"../../src/checkers/clang_analyzer.py", + "languages": ["c", "c++"] }, + { "executable": "../../src/checkers/cppcheck.py", + "languages": ["c", "c++"] }, + { "executable": "../../src/checkers/flawfinder.py", + "languages": ["c", "c++"] }, + { "executable": "../../src/checkers/ianal.py"}] diff --git a/gcc/toplev.c b/gcc/toplev.c index 425315c..5b52c7c 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -79,6 +79,7 @@ along with GCC; see the file COPYING3. If not see #include "omp-offload.h" #include "hsa-common.h" #include "edit-context.h" +#include "checkers.h" #if defined(DBX_DEBUGGING_INFO) || defined(XCOFF_DEBUGGING_INFO) #include "dbxout.h" @@ -500,6 +501,10 @@ compile_file (void) if (lang_hooks.decls.post_compilation_parsing_cleanups) lang_hooks.decls.post_compilation_parsing_cleanups (); + /* FIXME. */ + if (flag_run_analyzers) + checkers_finish (); + if (seen_error ()) return; @@ -1971,6 +1976,9 @@ do_compile () { int i; + if (flag_run_analyzers) + checkers_start (flag_run_analyzers); + timevar_start (TV_PHASE_SETUP); /* This must be run always, because it is needed to compute the FP diff --git a/gcc/x86_64.annobin.cc b/gcc/x86_64.annobin.cc new file mode 100644 index 0000000..f9c5b03 --- /dev/null +++ b/gcc/x86_64.annobin.cc @@ -0,0 +1,211 @@ +/* x86_64.annobin - x86_64 specific parts of the annobin plugin. + Copyright (c) 2017 Red Hat. + Created by Nick Clifton. + + This is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + It is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
*/
+
+#include "annobin.h"
+
+/* Property types for the "ISA used" and "ISA needed" loader notes.  */
+#define GNU_PROPERTY_X86_ISA_1_USED          0xc0000000
+#define GNU_PROPERTY_X86_ISA_1_NEEDED        0xc0000001
+
+/* Bits stored in the data word of the two property types above.  */
+#define GNU_PROPERTY_X86_ISA_1_486           (1U << 0)
+#define GNU_PROPERTY_X86_ISA_1_586           (1U << 1)
+#define GNU_PROPERTY_X86_ISA_1_686           (1U << 2)
+#define GNU_PROPERTY_X86_ISA_1_SSE           (1U << 3)
+#define GNU_PROPERTY_X86_ISA_1_SSE2          (1U << 4)
+#define GNU_PROPERTY_X86_ISA_1_SSE3          (1U << 5)
+#define GNU_PROPERTY_X86_ISA_1_SSSE3         (1U << 6)
+#define GNU_PROPERTY_X86_ISA_1_SSE4_1        (1U << 7)
+#define GNU_PROPERTY_X86_ISA_1_SSE4_2        (1U << 8)
+#define GNU_PROPERTY_X86_ISA_1_AVX           (1U << 9)
+#define GNU_PROPERTY_X86_ISA_1_AVX2          (1U << 10)
+#define GNU_PROPERTY_X86_ISA_1_AVX512F       (1U << 11)
+#define GNU_PROPERTY_X86_ISA_1_AVX512CD      (1U << 12)
+#define GNU_PROPERTY_X86_ISA_1_AVX512ER      (1U << 13)
+#define GNU_PROPERTY_X86_ISA_1_AVX512PF      (1U << 14)
+#define GNU_PROPERTY_X86_ISA_1_AVX512VL      (1U << 15)
+#define GNU_PROPERTY_X86_ISA_1_AVX512DQ      (1U << 16)
+#define GNU_PROPERTY_X86_ISA_1_AVX512BW      (1U << 17)
+
+
+/* The ISA flags recorded for the compilation as a whole, and the
+   numerically smallest and largest flag values seen for any function.
+   NOTE(review): min/max are maintained with < and > on a bitmask, which
+   is not the same as an intersection/union of features - confirm that
+   this is what consumers of the loader notes expect.  */
+static unsigned long  global_x86_isa = 0;
+static unsigned long  min_x86_isa = 0;
+static unsigned long  max_x86_isa = 0;
+
+/* No-op in this implementation.  */
+
+void
+annobin_save_target_specific_information (void)
+{
+}
+
+/* Record the current value of ix86_isa_flags as the global, minimum and
+   maximum ISA, and emit it as an OPEN numeric ABI note.  */
+
+void
+annobin_record_global_target_notes (void)
+{
+  /* Note - most, but not all, bits in the ix86_isa_flags variable
+     are significant for purposes of ABI compatibility.  We do not
+     bother to filter out any bits however, as we prefer to leave
+     it to the consumer to decide what is significant.  */
+  min_x86_isa = max_x86_isa = global_x86_isa = ix86_isa_flags;
+
+  annobin_output_numeric_note (GNU_BUILD_ATTRIBUTE_ABI, global_x86_isa,
+                               "numeric: ABI", NULL, NT_GNU_BUILD_ATTRIBUTE_OPEN);
+  annobin_inform (1, "Record global isa of %lx", global_x86_isa);
+}
+
+/* If ix86_isa_flags differs from the recorded global value, emit a FUNC
+   numeric ABI note naming the current function, and widen the recorded
+   minimum/maximum ISA values as needed.  */
+
+void
+annobin_target_specific_function_notes (void)
+{
+  if ((unsigned long) ix86_isa_flags != global_x86_isa)
+    {
+      /* Cast so that the argument matches the %lx conversion, as the
+         comparisons in this function already do.  */
+      annobin_inform (1, "ISA value has changed from %lx to %lx for %s",
+                      global_x86_isa, (unsigned long) ix86_isa_flags,
+                      current_function_name ());
+
+      annobin_output_numeric_note (GNU_BUILD_ATTRIBUTE_ABI, ix86_isa_flags,
+                                   "numeric: ABI", current_function_name (),
+                                   NT_GNU_BUILD_ATTRIBUTE_FUNC);
+
+      if ((unsigned long) ix86_isa_flags < min_x86_isa)
+        min_x86_isa = ix86_isa_flags;
+      if ((unsigned long) ix86_isa_flags > max_x86_isa)
+        max_x86_isa = ix86_isa_flags;
+    }
+}
+
+/* Translate ISA, a set of GCC OPTION_MASK_ISA_* bits, into the
+   corresponding set of GNU_PROPERTY_X86_ISA_1_* bits and return it.
+   Bits with no counterpart below are dropped.
+   NOTE(review): callers pass an unsigned long; any OPTION_MASK_ISA_*
+   bit that does not fit in unsigned int is lost on entry - confirm.  */
+
+static unsigned int
+convert_gcc_isa_to_gnu_property_isa (unsigned int isa)
+{
+  unsigned int result = 0;
+
+  if (isa & OPTION_MASK_ISA_SSE)
+    result |= GNU_PROPERTY_X86_ISA_1_SSE;
+  if (isa & OPTION_MASK_ISA_SSE2)
+    result |= GNU_PROPERTY_X86_ISA_1_SSE2;
+  /* SSE3 and SSSE3 are distinct extensions with distinct property bits.  */
+  if (isa & OPTION_MASK_ISA_SSE3)
+    result |= GNU_PROPERTY_X86_ISA_1_SSE3;
+  if (isa & OPTION_MASK_ISA_SSSE3)
+    result |= GNU_PROPERTY_X86_ISA_1_SSSE3;
+  if (isa & OPTION_MASK_ISA_SSE4_1)
+    result |= GNU_PROPERTY_X86_ISA_1_SSE4_1;
+  if (isa & OPTION_MASK_ISA_SSE4_2)
+    result |= GNU_PROPERTY_X86_ISA_1_SSE4_2;
+  if (isa & OPTION_MASK_ISA_AVX)
+    result |= GNU_PROPERTY_X86_ISA_1_AVX;
+  if (isa & OPTION_MASK_ISA_AVX2)
+    result |= GNU_PROPERTY_X86_ISA_1_AVX2;
+#ifdef OPTION_MASK_ISA_AVX512F
+  if (isa & OPTION_MASK_ISA_AVX512F)
+    result |= GNU_PROPERTY_X86_ISA_1_AVX512F;
+  if (isa & OPTION_MASK_ISA_AVX512CD)
+    result |= GNU_PROPERTY_X86_ISA_1_AVX512CD;
+  if (isa & OPTION_MASK_ISA_AVX512ER)
+    result |= GNU_PROPERTY_X86_ISA_1_AVX512ER;
+  if (isa & OPTION_MASK_ISA_AVX512PF)
+    result |= GNU_PROPERTY_X86_ISA_1_AVX512PF;
+  if (isa & OPTION_MASK_ISA_AVX512VL)
+    result |= GNU_PROPERTY_X86_ISA_1_AVX512VL;
+  if (isa & OPTION_MASK_ISA_AVX512DQ)
+    result |= GNU_PROPERTY_X86_ISA_1_AVX512DQ;
+  if (isa & OPTION_MASK_ISA_AVX512BW)
+    result |= GNU_PROPERTY_X86_ISA_1_AVX512BW;
+#endif
+  return result;
+}
+
+/* A property entry with a 32-bit data word.  */
+typedef struct
+{
+  Elf32_Word   pr_type;
+  Elf32_Word   pr_datasz;
+  Elf32_Word   pr_data;
+} Elf32_loader_note;
+
+/* A property entry with a 64-bit data word.  */
+typedef struct
+{
+  Elf32_Word   pr_type;
+  Elf32_Word   pr_datasz;
+  Elf64_Xword  pr_data;
+} Elf64_loader_note;
+
+/* A property entry with a 32-bit data word followed by a 32-bit pad
+   word, used for the ISA entries when annobin_is_64bit is set.  */
+typedef struct
+{
+  Elf32_Word   pr_type;
+  Elf32_Word   pr_datasz;
+  Elf32_Word   pr_data;
+  Elf32_Word   pr_pad;
+} Elf64_32_loader_note;
+
+/* Emit the notes intended for the dynamic loader: an optional stack
+   size entry (if annobin_enable_stack_size_notes), then ISA "used" and
+   "needed" entries built from max_x86_isa and min_x86_isa.  The entries
+   are assembled in a local buffer and written with a single call to
+   annobin_output_note.  */
+
+void
+annobin_target_specific_loader_notes (void)
+{
+  /* At most three entries are copied into BUFFER below, each one of the
+     small structs defined above, so 1024 bytes is ample.  */
+  char   buffer [1024];
+  char * ptr;
+
+  annobin_inform (1, "Creating notes for the dynamic loader");
+
+  fprintf (asm_out_file, "\t.pushsection %s, \"a\", %%note\n", NOTE_GNU_PROPERTY_SECTION_NAME);
+  fprintf (asm_out_file, "\t.balign 4\n");
+
+  ptr = buffer;
+
+  if (annobin_is_64bit)
+    {
+      Elf64_32_loader_note note32;
+
+      note32.pr_datasz = sizeof (note32.pr_data);
+      note32.pr_pad = 0;
+
+      if (annobin_enable_stack_size_notes)
+        {
+          Elf64_loader_note note64;
+
+          note64.pr_type   = GNU_PROPERTY_STACK_SIZE;
+          note64.pr_datasz = sizeof (note64.pr_data);
+          note64.pr_data   = annobin_max_stack_size;
+          memcpy (ptr, & note64, sizeof note64);
+          ptr += sizeof (note64);
+        }
+
+      note32.pr_type = GNU_PROPERTY_X86_ISA_1_USED;
+      note32.pr_data = convert_gcc_isa_to_gnu_property_isa (max_x86_isa);
+      memcpy (ptr, & note32, sizeof note32);
+      ptr += sizeof (note32);
+
+      note32.pr_type = GNU_PROPERTY_X86_ISA_1_NEEDED;
+      note32.pr_data = convert_gcc_isa_to_gnu_property_isa (min_x86_isa);
+      memcpy (ptr, & note32, sizeof note32);
+      ptr += sizeof (note32);
+    }
+  else
+    {
+      Elf32_loader_note note32;
+
+      note32.pr_datasz = sizeof (note32.pr_data);
+
+      if (annobin_enable_stack_size_notes)
+        {
+          note32.pr_type = GNU_PROPERTY_STACK_SIZE;
+          note32.pr_data = annobin_max_stack_size;
+          memcpy (ptr, & note32, sizeof note32);
+          ptr += sizeof (note32);
+        }
+
+      note32.pr_type = GNU_PROPERTY_X86_ISA_1_USED;
+      note32.pr_data = convert_gcc_isa_to_gnu_property_isa (max_x86_isa);
+      memcpy (ptr, & note32, sizeof note32);
+      ptr += sizeof (note32);
+
+      note32.pr_type = GNU_PROPERTY_X86_ISA_1_NEEDED;
+      note32.pr_data = convert_gcc_isa_to_gnu_property_isa (min_x86_isa);
+      memcpy (ptr, & note32, sizeof note32);
+      ptr += sizeof (note32);
+    }
+
+  /* PTR - BUFFER is the number of bytes of entries assembled above.  */
+  annobin_output_note ("GNU", 4, true, "Loader notes", buffer, ptr - buffer,
+                       false, NT_GNU_PROPERTY_TYPE_0);
+  fflush (asm_out_file);
+}