summaryrefslogtreecommitdiffstats
path: root/src/retrace/coredump2packages
blob: ac2db9f2d1af425395ce14a719b5dbe7c107165b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
#! /usr/bin/python
# -*- coding:utf-8;mode:python -*-
# Gets list of packages necessary for processing of a coredump.
# Uses eu-unstrip and yum.

import subprocess
import yum
import sys
import argparse

# Command line interface: a positional coredump path, an optional yum
# repository wildcard and an optional debug log file name.
parser = argparse.ArgumentParser(
    description='Get packages for coredump processing.')
parser.add_argument('--repos', metavar='WILDCARD', default='*',
                    help='Yum repository wildcard to be enabled')
parser.add_argument('coredump', help='Coredump')
parser.add_argument('--log', metavar='FILENAME',
                    help='Store debug output to a file')
args = parser.parse_args()

# Debug output goes to the --log file when one was given; otherwise it
# is written to /dev/null.
log = open(args.log or "/dev/null", "w")

#
# Initialize yum, enable only repositories specified via command line
# --repos option.
#
# While yum is being set up, sys.stdout points at the log, so anything
# written to stdout by these calls lands in the log file instead of in
# this script's own output.  The try/finally guarantees the original
# stdout is put back even when the setup fails or exits early.
#
stdout = sys.stdout
sys.stdout = log
try:
    yumbase = yum.YumBase()
    yumbase.doConfigSetup()
    if not yumbase.setCacheDir():
        # sys.exit() rather than the interactive-only exit() helper
        # provided by the site module.
        sys.exit(2)
    log.write("Closing all enabled repositories...\n")
    for repo in yumbase.repos.listEnabled():
        log.write(" - {0}\n".format(repo.name))
        repo.close()
        yumbase.repos.disableRepo(repo.id)
    log.write("Enabling repositories matching \'{0}\'...\n".format(args.repos))
    for repo in yumbase.repos.findRepos(args.repos):
        log.write(" - {0}\n".format(repo.name))
        repo.enable()
        repo.skip_if_unavailable = True
    yumbase.repos.doSetup()
    # Both sacks are populated with cacheonly=1.
    yumbase.repos.populateSack(mdtype='metadata', cacheonly=1)
    yumbase.repos.populateSack(mdtype='filelists', cacheonly=1)
finally:
    sys.stdout = stdout

#
# Get eu-unstrip output, which contains build-ids and binary object
# paths
#
log.write("Running eu-unstrip...\n")
unstrip_args = ['eu-unstrip', '--core={0}'.format(args.coredump), '-n']
unstrip_proc = subprocess.Popen(unstrip_args, stdout=subprocess.PIPE)
# Only stdout is captured; stderr of eu-unstrip is inherited.
unstrip = unstrip_proc.communicate()[0]
log.write("{0}\n".format(unstrip))
if not unstrip:
    # No output at all means there is nothing to look up.  Use
    # sys.exit() rather than the interactive-only exit() helper
    # provided by the site module.
    sys.exit(1)

def binary_packages_from_debuginfo_package(debuginfo_package, binobj_path):
    """
    Find the binary packages that go with a debuginfo package.

    If binobj_path names a file, yum is asked which packages ship
    that file, and only those whose EVR compares equal to the EVR of
    debuginfo_package are returned.

    If binobj_path is just '-' (no binary name is known), the package
    containing the binary cannot be determined.  In that case all
    packages with the same epoch, version, release and architecture
    as debuginfo_package, and with the same base package name, are
    returned; one of them should contain the binary.

    Every step is reported to the global log.
    """
    if binobj_path != '-':
        log.write("   Yum search for {0}...\n".format(binobj_path))
        owners = []
        for candidate in yumbase.pkgSack.searchFiles(binobj_path):
            log.write("    - {0}".format(str(candidate)))
            same_evr = candidate.returnEVR().compare(debuginfo_package.returnEVR()) == 0
            if same_evr:
                log.write(": NVR matches\n")
                owners.append(candidate)
            else:
                log.write(": NVR doesn't match\n")
        return owners

    # [exe] without binary name
    log.write("   Yum search for [exe] without binary name, "
              "packages with NVR {0}:{1}-{2}.{3}...\n".format(debuginfo_package.epoch,
                                                              debuginfo_package.ver,
                                                              debuginfo_package.rel,
                                                              debuginfo_package.arch))
    # Collect every package sharing the base package name.  The
    # alternative would be to download the debuginfo RPM, unpack it
    # and read the binary name from the
    # /usr/lib/debug/.build-id/xx/yyyyyy symlink.
    same_evra = yumbase.pkgSack.searchNevra(epoch=debuginfo_package.epoch,
                                            ver=debuginfo_package.ver,
                                            rel=debuginfo_package.rel,
                                            arch=debuginfo_package.arch)
    siblings = []
    for candidate in same_evra:
        log.write("    - {0}: base name \"{1}\"\n".format(str(candidate), candidate.base_package_name))
        if candidate.base_package_name == debuginfo_package.base_package_name:
            siblings.append(candidate)
    return siblings

def process_unstrip_entry(build_id, binobj_path):
    """
    Look up the packages belonging to one eu-unstrip entry.

    Returns a pair (packages, coredump_packages).

    packages holds every debuginfo package that ships the
    .build-id debug file for build_id, together with the binary
    packages found for each of them.

    coredump_packages names the package(s) containing the binary
    object of this entry.  When exactly one binary package was found
    for every debuginfo package, it is the list of those packages (as
    strings, possibly several versions of one package).  Otherwise it
    falls back to the list of base package names of the debuginfo
    packages.  It can be used to associate a coredump with the
    crashing package.
    """
    # Ask for a known path from debuginfo package.
    debuginfo_path = "/usr/lib/debug/.build-id/{0}/{1}.debug".format(build_id[:2], build_id[2:])
    log.write("Yum search for {0}...\n".format(debuginfo_path))

    found_packages = []
    exact_matches = []
    base_names = []

    # Some libraries lack debuginfo: either they were stripped during
    # the build, or /usr/lib/rpm/find-debuginfo.sh skipped them
    # because of wrong permissions or something similar.  The proper
    # solution is to detect such libraries and fix the packages.
    for debuginfo_package in yumbase.pkgSack.searchFiles(debuginfo_path):
        log.write(" - {0}\n".format(str(debuginfo_package)))
        found_packages.append(debuginfo_package)
        binaries = binary_packages_from_debuginfo_package(debuginfo_package, binobj_path)
        base_names.append(debuginfo_package.base_package_name)
        if len(binaries) == 1:
            exact_matches.append(str(binaries[0]))
        found_packages.extend(binaries)

    # Exact matches are only usable when every debuginfo package
    # yielded one; otherwise report the base names.
    unambiguous = len(exact_matches) == len(base_names)
    return found_packages, (exact_matches if unambiguous else base_names)


def process_unstrip_output():
    """
    Parse the eu-unstrip output, and search for packages via yum.

    Every line of the global `unstrip` text is split on whitespace.
    The second field (up to the '@') is taken as the build id, the
    third field as the binary object path, and the fifth field is
    compared with '[exe]'.  Lines with fewer than three fields are
    ignored.  A line whose path does not start with '/' is processed
    only when its fifth field is '[exe]'; if such a line has no fifth
    field it is skipped instead of raising IndexError.

    Returns a tuple containing three items:
      - a list of package objects, without duplicates (compared by
        their string form), in the order they were first found
      - a list of [path, build id] entries for which no package was
        found
      - a list of coredump package candidates, taken from the first
        processed entry
    """
    # List of packages found in yum repositories and matching the
    # coredump.
    package_list = []
    # String forms of the packages already in package_list, for fast
    # duplicate detection.
    seen_packages = set()
    # List of pairs (library/executable path, build id) which were not
    # found via yum.
    missing_buildid_list = []
    # Coredump package candidates.
    coredump_package_list = []
    first_entry = True
    for line in unstrip.split('\n'):
        parts = line.split()
        if len(parts) < 3:
            continue
        build_id = parts[1].split('@')[0]
        binobj_path = parts[2]
        if not binobj_path.startswith('/'):
            # Without an absolute path only the '[exe]' entry is of
            # interest; the length check protects the parts[4] access.
            if len(parts) < 5 or parts[4] != '[exe]':
                continue
        entry_package_list, entry_coredump_package_list = process_unstrip_entry(build_id, binobj_path)
        if first_entry:
            # NOTE(review): presumably the first processed entry is
            # the crashed executable -- confirm against eu-unstrip.
            coredump_package_list = entry_coredump_package_list
            first_entry = False
        if not entry_package_list:
            missing_buildid_list.append([binobj_path, build_id])
            continue
        for entry_package in entry_package_list:
            package_key = str(entry_package)
            if package_key not in seen_packages:
                seen_packages.add(package_key)
                package_list.append(entry_package)
    return package_list, missing_buildid_list, coredump_package_list

# Resolve the eu-unstrip output into found packages, unresolved
# build ids and coredump package candidates.
(package_list,
 missing_buildid_list,
 coredump_package_list) = process_unstrip_output()

#
# The package list might contain multiple packages with the same name,
# but different versions. This happens because some binary had the same
# build id over multiple package releases.
#
#
def find_duplicates(package_list):
    """
    Return the first pair of packages in package_list sharing a name.

    Pairs are examined in list order: each package is compared with
    all the packages that follow it.  Returns (None, None) when no
    two packages have the same name.
    """
    for position, earlier in enumerate(package_list):
        for later in package_list[position + 1:]:
            if earlier.name == later.name:
                return earlier, later
    return None, None

def count_removals(package_list, base_package_name, epoch, ver, rel, arch):
    """
    Count the packages in package_list that have the given base
    package name and the given epoch, version, release and
    architecture.
    """
    return sum(1 for candidate in package_list
               if candidate.base_package_name == base_package_name
               and candidate.epoch == epoch
               and candidate.ver == ver
               and candidate.rel == rel
               and candidate.arch == arch)

# Repeatedly pick a pair of same-named packages and drop one side of
# the pair (with all packages of the same base name and EVR) until no
# two packages in package_list share a name.
log.write("Checking for duplicates...\n")
package1, package2 = find_duplicates(package_list)
while package1 is not None:
    # For each side, count how many listed packages share its base
    # package name and epoch/version/release/arch.
    p1removals = count_removals(package_list, package1.base_package_name,
                                package1.epoch, package1.ver,
                                package1.rel, package1.arch)
    p2removals = count_removals(package_list, package2.base_package_name,
                                package2.epoch, package2.ver,
                                package2.rel, package2.arch)

    log.write(" - {0}".format(package1.base_package_name))
    if package1.base_package_name == package2.base_package_name:
        log.write("\n")
    else:
        log.write(" {0}\n".format(package2.base_package_name))
    for side, side_count in ((package1, p1removals), (package2, p2removals)):
        log.write("   - {0}:{1}-{2}.{3} ({4} dependent packages)\n".format(side.epoch,
                                                                           side.ver,
                                                                           side.rel,
                                                                           side.arch,
                                                                           side_count))

    if p1removals == p2removals:
        # Tie: remove the older one; package1 goes unless it is newer.
        package1_is_newer = package1.returnEVR().compare(package2.returnEVR()) > 0
        removal_candidate = package2 if package1_is_newer else package1
        decision = "   - decided to remove {0}:{1}-{2}.{3} because it's older\n"
    else:
        # Remove the side with the smaller count.
        removal_candidate = package2 if p1removals > p2removals else package1
        decision = "   - decided to remove {0}:{1}-{2}.{3} because has fewer dependencies\n"
    log.write(decision.format(removal_candidate.epoch,
                              removal_candidate.ver,
                              removal_candidate.rel,
                              removal_candidate.arch))

    # Remove the removal_candidate packages from the package list.
    # Iterate over a copy because the list is modified on the way.
    for package in package_list[:]:
        if package.base_package_name != removal_candidate.base_package_name:
            continue
        if package.returnEVR().compare(removal_candidate.returnEVR()) == 0:
            package_list.remove(package)

    package1, package2 = find_duplicates(package_list)

# Clean coredump_package_list: keep only the entries that still match
# some package in package_list, either by the package's full string
# form or by its base package name.
for coredump_package in coredump_package_list[:]:
    still_listed = any(str(candidate) == coredump_package or
                       candidate.base_package_name == coredump_package
                       for candidate in package_list)
    if not still_listed:
        coredump_package_list.remove(coredump_package)

#
# Print names of found packages first, then a newline separator, and
# then objects for which the packages were not found.
#
if len(coredump_package_list) == 1:
    print coredump_package_list[0]
else:
    print "-"
print
for package in sorted(package_list):
    print str(package)
print
for path, build_id in missing_buildid_list:
    print "{0} {1}".format(path, build_id)