summaryrefslogtreecommitdiffstats
path: root/files/scripts/create-filelist
blob: 8fc3367360123a7181d604104c890d1056585daf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#!/usr/bin/python
from __future__ import print_function

# A simple script to generate a file list in a format easily consumable by a
# shell script.

# Originally written by Jason Tibbitts <tibbs@math.uh.edu> in 2016.
# Donated to the public domain.  If you require a statement of license, please
# consider this work to be licensed as "CC0 Universal", any version you choose.

import argparse
import hashlib
import os
import sys
from scandir import scandir


def get_ftype(entry):
    """Map a directory entry to a one-letter file type code.

    Args:
        entry: An object offering ``is_symlink()`` and ``is_dir()``, such as
            the entries yielded by scandir.

    Returns:
        'l' for a symbolic link, 'd' for a directory, and 'f' for anything
        else.
    """
    # The symlink test comes first, so a link is always reported as 'l'
    # regardless of what it points to.
    if entry.is_symlink():
        code = 'l'
    elif entry.is_dir():
        code = 'd'
    else:
        code = 'f'
    return code


def sha1(fname):
    """Return the SHA1 checksum of a file in hex.

    The file is opened in binary mode and consumed in 64 KiB blocks, so the
    whole file is never held in memory at once.

    Args:
        fname: Path of the file to hash.

    Returns:
        The hexadecimal SHA1 digest of the file's contents, as a string.

    Raises:
        IOError or OSError: If the file cannot be opened or read.
    """
    digest = hashlib.sha1()
    # Close the handle promptly, even if a read fails, so that hashing many
    # files does not accumulate open descriptors.
    with open(fname, 'rb') as fh:
        # iter() with a sentinel keeps calling fh.read() until it returns
        # the empty bytes object, i.e. end of file.
        for block in iter(lambda: fh.read(2 ** 16), b''):
            digest.update(block)

    return digest.hexdigest()


def recursedir(path='.', skip=(), alwaysskip=('.~tmp~',)):
    """Just like scandir, but recursively.

    The contents of a directory are yielded before the entry for the
    directory itself.  Symlinks are yielded but never descended into.

    Args:
        path: Directory to scan.
        skip: Entry names to ignore, but only directly inside ``path``; it
            is not passed down when recursing into subdirectories.
        alwaysskip: Entry names to ignore at every level.

    Yields:
        The scandir entry for every file, symlink and directory that was
        not skipped.
    """
    # The defaults are tuples rather than lists so that no mutable object is
    # shared between calls; only membership tests are performed on them.
    for entry in scandir(path):
        if entry.name in skip or entry.name in alwaysskip:
            continue
        if entry.is_dir(follow_symlinks=False):
            # Don't pass skip here, because we only skip in the top level
            for rentry in recursedir(entry.path, alwaysskip=alwaysskip):
                yield rentry
        yield entry


def parseopts():
    """Parse the command line and return the normalised options.

    On top of what argparse produces, the returned namespace guarantees:

    * ``dir`` falls back to '.' when no directory was given;
    * ``checksum_files`` is always a list, with 'repomd.xml' appended when
      --checksum was given;
    * ``skip_files`` is always a list, with the names of the time list and
      plain file list outputs appended when --skip was given (an output
      whose name is '<stdout>' is never added).

    Returns:
        The argparse namespace holding the options described above.
    """
    # Sink used as the default for the optional lists nobody asked for.
    devnull = open(os.devnull, 'w')

    parser = argparse.ArgumentParser(
        description='Generate a list of files and times, suitable for consumption by quick-fedora-mirror, '
                    'and a much smaller list with packages, Device Tree boot files, HTML files and '
                    'directories filtered out, for consumption by fedfind.')
    add = parser.add_argument

    # What to checksum and what to leave out of the scan.
    add('-c', '--checksum', action='store_true',
        help='Include checksums of all repomd.xml files in the file list.')
    add('-C', '--checksum-file', action='append', dest='checksum_files',
        help='Include checksums of all instances of the specified file.')
    add('-s', '--skip', action='store_true',
        help='Skip the file lists in the top directory')
    add('-S', '--skip-file', action='append', dest='skip_files',
        help='Skip the specified file in the top directory.')

    # Where to scan.
    add('-d', '--dir', help='Directory to scan (default: .).')

    # Where the generated lists go.
    add('-t', '--timelist', type=argparse.FileType('w'), default=sys.stdout,
        help='Filename of the file list with times (default: stdout).')
    add('-f', '--filelist', type=argparse.FileType('w'), default=devnull,
        help='Filename of the file list without times (default: no plain file list is generated).')
    add('-F', '--filterlist', type=argparse.FileType('w'), default=devnull,
        help='Filename of the filtered file list for fedfind (default: not generated).')

    opts = parser.parse_args()

    opts.dir = opts.dir or '.'

    # The 'append' actions leave None behind when the option never appeared.
    opts.checksum_files = opts.checksum_files or []
    if opts.checksum:
        opts.checksum_files.append('repomd.xml')

    opts.skip_files = opts.skip_files or []
    if opts.skip:
        # NOTE(review): the --filterlist output is not added to the skip
        # list here; confirm whether that is intentional.
        for output in (opts.timelist, opts.filelist):
            if output.name != '<stdout>':
                opts.skip_files.append(output.name)

    return opts


def main():
    """Scan the chosen directory and write the requested file lists.

    The time list is written as four sections: '[Version]' (always '2'),
    '[Files]' with one tab-separated ``time, type, size, path`` line per
    entry, '[Checksums SHA1]' with one ``checksum, path`` line per file whose
    name was selected for checksumming, and '[End]'.  The plain file list
    receives every scanned path; the filtered list receives only
    non-directories whose path does not end in one of the filtered suffixes.
    """
    opts = parseopts()
    os.chdir(opts.dir)

    timelist = opts.timelist
    filtered_suffixes = ('.rpm', '.drpm', '.dtb', '.html')
    to_checksum = set()

    # Header: the version block, a blank line, then the start of the files.
    for line in ('[Version]', '2', '', '[Files]'):
        print(line, file=timelist)

    for entry in recursedir(skip=opts.skip_files):
        path = entry.path
        # Drop the first two characters of the path, presumably the './'
        # prefix that comes from scanning '.'.
        relpath = path[2:]

        print(path, file=opts.filelist)

        # write to filtered list if appropriate
        if not (path.endswith(filtered_suffixes) or entry.is_dir()):
            print(path, file=opts.filterlist)

        if entry.name in opts.checksum_files:
            to_checksum.add(relpath)

        # Stat the entry itself, not the target of a symlink.
        info = entry.stat(follow_symlinks=False)
        newest = max(info.st_mtime, info.st_ctime)
        record = '{0}\t{1}\t{2}\t{3}'.format(
            newest, get_ftype(entry), info.st_size, relpath)
        print(record, file=timelist)

    print('\n[Checksums SHA1]', file=timelist)

    # It's OK if the checksum section is empty, but we should include it anyway
    # as the client expects it.
    for relpath in sorted(to_checksum):
        print('{0}\t{1}'.format(sha1(relpath), relpath), file=timelist)

    print('\n[End]', file=timelist)


if __name__ == '__main__':
    # Generate the lists only when run as a script, not when imported.
    main()