1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
|
#!/usr/bin/python
from __future__ import print_function
# A simple script to generate a file list in a format easily consumable by a
# shell script.
# Originally written by Jason Tibbitts <tibbs@math.uh.edu> in 2016.
# Donated to the public domain. If you require a statement of license, please
# consider this work to be licensed as "CC0 Universal", any version you choose.
import argparse
import hashlib
import os
import stat
import sys
# Get scandir from whatever module provides it today
try:
from os import scandir
except ImportError:
from scandir import scandir
# productmd is optional, needed only for the imagelist feature
try:
from productmd.images import SUPPORTED_IMAGE_FORMATS
except ImportError:
SUPPORTED_IMAGE_FORMATS = []
class SEntry(object):
    """A simpler DirEntry-like object.

    Captures, at construction time, the pieces of a scandir() entry that the
    file list needs:

      direntry        the original DirEntry
      restricted      whether the caller flagged this entry as restricted
      path, name      copied from the DirEntry
      modtime         the later of st_mtime and st_ctime
      readable_group  st_mode masked with S_IRGRP (non-zero when set)
      readable_world  st_mode masked with S_IROTH (non-zero when set)
      size            st_size
      ftype           'f', 'd' or 'l', optionally followed by '*'
                      (restricted) or '-' (not world readable)

    The stat data describes the entry itself; symlinks are not followed.
    """

    def __init__(self, direntry, restricted=False):
        self.direntry = direntry
        self.restricted = restricted
        self.path = direntry.path
        self.name = direntry.name

        # Stat the entry itself rather than whatever a symlink points to.
        st = direntry.stat(follow_symlinks=False)
        self.modtime = max(st.st_mtime, st.st_ctime)
        self.readable_group = st.st_mode & stat.S_IRGRP
        self.readable_world = st.st_mode & stat.S_IROTH
        self.size = st.st_size

        # The symlink test comes first so that a link to a directory is
        # reported as a link and not as a directory.
        if direntry.is_symlink():
            kind = 'l'
        elif direntry.is_dir():
            kind = 'd'
        else:
            kind = 'f'

        # An unreadable state overrides the restricted state.
        if not self.readable_world:
            flag = '-'
        elif self.restricted:
            flag = '*'
        else:
            flag = ''

        self.ftype = kind + flag
def sha1(fname):
    """Return the SHA1 checksum of a file in hex.

    The file is read in binary mode in 64 KiB blocks, so large files are
    never held in memory all at once.  The file handle is closed before
    returning, including when reading raises.

    fname is the path of the file to hash.  Errors from opening or reading
    the file propagate to the caller.
    """
    digest = hashlib.sha1()
    with open(fname, 'rb') as fh:
        # iter() with a sentinel keeps calling read() until it returns the
        # empty bytes object, i.e. until end of file.
        for block in iter(lambda: fh.read(2 ** 16), b''):
            digest.update(block)
    return digest.hexdigest()
def recursedir(path='.', skip=[], alwaysskip=['.~tmp~'], in_restricted=False):
    """Like scandir, but recursively.

    Will skip everything in the skip array, but only at the top level
    directory.  Names in alwaysskip are skipped at every level.  (Neither
    default list is ever modified here.)

    Entries whose target is not group readable are skipped, as are entries
    which cannot be stat()ed at all (for example dangling symlinks); the
    latter are reported on stderr.

    Yields SEntry objects; the contents of a directory are yielded before the
    directory itself.  If in_restricted is true, all returned entries will be
    marked as restricted even if their permissions are not restricted.
    Anything found below a directory which is not world readable is marked
    as restricted as well.
    """
    for dentry in scandir(path):
        if dentry.name in skip or dentry.name in alwaysskip:
            continue

        # Symlinks are followed here so that clients won't see dangling
        # symlinks to content they can't transfer.  It's the default, but to
        # avoid confusion it's been made explicit.  Following a dangling
        # symlink raises OSError; skip that one entry instead of aborting
        # the whole scan.
        try:
            target = dentry.stat(follow_symlinks=True)
        except OSError:
            print('Could not stat {0}; skipping. Dangling symlink?'.format(dentry.path),
                  file=sys.stderr)
            continue

        # Skip things which are not at least group readable
        if not (target.st_mode & stat.S_IRGRP):
            continue

        se = SEntry(dentry, in_restricted)

        # Only real directories are descended into, never symlinks to them.
        if dentry.is_dir(follow_symlinks=False):
            # A directory which is not world readable restricts everything
            # below it.
            child_restricted = in_restricted
            if not se.readable_world:
                child_restricted = True

            # Don't pass skip here, because we only skip in the top level
            for child in recursedir(se.path, alwaysskip=alwaysskip,
                                    in_restricted=child_restricted):
                yield child

        yield se
def parseopts():
    """Parse the command line and return the argparse namespace.

    The returned namespace is normalized so that:

      dir             is '.' when no directory was given
      checksum_files  is always a list, with 'repomd.xml' appended when
                      --checksum was given
      skip_files      is always a list; when --skip was given, the basename
                      of every real output file is appended
      timelist, filelist, imagelist
                      are open, writable file objects (stdout for the
                      timelist and os.devnull for the other two by default)
    """
    # Sink for the optional lists which were not requested.
    null = open(os.devnull, 'w')
    p = argparse.ArgumentParser(
        description='Generate a list of files and times, suitable for consumption by quick-fedora-mirror, '
                    'and (optionally) a much smaller list of only files that match one of the productmd '
                    ' supported image types, for use by fedfind.')
    p.add_argument('-c', '--checksum', action='store_true',
                   help='Include checksums of all repomd.xml files in the file list.')
    p.add_argument('-C', '--checksum-file', action='append', dest='checksum_files',
                   help='Include checksums of all instances of the specified file.')
    p.add_argument('-s', '--skip', action='store_true',
                   help='Skip the file lists in the top directory')
    p.add_argument('-S', '--skip-file', action='append', dest='skip_files',
                   help='Skip the specified file in the top directory.')
    p.add_argument('-d', '--dir', help='Directory to scan (default: .).')
    p.add_argument('-t', '--timelist', type=argparse.FileType('w'), default=sys.stdout,
                   help='Filename of the file list with times (default: stdout).')
    p.add_argument('-f', '--filelist', type=argparse.FileType('w'), default=null,
                   help='Filename of the file list without times (default: no plain file list is generated).')
    p.add_argument('-i', '--imagelist', type=argparse.FileType('w'), default=null,
                   help='Filename of the image file list for fedfind (default: not generated). Requires '
                        'the productmd library.')

    opts = p.parse_args()

    if not opts.dir:
        opts.dir = '.'

    # action='append' leaves the value as None when the option is never used.
    opts.checksum_files = opts.checksum_files or []
    if opts.checksum:
        opts.checksum_files += ['repomd.xml']

    opts.skip_files = opts.skip_files or []
    if opts.skip:
        # Only real output files are worth skipping.  stdout and the
        # os.devnull placeholder do not live in the scanned directory, and
        # adding the basename of os.devnull would hide any unrelated top
        # level entry which happens to be called 'null'.
        for output in (opts.timelist, opts.filelist, opts.imagelist):
            if output.name not in ('<stdout>', os.devnull):
                opts.skip_files += [os.path.basename(output.name)]

    return opts
def main():
    """Scan the requested directory and write the file lists.

    Writes three outputs, each to the file object chosen by parseopts():

      timelist   a '[Version]' section, a '[Files]' section with one
                 tab-separated line per entry (modification time, file type,
                 size, path), a '[Checksums SHA1]' section and '[End]'
      filelist   the path of every entry, one per line
      imagelist  the path of every entry whose name ends in one of the
                 productmd image format extensions

    Exits with an error message when an image list was requested but
    productmd (and therefore SUPPORTED_IMAGE_FORMATS) is not available.
    """
    opts = parseopts()
    if opts.imagelist.name != os.devnull and not SUPPORTED_IMAGE_FORMATS:
        sys.exit("--imagelist requires the productmd library!")

    # The scan below runs on '.', so paths come out relative to opts.dir.
    os.chdir(opts.dir)

    # Neither of these changes while scanning, so build them just once.
    # str.endswith accepts a tuple of suffixes; an empty tuple never matches.
    image_suffixes = tuple('.{0}'.format(form) for form in SUPPORTED_IMAGE_FORMATS)
    checksum_paths = set()

    print('[Version]', file=opts.timelist)
    # XXX Technically this should be version 3.  But old clients will simply
    # ignore the extended file types for restricted directories, and so we can
    # add this now and let things simmer for a while before bumping the format
    # and hard-breaking old clients.
    print('2', file=opts.timelist)
    print(file=opts.timelist)
    print('[Files]', file=opts.timelist)

    for entry in recursedir(skip=opts.skip_files):
        print(entry.path, file=opts.filelist)

        # write to filtered list if appropriate
        if entry.path.endswith(image_suffixes):
            print(entry.path, file=opts.imagelist)

        # Paths from the scan start with './'; the time list omits that.
        relpath = entry.path[2:]
        if entry.name in opts.checksum_files:
            checksum_paths.add(relpath)

        print('{0}\t{1}\t{2}\t{3}'.format(entry.modtime, entry.ftype,
                                          entry.size, relpath),
              file=opts.timelist)

    print('\n[Checksums SHA1]', file=opts.timelist)

    # It's OK if the checksum section is empty, but we should include it anyway
    # as the client expects it.
    for f in sorted(checksum_paths):
        print('{0}\t{1}'.format(sha1(f), f), file=opts.timelist)

    print('\n[End]', file=opts.timelist)
# Generate the lists only when run as a script, so that importing this module
# has no side effects.
if __name__ == '__main__':
    main()
|