summaryrefslogtreecommitdiffstats
path: root/command.py
blob: d0dd617a6feacecab6729c85b3ad7a027145c94b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
# -*- coding: UTF-8 -*-
# Copyright 2014 Red Hat, Inc.
# Part of clufter project
# Licensed under GPLv2 (a copy included | http://gnu.org/licenses/gpl-2.0.txt)
"""Base command stuff (TBD)"""
__author__ = "Jan Pokorný <jpokorny @at@ Red Hat .dot. com>"

import logging
from itertools import izip_longest
from optparse import SUPPRESS_HELP
from os import fdopen
from platform import system, linux_distribution

from .command_context import CommandContext
from .error import ClufterError, \
                   EC
from .filter import Filter
from .format import SimpleFormat
from .plugin_registry import PluginRegistry
from .utils import any2iter, \
                   args2sgpl, \
                   args2tuple, \
                   cli_decor, \
                   func_defaults_varnames, \
                   head_tail, \
                   hybridproperty, \
                   longopt_letters_reprio, \
                   selfaware, \
                   tuplist
from .utils_func import apply_aggregation_preserving_depth, \
                        apply_intercalate, \
                        apply_loose_zip_preserving_depth, \
                        bifilter, \
                        tailshake, \
                        zip_empty

log = logging.getLogger(__name__)


def protodecl(x):
    """Tell if `x` is a two-item sequence led by a Filter instance"""
    return len(x) == 2 and isinstance(x[0], Filter)

class CommandError(ClufterError):
    """Error specific to the command definition or its execution"""


class commands(PluginRegistry):
    """Registry of commands (intended as a metaclass for the commands)"""


class Command(object):
    """Base for commands, i.e., encapsulations of filter chains

    Also see the docstring for `deco`.
    """
    __metaclass__ = commands

    @hybridproperty
    def filter_chain(this):
        """Filter chain (identifiers/classes) the command is made of"""
        chain = this._filter_chain
        return chain

    def __init__(self, *filter_chain):
        """Bind the command instance with its (already resolved) filter chain"""
        # caches below get filled lazily, upon the first respective request;
        # eager evaluation right here is avoided on purpose as many commands
        # may get instantiated initially whereas presumably just one of them
        # is going to be run later on
        self._filter_chain_analysis = None  # will be dict
        self._fnc_defaults_varnames = None
        self._desc_opts = None
        self._filter_chain = filter_chain  # already resolved

    #
    # filter chain related
    #

    @property
    def filter_chain_analysis(self):
        """Outcome of `analyse_chain` for own filter chain (computed once)"""
        analysis = self._filter_chain_analysis
        if analysis is None:
            analysis = self.analyse_chain(self._filter_chain)
            self._filter_chain_analysis = analysis
        return analysis

    @staticmethod
    @selfaware
    def analyse_chain(me, filter_chain, analysis_acc=None):
        """Given the filter chain, return filter backtrack and terminal chain

        This is done by recursive traversal.  Along the way, each filter is
        checked to share at least one protocol with the filter feeding it
        (output format of the feeder vs. input format of the fed filter).

        XXX: mentioned check doesn't know about CompositeFormat and
             the connected magic, yet

        Parameters:
            me:           callable used both for the recursion and as the
                          first argument of raised CommandError (presumably
                          injected by `selfaware` decorator -- TODO confirm)
            filter_chain: tuple/list encoding (a part of) the filter graph
            analysis_acc: accumulator shared across the recursion levels;
                          to be left unset for the top-level call

        Returns:
            dict (the accumulator) with two items:
            - 'filter_backtrack': dict, filter -> dict of its feeders,
              each mapped to the sorted list of the protocols in common
            - 'terminal_chain': list wrapping (nested) list of terminals

        Raises:
            CommandError: when a filter is fed by the same feeder repeatedly
                          or when it has no protocol in common with the feeder
        """
        # unset accumulator denotes the top-level (non-recursive) invocation
        new = analysis_acc is None
        if new:
            analysis_acc = dict(filter_backtrack={},
                                terminal_chain=[[]])
        filter_backtrack = analysis_acc['filter_backtrack']
        # the last list is the one to be filled at this recursion level
        terminal_chain = analysis_acc['terminal_chain'][-1]

        assert tuplist(filter_chain)
        # PASSDOWN or FILTERS
        # tuple of 2+ items: its head feeds the filters in the rest of it,
        # otherwise there is no feeder for the filters at this level
        pass_through, filter_chain = head_tail(*filter_chain) \
                                     if isinstance(filter_chain, tuple) \
                                     and len(filter_chain) > 1 \
                                     else (None, filter_chain)
        for i in filter_chain:
            # non-empty i_tail means the item is itself a PASSDOWN
            i, i_tail = head_tail(i)
            bt = filter_backtrack.setdefault(i, {})
            if new or not (bt or i_tail):
                # new for UPFILTERs, which are also terminals (input ones)
                terminal_chain.append(i)
            if pass_through:
                if pass_through in bt:
                    raise CommandError(me,
                        "filter `{0}' is feeded by `{1}' more than once",
                        i.__class__.__name__, pass_through.__class__.__name__
                    )
                # protocols shared by feeder's output and own input, sorted
                # ascendingly by the count of the native protocols (of these
                # two formats) each one equals to (i.e., such come last)
                common_protocols = sorted(
                    reduce(
                        set.intersection,
                        map(set, (pass_through.out_format._protocols,
                                  i.in_format._protocols))
                    ),
                    key=lambda x:
                        int(x == pass_through.out_format.native_protocol)
                        + int(x == i.in_format.native_protocol)
                )
                if not common_protocols:
                    raise CommandError(me,
                        "filter `{0}' and its feeder `{1}' have no protocol"
                        " in common",
                        i.__class__.__name__, pass_through.__class__.__name__
                    )
                bt[pass_through] = common_protocols
            if i_tail:
                # PASSDOWN
                # this uses a dirty trick of exploiting the end of the list
                # as a sort of a stack, where the per-recursion-level result
                # is available for the caller (who is also responsible for
                # preparing a new list here for callee to fill) so it can
                # move it to the right position afterwards
                analysis_acc['terminal_chain'].append([])  # not terminal_chain
                me((i, ) + i_tail, analysis_acc)
                terminal_chain.append(analysis_acc['terminal_chain'].pop())
            elif new:
                # yes, terminal UPFILTER is tracked twice as terminal (I/O)
                terminal_chain.append(i)

        return analysis_acc

    #
    # self-introspection (arguments, description, options)
    #

    def _figure_fnc_defaults_varnames(self):
        """Dissect self._fnc to arg defaults (dict) + all arg names (tuple)

        The first argument of the function is skipped.  The result is
        computed just once and cached within the instance.

        Raises:
            CommandError: when `_fnc` attribute is not available
        """
        try:
            fnc = self._fnc
        except AttributeError:
            # only a missing attribute denotes an incomplete subclass,
            # anything else (incl. KeyboardInterrupt) has to propagate
            raise CommandError(self, "Subclass does not implement _fnc")
        if self._fnc_defaults_varnames is None:
            self._fnc_defaults_varnames = func_defaults_varnames(fnc, skip=1)
        return self._fnc_defaults_varnames

    def _figure_parser_opt_noop(self, options, shortopts):
        """Append debugging `--noop` option declaration to `options`

        The offered choices are names of the filters whose input format
        class is a subclass of their output format class.
        """
        optname_used = "noop"
        choices = [flt.__class__.name
                   for flt in apply_intercalate(self.filter_chain)
                   if issubclass(flt.in_format.__class__,
                                 flt.out_format.__class__)]
        short_aliases = shortopts.setdefault(optname_used[0], [])
        assert optname_used not in \
               (options[i][0][0] for i in short_aliases)
        log.debug("choices: {0}".format(choices))
        options.append([
            ["--" + optname_used],
            dict(
                action='append',
                choices=choices,
                default=[],
                help="debug only: NOOPize filter (2+: repeat) [none out of %choices]"
            ),
        ])

    def _figure_parser_opt_unofficial(self, options, fnc_varnames,
                                      shortopts=None):
        """Append hidden (unofficial/unsupported) options to `options`

        Parameters:
            options:      list of `[[flags...], Option-kwargs]` items that
                          gets extended in-place
            fnc_varnames: names of the function arguments to be exposed
                          without being listed in the help screen
            shortopts:    optional dict, initial letter -> list of indexes
                          into `options`; used for a sanity check only
        """
        if shortopts is None:
            # nothing to cross-check against when the caller does not track
            # the short options at all
            shortopts = {}
        for var in fnc_varnames:
            optname_used = cli_decor(var)
            short_aliases = shortopts.get(optname_used[0], ())
            assert optname_used not in \
                   (options[i][0][0] for i in short_aliases)
            options.append([["--" + optname_used], dict(help=SUPPRESS_HELP)])

    def _figure_parser_desc_opts(self, fnc_defaults, fnc_varnames):
        """Parse own docstring to the description and option declarations

        Lines preceding the one starting with `options:` (case-insensitive)
        form the description; each non-empty line past that point is expected
        to carry a name of the function argument followed by its help text.
        Lines not matching any (remaining) argument are logged and skipped.
        Arguments not documented this way are still exposed as options,
        though hidden in the help screen.

        Parameters:
            fnc_defaults: dict, argument name -> its default value
            fnc_varnames: iterable of all the argument names

        Returns:
            tuple of the description (string) and the list of options,
            each being a `[[flags...], Option-kwargs]` item
        """
        readopts, shortopts, options = False, {}, []
        description = []
        fnc_varnames = set(fnc_varnames)

        # docstring may be missing altogether (undocumented function)
        for line in (self.__doc__ or '').splitlines():
            line = line.lstrip()
            if readopts:
                if not line:
                    continue
                line = line.replace('\t', ' ')
                optname, optdesc = head_tail(line.split(' ', 1))  # 2nd->tuple
                if not all((optname, optdesc)) or optname not in fnc_varnames:
                    log.debug("Bad option line: {0}".format(line))
                else:
                    optname_used = cli_decor(optname)
                    log.debug("Command `{0}', found option `{1}' ({2})".format(
                        self.__class__.name, optname_used, optname
                    ))
                    fnc_varnames.remove(optname)
                    short_aliases = shortopts.setdefault(optname_used[0], [])
                    assert optname_used not in \
                           (options[i][0][0] for i in short_aliases)
                    short_aliases.append(len(options))  # as an index
                    opt = {}
                    opt['help'] = optdesc[0].strip()
                    if optname in fnc_defaults:  # default if known
                        default = fnc_defaults[optname]
                        # genuine booleans only: plain 0/1 compare equal to
                        # False/True but denote a value, not a flag
                        if isinstance(default, bool):
                            # the flag flips the default when specified
                            opt['action'] = ('store_true',
                                             'store_false')[int(default)]
                            opt['help'] += " [{0}]".format('enabled' if default
                                                           else 'disabled')
                        else:
                            opt['help'] += " [%default]"
                        opt['default'] = default
                    options.append([["--" + optname_used], opt])
            elif line.lower().startswith('options:'):
                readopts = True
            else:
                description.append(line)

        for short, aliases in shortopts.iteritems():  # foreach in ideal shorts
            for i, alias in enumerate(aliases):  # foreach in conflicting ones
                for c in longopt_letters_reprio(options[alias][0][0]):
                    if c not in shortopts or i == 0:
                        use = '-' + c
                        break
                else:
                    log.warning("Could not find short option for `{0}'"
                                .format(options[alias][0]))
                    break
                options[alias][0].append(use)

        self._figure_parser_opt_noop(options, shortopts)
        self._figure_parser_opt_unofficial(options, fnc_varnames)

        # drop single trailing empty line (if any); note the description
        # may be empty as such
        if description and not description[-1]:
            description = description[:-1]
        description = '\n'.join(description)
        return description, options

    @property
    def parser_desc_opts(self):
        """Description + Option constructor args list out of the docstring

        Parsed on the first access, then served from the cache.
        """
        cached = self._desc_opts
        if cached is None:
            defaults, varnames = self._figure_fnc_defaults_varnames()
            cached = self._figure_parser_desc_opts(defaults, varnames)
            self._desc_opts = cached
        return cached

    #
    # execution related
    #

    @staticmethod
    @selfaware
    def _iochain_check_terminals(me, io_chain, terminal_chain):
        """Validate the io chain against the terminal filter chain

        Parameters:
            me:             first argument of raised CommandError (presumably
                            injected by `selfaware` decorator -- TODO confirm)
            io_chain:       io declarations as produced by the command
                            function
            terminal_chain: terminal filters as arranged by `analyse_chain`

        Returns:
            outcome of `apply_loose_zip_preserving_depth` applied on both
            chains (presumably terminal filters paired with the respective
            io declarations -- TODO confirm)

        Raises:
            CommandError: on the first shape mismatch or protocol unsuitable
                          for the respective format of the filter
        """
        # validate "terminal filter chain" vs "io chain"
        # 1. "shapes" match incl. input (head)/output (tail) protocol match
        if len(terminal_chain) == 1 and len(io_chain) == len(terminal_chain[0]):
            # see `deco`: 2.
            io_chain = args2tuple(io_chain)
        to_check = apply_loose_zip_preserving_depth(terminal_chain, io_chain)
        for to_check_inner in to_check:
            # passno: 0 for the input part, 1 for the output part
            for passno, check in enumerate(head_tail(to_check_inner)):
                # proper (filter, io decl) pair is turned into the name of
                # the protocol if that is not supported by the respective
                # (input/output) format of the filter, None otherwise;
                # anything else is kept if it has any true item, else None
                checked = apply_aggregation_preserving_depth(
                    lambda i:
                        head_tail(i[1])[0] not in getattr(i[0],
                            ('in_format', 'out_format')[passno])._protocols
                            and str(head_tail(i[1])[0]) or None
                        if protodecl(i) else i if any(i) else None
                )(to_check_inner[passno])
                checked_flat = apply_intercalate((checked,))
                # any true item left denotes a problem, the first one found
                # is reported (together with its 1-based position)
                for order, cmd in filter(lambda (i, x): x,
                                         enumerate(checked_flat)):
                    raise CommandError(me,
                        "filter resolution #{0} of {1}: {2}", order + 1,
                        ('input', 'output')[passno],
                        "filter/io chain definition (shape) mismatch"
                        if isinstance(cmd, (type(zip_empty), Filter))
                        else "`{0}' protocol not suitable".format(cmd)
                    )
        return to_check

    @classmethod
    def _iochain_proceed(cls, cmd_ctxt, io_chain):
        """Run the filters as needed to satisfy the terminals per `io_chain`

        Parameters:
            cmd_ctxt: command context; expected to carry the items
                      'filter_chain_analysis' and 'filter_noop' and to
                      provide the per-filter contexts
            io_chain: io declarations for the terminal filters (validated
                      with `_iochain_check_terminals` first)

        Returns:
            EC.EXIT_SUCCESS (unconditionally for the time being)

        Raises:
            CommandError: when `io_chain` does not match the terminal filters
        """
        # currently works sequentially, jumping through the terminals in-order;
        # when any of them (apparently the output one) hasn't its prerequisites
        # (i.e., input data) satisfied, the run is restarted with first
        # producing such data (which are output of another filter feeding
        # the one in question) -- this can be repeated multiple times if
        # there is a longer chain forming such a gap
        # -- this is certainly needlessly slow method, but there is a hope
        #    the same approach could be applied when parallelizing the stuff
        # XXX could be made more robust (ordering still not as strict as it
        #                                should)
        # XXX some parts could be performed in parallel (requires previous
        #     item so to prevent deadlocks on cond. var. wait)
        #     - see also `heapq` standard module
        filter_backtrack = cmd_ctxt['filter_chain_analysis']['filter_backtrack']
        terminal_chain = cmd_ctxt['filter_chain_analysis']['terminal_chain']
        terminals = apply_intercalate(terminal_chain)

        terminal_chain = cls._iochain_check_terminals(io_chain, terminal_chain)

        magic_fds = {}
        input_cache = cmd_ctxt.setdefault('input_cache', {})
        worklist = list(reversed(tailshake(terminal_chain,
                                           partitioner=lambda x:
                                           not (tuplist(x)) or protodecl(x))))
        while worklist:
            flt, io_decl = worklist.pop()
            flt_ctxt = cmd_ctxt.ensure_filter(flt)
            if not filter_backtrack[flt] and not flt_ctxt['out']:
                # INFILTER in in-mode
                log.debug("Run `{0}' filter with `{1}' io decl. as INFILTER"
                          .format(flt.__class__.__name__, io_decl))
                if io_decl in input_cache:
                    in_obj = input_cache[io_decl]
                else:
                    in_obj = flt.in_format.as_instance(*io_decl)
                    input_cache[io_decl] = in_obj
            elif filter_backtrack[flt] and not flt_ctxt['out']:
                # not INFILTER in either mode (nor output already precomputed?)
                log.debug("Run `{0}' filter with `{1}' io decl. as DOWNFILTER"
                          .format(flt.__class__.__name__, io_decl))
                inputs = [cmd_ctxt.filter(x.__class__.__name__)['out']
                          for x in filter_backtrack[flt]]
                notyet, ok = bifilter(lambda x:
                                  cmd_ctxt.filter(x.__class__.__name__)['out'] is None,
                                  filter_backtrack[flt])
                if notyet:
                    log.debug("Backtrack with inclusion of {0} to feed `{1}'"
                              .format(', '.join("`{0}'"
                                      .format(nt.__class__.__name__)
                                              for nt in notyet),
                                      flt.__class__.__name__))
                    # retry this filter once its feeders have been run
                    worklist.append((flt, io_decl))
                    worklist.extend(reversed(tuple((ny, None)
                                             for ny in notyet)))
                    continue

                # turning @DIGIT+ magic files into fileobjs (needs global view)
                fd = SimpleFormat.io_decl_fd(io_decl)
                if fd is not None:
                    if fd not in magic_fds:
                        try:
                            magic_fds[fd] = fdopen(fd, 'ab')
                        except (OSError, IOError):
                            # keep untouched
                            pass
                    if fd in magic_fds:
                        # substitute only when there is a fileobj for real,
                        # the declaration is kept as is otherwise
                        io_decl = args2sgpl(io_decl[0], magic_fds[fd],
                                            *io_decl[2:])

                assert all(inputs)
                in_obj = flt.in_format.as_instance(*inputs)
            if not flt_ctxt['out'] or flt not in terminals:
                if not flt_ctxt['out']:
                    if flt.__class__.name in cmd_ctxt['filter_noop']:
                        # NOOPized filter just passes its input through
                        ret = in_obj
                    else:
                        ret = flt(in_obj, flt_ctxt)
                    flt_ctxt['out'] = ret
                if flt not in terminals or not filter_backtrack[flt]:
                    continue
            # output time!  (INFILTER terminal listed twice in io_chain)
            log.debug("Run `{0}' filter with `{1}' io decl. as TERMINAL"
                      .format(flt.__class__.__name__, io_decl))
            # XXX following could be stored somewhere, but rather pointless
            flt_ctxt['out'](*io_decl)

        for magic_file in magic_fds.itervalues():  # close "magic" fds
            magic_file.close()
        return EC.EXIT_SUCCESS  # XXX some better decision?

    def __call__(self, opts, args=None, cmd_ctxt=None):
        """Proceed the command

        Parameters:
            opts:     object carrying the option values as attributes named
                      after the arguments of the wrapped function (plus
                      optional `noop` one)
            args:     optional sequence of positional values to be used,
                      in order, for the arguments not specified via `opts`;
                      the first item can carry multiple `::`-separated
                      values; the sequence itself is never modified
            cmd_ctxt: optional command context (new one created if unset)

        Returns:
            exit code (EC.EXIT_SUCCESS unless any processing stage yields
            something else)

        Raises:
            CommandError: when there is no value for an argument that has
                          no (or None) default
        """
        ec = EC.EXIT_SUCCESS
        fnc_defaults, fnc_varnames = self._figure_fnc_defaults_varnames()
        kwargs = {}
        if args:
            # private copy: caller's sequence must stay intact (and any
            # sequence type, not only list, is welcome)
            args = list(args)
            if '::' in args[0]:
                # desugaring, which is useful mainly if non-contiguous sequence
                # of value-based options need to be specified
                args = args[0].split('::') + args[1:]
            args.reverse()  # we will be poping from the end
        for v in fnc_varnames:
            default = fnc_defaults.get(v, None)
            opt = getattr(opts, v, default)
            if opt != default:
                # explicitly specified option takes the precedence
                kwargs[v] = opt
                continue
            # otherwise use the first non-empty positional value remaining
            while args:
                cur = args.pop()
                if cur != '':
                    kwargs[v] = cur
                    break
            if opt is None and v not in kwargs:
                raise CommandError(self, "missing ex-/implicit `{0}' value", v)
        cmd_ctxt = cmd_ctxt or CommandContext()
        cmd_ctxt.ensure_filters(apply_intercalate(self._filter_chain))
        cmd_ctxt['filter_chain_analysis'] = self.filter_chain_analysis
        cmd_ctxt['filter_noop'] = getattr(opts, 'noop', [])
        io_driver = any2iter(self._fnc(cmd_ctxt, **kwargs))
        # 1st item from the function is the io chain to proceed, the 2nd one
        # (if any) is directly the exit code concluding the postprocessing
        io_handler = (self._iochain_proceed, lambda c, ec=EC.EXIT_SUCCESS: ec)
        io_driver_map = izip_longest(io_driver, io_handler)
        for driver, handler in io_driver_map:
            driver = () if driver is None else (driver, )
            ec = handler(cmd_ctxt, *driver)
            if ec != EC.EXIT_SUCCESS:
                break
        return ec

    @classmethod
    def deco(cls, *filter_chain):
        r"""Decorator as an easy factory of actual commands

        Parameters:
            filter_chain: particular scalars and vectors (variable depth)
                          representing graph of filters that form this command

        Note on graph representation within filter_chain:

                 __B    ___D
                /      /
            A--<___C--<             in ----------------> out
                       \
            O___________>--P


        graph with letters denoting the filters and with the left->right
        direction of flow from the inputs towards outputs (final outputs
        at terminals: B, D, P), is encoded as:

            ((A, B, (C, D, P)), (O, P), )

        where, for filter x (in {A, ..., D, O, P} for the example at hand):

            EXPRESSION  ::= UPFILTERS
            UPFILTERS   ::= TERMINAL | ( FILTERS )
            FILTERS     ::= FILTER, | FILTERS FILTER
            FILTER      ::= PASSDOWN | TERMINAL
            PASSDOWN    ::= (TERMINAL, DOWNFILTERS)
            TERMINAL    ::= x
            DOWNFILTERS ::= FILTERS

        where:
            - {UP,DOWN}FILTERS dichotomy is present only as
              a forward-reference for easier explanation
            - there is a limitation such that each filter can
              be contained as at most one node in the graph as above
              (this corresponds to the notion of inputs merge for
              the filter, as otherwise there would be ambiguity:
              in the selected notation, can the filter occurrences stand
              for unique nodes?  remember, filters as singletons)
            - UPFILTERS ::= TERMINAL is a syntactic sugar exploiting
              unambiguity in converting such expression as (TERMINAL, )
            - to make it explicit, the graph is expressed in depth-first
              (or DFS) manner


        Note on the decorated function:
            It should either return an iterable or behave itself as a generator
            yielding the items (at once) and on subsequent round triggering
            some postprocessing (still from decorated function's perspective).
            The items coming from the function encode the protocols at
            the input(s) and the output(s) of the filter graph encoded in
            `filter_chain` and ought to reflect this processing construct
            as follows:
                1. for each UPFILTER in order, there is a tuple of two parts
                   1b. first part denotes the input (only single decl)
                   2b. second part denotes the output, which follows the
                       branch of filter chain pertaining the particular
                       UPFILTER, and can be either scalar or (arbitrarily)
                       nested iterable to match that filter chain branch
                       (proper nesting is not needed, only the order is
                       important, see point 4.)
                2. if there is just one UPFILTER, the toplevel definition
                   can be just the respective un-nested item, as this case
                   is easy to distinguish and apply un-sugaring if applicable
                3. when there is the same filter down the line shared by
                   2+ UPFILTERs (cf. "limitation such that each filter" above)
                   the respective protocol encoding is expected just once
                   within the first(!) respective UPFILTER definition
                #-- not yet, if ever, as it is opposed by good diagnostics --
                #4. nesting of the second part of the tuple (2b.) is not
                #   strictly needed and only the order is important,
                #   as the association is performed on intercalated chains
                #   anyway (note that this is orthogonal to simplification 2.)

            for the graph above, it would be, e.g.,:

            (('Aproto', 'a-in.txt'),
                (('Bproto', 'b-out.txt'), (('Dproto', 'd-out.txt'), ('Pproto')))),
            (('Oproto', 'e-in.txt'), )

            #which, as per point 4., can be further simplified as:

            #(('Aproto', 'a-in.txt'),
            #    ('Bproto', 'b-out.txt'), ('Dproto', 'd-out.txt'), ('Pproto')),
            #(('Oproto', 'e-in.txt'), )
        """
        def deco_fnc(fnc):
            log.debug("Command: deco for {0}".format(fnc))
            # class body of the command being born: named, documented and
            # placed (module-wise) after the decorated function
            namespace = dict(
                __module__=fnc.__module__,
                __doc__=fnc.__doc__,
                _filter_chain=args2sgpl(filter_chain),
                _fnc=staticmethod(fnc),
            )
            # optimization: shorten type() -> new() -> probe
            return cls.probe(fnc.__name__, (cls, ), namespace)
        return deco_fnc


class CommandAlias(object):
    """Way to define either static or dynamic command alias"""
    __metaclass__ = commands

    # figured out once (at the class definition time), passed to resolvers
    _system = system()
    _system_extra = linux_distribution(full_distribution_name=0) \
                    if _system == 'Linux' else ()

    @classmethod
    def deco(outer_cls, decl):
        """Decorator as an easy factory of actual command aliases

        Parameters:
            decl: when callable, it is invoked as
                  `decl(cmds, system, system_extra)` upon the alias
                  instantiation and is expected to return either
                  a Command instance or a name (string) of the command
                  to be looked up in `cmds`

        Returns:
            outcome of `probe` (presumably the alias class named after
            `decl` -- TODO confirm against PluginRegistry)
        """
        if not hasattr(decl, '__call__'):
            # NOTE(review): classes are callable on their own, hence this
            # branch looks hardly reachable with a Command subclass (and
            # issubclass raises TypeError for a non-class) -- confirm intent
            assert issubclass(decl, Command)
            # resolver is always invoked with positional arguments (see
            # `new` below), so these have to be accepted (and ignored)
            fnc = lambda *args, **kwargs: decl
        else:
            fnc = decl
        log.debug("CommandAlias: deco for {0}".format(fnc))

        def new(cls, cmds):
            # XXX really pass mutable cmds dict?
            use_obj = fnc(cmds, outer_cls._system, outer_cls._system_extra)
            if not isinstance(use_obj, Command):
                # a name of the command expected then, resolve it
                assert isinstance(use_obj, basestring)
                use_obj = cmds[use_obj]
                assert isinstance(use_obj, Command)
            return use_obj

        attrs = {
            '__module__': fnc.__module__,
            '__new__': new,
        }
        # optimization: shorten type() -> new() -> probe
        ret = outer_cls.probe(fnc.__name__, (outer_cls, ), attrs)
        return ret