root/releases/pkgcore-checks/0.3.5/pkgcore_checks/base.py @ ferringb%2540gmail.com-20070408184900-k7li2y7xdgo2sbto

Revision ferringb%2540gmail.com-20070408184900-k7li2y7xdgo2sbto, 17.3 KB (checked in by Brian Harring <ferringb@…>, 21 months ago)

pull in 449 from my branch; add attrs awareness for pickling, try it first, then fall back to slots (backwards compatible api extension iow). fixes an issue with trying to pickle/restore Result objects from metadata_xml

Line 
1# Copyright: 2006 Brian Harring <ferringb@gmail.com>
2# Copyright: 2006 Marien Zwart <marienz@gentoo.org>
3# License: GPL2
4
5
6"""Core classes and interfaces.
7
8This defines a couple of standard feed types and scopes. Currently
9feed types are strings and scopes are integers, but you should use the
10symbolic names wherever possible (everywhere except for adding a new
11feed type) since this might change in the future. Scopes are integers,
12but do not rely on that either.
13
14Feed types have to match exactly. Scopes are ordered: they define a
15minimally accepted scope, and for transforms the output scope is
16identical to the input scope.
17"""
18
19
20import operator
21
22from pkgcore.config import ConfigHint
23from pkgcore.util.compatibility import any
24from pkgcore.util.demandload import demandload
25demandload(globals(), "logging re itertools")
26
# Symbolic names for the feed types. Feed types are plain strings and
# have to match exactly.
repository_feed = "repo"
category_feed = "cat"
package_feed = "cat/pkg"
versioned_feed = "cat/pkg-ver"
ebuild_feed = "cat/pkg-ver+text"

# The plugger needs to be able to compare those and know the highest one.
# Scopes are ordered integers: version_scope is the lowest value (0) and
# repository_scope the highest (3).
version_scope, package_scope, category_scope, repository_scope = range(4)
max_scope = repository_scope
36
37
class Addon(object):

    """Base class for extra functionality for pcheck other than a check.

    The checkers can depend on one or more of these. They will get
    called at various points where they can extend pcheck (if any
    active checks depend on the addon).

    These methods are not part of the checker interface because that
    would mean addon functionality shared by checkers would run twice.
    They are not plugins because they do not do anything useful if no
    checker depending on them is active.

    This interface is not finished. Expect it to grow more methods
    (but if not overridden they will be no-ops).

    @cvar required_addons: sequence of addons this one depends on.
    @cvar known_results: list, empty in this base class.
    """

    required_addons = ()
    # NOTE(review): this is one mutable list object shared by every
    # subclass that does not assign its own; presumably subclasses are
    # expected to rebind it rather than append to it -- confirm.
    known_results = []

    def __init__(self, options, *args):
        """Initialize.

        An instance of every addon in required_addons is passed as extra arg.
        This base implementation only stores C{options}; the extra
        positional arguments are accepted and ignored.

        @param options: the optparse values.
        """
        self.options = options

    @staticmethod
    def mangle_option_parser(parser):
        """Add extra options and/or groups to the option parser.

        This hook is always triggered, even if the checker is not
        activated (because it runs before the commandline is parsed).
        The base implementation does nothing.

        @param parser: an C{OptionParser} instance.
        """

    @staticmethod
    def check_values(values):
        """Postprocess the optparse values.

        Should raise C{optparse.OptionValueError} on failure.

        This is only called for addons that are enabled, but before
        they are instantiated. The base implementation does nothing.

        @param values: the optparse values to postprocess.
        """
88
89
class set_documentation(type):

    """Metaclass exposing a class docstring as its C{documentation} attribute.

    The copy only happens when the class body supplies C{__doc__} and
    does not already define C{documentation} itself.
    """

    def __new__(cls, name, bases, d):
        has_docstring = "__doc__" in d
        if has_docstring and "documentation" not in d:
            d["documentation"] = d["__doc__"]
        return type.__new__(cls, name, bases, d)
95
class Template(Addon):

    """Base template for a check."""

    # The metaclass copies the class docstring into a "documentation"
    # class attribute unless the class body defines that attribute itself.
    __metaclass__ = set_documentation

    # Default scope; 0 is the value of version_scope.
    scope = 0
    # The plugger sorts based on this. Should be left alone except for
    # weird pseudo-checks like the cache wiper that influence other checks.
    priority = 0

    def start(self):
        """Do startup here."""

    def feed(self, item, reporter):
        """Process one item; must be overridden.

        @param item: the object to check.
        @param reporter: object passed along for reporting results.
        @raise NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

    def finish(self, reporter):
        """Do cleanup and emit final results here."""
115
116
class Transform(object):

    """Base class for a feed type transformer.

    A transform wraps a single child consumer and forwards the
    C{start} and C{finish} lifecycle calls to it. Subclasses implement
    C{feed}.

    @cvar source: start type
    @cvar dest: destination type
    @cvar scope: minimum scope
    @cvar cost: cost
    """

    def __init__(self, child):
        """Store the child.

        @param child: object with C{start()} and C{finish(reporter)}
            methods that this transform forwards to.
        """
        self.child = child

    def start(self):
        """Startup."""
        self.child.start()

    def feed(self, item, reporter):
        """Process one item; must be overridden.

        @raise NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

    def finish(self, reporter):
        """Clean up."""
        # This used to be shadowed by a second, empty finish() defined
        # further down the class body, so the child was started but
        # never finished.
        self.child.finish(reporter)

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self.child)
146
147
def _collect_checks(obj):
    """Yield every check (C{Addon} instance) found inside C{obj}.

    @param obj: a C{Transform} (its child is searched), a
        C{CheckRunner} (its checks are searched), an C{Addon} (yielded
        itself), or any other iterable of such objects.
    """
    if isinstance(obj, Transform):
        found = collect_checks(obj.child)
    elif isinstance(obj, CheckRunner):
        found = itertools.chain(*map(collect_checks, obj.checks))
    elif isinstance(obj, Addon):
        found = [obj]
    else:
        # Anything else is treated as an iterable of the above. This
        # branch used to read the not-yet-assigned local instead of obj
        # and therefore always failed with UnboundLocalError.
        found = itertools.chain(*map(collect_checks, obj))
    for check in found:
        yield check
159
def collect_checks(obj):
    """Return the set of checks found inside C{obj}."""
    checks = set()
    checks.update(_collect_checks(obj))
    return checks
162
def collect_checks_classes(obj):
    """Return the set of classes of the checks found inside C{obj}."""
    classes = set()
    for check in collect_checks(obj):
        classes.add(check.__class__)
    return classes
165
class Result(object):

    # Base class for results. Documented with comments rather than a
    # class docstring: the metaclass below copies a class docstring into
    # a "documentation" attribute, which subclasses without a docstring
    # of their own would then inherit.

    __metaclass__ = set_documentation

    # No instance attributes here; the attributes set by the helpers
    # below have to be provided by the subclass (slots or a __dict__).
    __slots__ = ()

    def __str__(self):
        """Return short_desc, or a generic text if it is not implemented."""
        try:
            return self.short_desc
        except NotImplementedError:
            return "result from %s" % self.__class__.__name__

    @property
    def short_desc(self):
        """Short description; raises NotImplementedError in this base class."""
        raise NotImplementedError

    @property
    def long_desc(self):
        """Long description; the same as short_desc unless overridden."""
        return self.short_desc

    def _store_cp(self, pkg):
        """Copy C{pkg.category} and C{pkg.package} onto this result."""
        self.category = pkg.category
        self.package = pkg.package

    def _store_cpv(self, pkg):
        """Like _store_cp, and also store C{pkg.fullver} as C{version}."""
        self._store_cp(pkg)
        self.version = pkg.fullver

    def __getstate__(self):
        """Return the pickle state as a dict of attribute name to value.

        The attribute names come from C{__attrs__} if the object has
        it, otherwise from C{__slots__}.

        @raise AttributeError: if one of the listed attributes is not
            set; the error carries the class to ease debugging.
        """
        attrs = getattr(self, '__attrs__', getattr(self, '__slots__', None))
        if attrs:
            try:
                return dict((k, getattr(self, k)) for k in attrs)
            except AttributeError, a:
                # rethrow so we at least know the class
                raise AttributeError(self.__class__, str(a))
        # NOTE(review): reached when neither __attrs__ nor __slots__ lists
        # anything. object.__getstate__ only exists on Python 3.11+, so on
        # Python 2 this line itself raises AttributeError -- confirm that
        # is acceptable.
        return object.__getstate__(self)

    def __setstate__(self, data):
        """Restore the attributes produced by __getstate__.

        @param data: dict of attribute name to value.
        @raise TypeError: if the keys of C{data} are not exactly the
            names listed in C{__attrs__} (or C{__slots__} as fallback).
        """
        attrs = set(getattr(self, '__attrs__', getattr(self, '__slots__', [])))
        # Every expected name has to be present, and the equal length
        # rules out unexpected extra keys.
        if attrs.difference(data) or len(attrs) != len(data):
            raise TypeError("can't restore %s due to data %r not being complete" %
                (self.__class__, data))
        for k, v in data.iteritems():
            setattr(self, k, v)
211
212
class Reporter(object):

    """Base class for reporters.

    Only C{add_report} has to be overridden; every other hook does
    nothing unless a subclass overrides it.
    """

    def add_report(self, result):
        """Handle one result; not implemented in this base class."""
        raise NotImplementedError(self.add_report)

    def start(self):
        """Hook; does nothing by default."""

    def start_check(self, source, target):
        """Hook taking a source and a target; does nothing by default."""

    def end_check(self):
        """Hook; does nothing by default."""

    def finish(self):
        """Hook; does nothing by default."""
229
230
def convert_check_filter(tok):
    """Convert an input string into a filter function.

    The filter function accepts a qualified python identifier string
    and returns a bool.

    The input can be a regexp or a simple string. Input containing a
    C{+} or C{*} is treated as a regexp and is matched from the start
    of the qualified name (C{re.match} semantics). A simple string is
    split on dots and must equal a run of consecutive components of
    the qualified name exactly.

    Matches are case-insensitive.

    Examples::

      convert_check_filter('foo')('a.foo.b') == True
      convert_check_filter('foo')('a.foobar') == False
      convert_check_filter('foo.*')('a.foobar') == False
      convert_check_filter('foo.*')('foobar') == True

    @param tok: the pattern string.
    @return: a function taking the qualified name and returning a bool.
    """
    if '+' in tok or '*' in tok:
        # The pattern itself must not be lowercased: that would turn
        # escapes such as \D or \S into \d or \s. re.I already makes
        # the match case-insensitive.
        matcher = re.compile(tok, re.I).match
        def regexp_filter(name):
            # Return a real bool, as documented, not the match object.
            return matcher(name) is not None
        return regexp_filter

    toklist = tok.lower().split('.')
    width = len(toklist)
    def func(name):
        chunks = name.lower().split('.')
        # Slide a window of len(toklist) components over the name; the
        # range is empty when the name has fewer components than tok.
        for start in range(len(chunks) - width + 1):
            if chunks[start:start + width] == toklist:
                return True
        return False
    return func
264
265
class _CheckSet(object):

    """Run only listed checks.

    Abstract base: it only turns the patterns into filter functions.
    """

    # No config hint here since this one is abstract.

    def __init__(self, patterns):
        """Convert every pattern with convert_check_filter and keep the list."""
        self.patterns = [convert_check_filter(pattern) for pattern in patterns]
274
class Whitelist(_CheckSet):

    """Only run checks matching one of the provided patterns."""

    pkgcore_config_type = ConfigHint(
        {'patterns': 'list'}, typename='pcheck_checkset')

    def filter(self, checks):
        """Return the list of checks whose qualified name matches a pattern.

        The qualified name is C{module.name} of each check.
        """
        selected = []
        for check in checks:
            if any(matches('%s.%s' % (check.__module__, check.__name__))
                   for matches in self.patterns):
                selected.append(check)
        return selected
287
class Blacklist(_CheckSet):

    """Only run checks not matching any of the provided patterns."""

    pkgcore_config_type = ConfigHint(
        {'patterns': 'list'}, typename='pcheck_checkset')

    def filter(self, checks):
        """Return the list of checks whose qualified name matches no pattern.

        The qualified name is C{module.name} of each check.
        """
        kept = []
        for check in checks:
            if any(matches('%s.%s' % (check.__module__, check.__name__))
                   for matches in self.patterns):
                continue
            kept.append(check)
        return kept
300
301
class Suite(object):

    """Plain holder for a target repo, an optional checkset and source repo."""

    pkgcore_config_type = ConfigHint(
        {
            'target_repo': 'ref:repo',
            'src_repo': 'ref:repo',
            'checkset': 'ref:pcheck_checkset',
        },
        typename='pcheck_suite')

    def __init__(self, target_repo, checkset=None, src_repo=None):
        """Store the three arguments as attributes of the same name."""
        self.src_repo = src_repo
        self.checkset = checkset
        self.target_repo = target_repo
312
313
class CheckRunner(object):

    """Drive a group of checks as if they were a single check.

    Exceptions raised by a single check while feeding or finishing are
    logged and do not stop the remaining checks.
    """

    def __init__(self, checks):
        """Store the checks.

        @param checks: iterable of objects with C{start()},
            C{feed(item, reporter)} and C{finish(reporter)} methods.
        """
        self.checks = checks

    def start(self):
        """Start every check; the first exception aborts."""
        for check in self.checks:
            # Intentionally not catching and logging exceptions:
            # if we fail this early we may as well abort.
            check.start()

    def feed(self, item, reporter):
        """Feed C{item} to every check, logging failures of single checks."""
        for check in self.checks:
            try:
                check.feed(item, reporter)
            except (KeyboardInterrupt, SystemExit):
                raise
            except Exception:
                logging.exception('check %r raised', check)

    def finish(self, reporter):
        """Finish every check, logging failures of single checks."""
        for check in self.checks:
            try:
                check.finish(reporter)
            except (KeyboardInterrupt, SystemExit):
                # Same as in feed: on Pythons where these derive from
                # Exception they must not be swallowed by the handler
                # below.
                raise
            except Exception:
                logging.exception('finishing check %r failed', check)

    # The plugger tests use these.
    def __eq__(self, other):
        return self.__class__ is other.__class__ and \
            frozenset(self.checks) == frozenset(other.checks)

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        return hash(frozenset(self.checks))

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, ', '.join(sorted(
                    str(check) for check in self.checks)))
355
356
def plug(sinks, transforms, sources, debug=None):
    """Plug together a pipeline.

    This tries to return a single pipeline if possible (even if it is
    more "expensive" than using separate pipelines). If more than one
    pipeline is needed it does not try to minimize the number.

    The attributes read here are: C{feed_type}, C{scope} and
    C{priority} on sinks; C{source}, C{dest}, C{scope} and C{cost} on
    transforms (which are also called with their child to instantiate
    them); C{feed_type}, C{scope} and C{cost} on sources.

    @param sinks: Sequence of check instances.
    @param transforms: Sequence of transform classes.
    @param sources: Sequence of source instances.
    @param debug: A logging function or C{None}.
    @returns: a sequence of sinks that are unreachable (out of scope or
        missing sources/transforms of the right type),
        a sequence of (source, consumer) tuples.
    """

    # This is not optimized to deal with huge numbers of sinks,
    # sources and transforms, but that should not matter (although it
    # may be necessary to handle a lot of sinks a bit better at some
    # point, which should be fairly easy since we only care about
    # their type and scope).

    assert sinks

    feed_to_transforms = {}
    for transform in transforms:
        feed_to_transforms.setdefault(transform.source, []).append(transform)

    # Map from typename to best scope
    best_scope = {}
    for source in sources:
        # (not particularly clever, if we get a ton of sources this
        # should be optimized to do less duplicate work).
        # Collect every feed type reachable from this source through
        # transforms whose scope the source satisfies.
        reachable = set()
        todo = set([source.feed_type])
        while todo:
            feed_type = todo.pop()
            reachable.add(feed_type)
            for transform in feed_to_transforms.get(feed_type, ()):
                if transform.scope <= source.scope and \
                        transform.dest not in reachable:
                    todo.add(transform.dest)
        for feed_type in reachable:
            scope = best_scope.get(feed_type)
            if scope is None or scope < source.scope:
                best_scope[feed_type] = source.scope

    # Throw out unreachable sinks.
    good_sinks = []
    bad_sinks = []
    for sink in sinks:
        scope = best_scope.get(sink.feed_type)
        if scope is None or sink.scope > scope:
            bad_sinks.append(sink)
        else:
            good_sinks.append(sink)

    if not good_sinks:
        # No point in continuing.
        return bad_sinks, ()

    # Throw out all sources with a scope lower than the least required scope.
    # Does not check transform requirements, may not be very useful.
    lowest_required_scope = min(sink.scope for sink in good_sinks)
    sources = list(s for s in sources if s.scope >= lowest_required_scope)
    if not sources:
        # No usable sources, abort.
        return bad_sinks + good_sinks, ()

    # All types we need to reach.
    sink_types = set(sink.feed_type for sink in good_sinks)

    # Map from scope, source typename to cheapest source.
    source_map = {}
    for new_source in sources:
        current_source = source_map.get((new_source.scope,
                                         new_source.feed_type))
        if current_source is None or current_source.cost > new_source.cost:
            source_map[new_source.scope, new_source.feed_type] = new_source

    # Tuples of (visited_types, source, transforms, price)
    pipes = set()
    unprocessed = set(
        (frozenset((source.feed_type,)), source, frozenset(), source.cost)
        for source in source_map.itervalues())
    if debug is not None:
        for pipe in unprocessed:
            debug('initial: %r', pipe)

    # If we find a single pipeline driving all sinks we want to use it.
    # List of tuples of source, transforms.
    pipes_to_run = None
    best_cost = None
    while unprocessed:
        candidate = unprocessed.pop()
        if candidate in pipes:
            continue
        pipes.add(candidate)
        visited, source, trans, cost = candidate
        if visited >= sink_types:
            # Already reaches all sink types. Check if it is usable as
            # single pipeline:
            if best_cost is None or cost < best_cost:
                pipes_to_run = [(source, trans)]
                best_cost = cost
            # No point in growing this further: it already reaches everything.
            continue
        if best_cost is not None and best_cost <= cost:
            # No point in growing this further.
            continue
        for transform in transforms:
            if source.scope >= transform.scope and \
                    transform.source in visited and \
                    transform.dest not in visited:
                unprocessed.add((
                        visited.union((transform.dest,)), source,
                        trans.union((transform,)), cost + transform.cost))
                if debug is not None:
                    debug(
                        'growing %r for %r with %r', trans, source, transform)

    if pipes_to_run is None:
        # No single pipe will drive everything, try combining pipes.
        # This is pretty stupid but effective. Map sources to
        # pipelines they drive, try combinations of sources (using a
        # source more than once in a combination makes no sense since
        # we also have the "combined" pipeline in pipes).
        source_to_pipes = {}
        for visited, source, trans, cost in pipes:
            source_to_pipes.setdefault(source, []).append(
                (visited, trans, cost))
        unprocessed = set(
            (visited, frozenset([source]), ((source, trans),), cost)
            for visited, source, trans, cost in pipes)
        done = set()
        while unprocessed:
            candidate = unprocessed.pop()
            if candidate in done:
                continue
            done.add(candidate)
            visited, sources, seq, cost = candidate
            if visited >= sink_types:
                # This combination reaches everything.
                if best_cost is None or cost < best_cost:
                    pipes_to_run = seq
                    best_cost = cost
                # No point in growing this further: best_cost is now set
                # and not above cost, so the check below skips it.
            if best_cost is not None and best_cost <= cost:
                # No point in growing this further.
                continue
            for source, source_pipes in source_to_pipes.iteritems():
                if source not in sources:
                    for new_visited, trans, new_cost in source_pipes:
                        unprocessed.add((
                                visited.union(new_visited),
                                sources.union([source]),
                                seq + ((source, trans),),
                                cost + new_cost))

    # Just an assert since unreachable sinks should have been thrown away.
    assert pipes_to_run, 'did not find a solution?'

    good_sinks.sort(key=operator.attrgetter('priority'))

    def build_transform(scope, feed_type, transforms):
        """Build the CheckRunner consuming feed_type for a source of scope."""
        children = list(
            # Note this relies on the cheapest pipe not having
            # any "loops" in its transforms.
            trans(build_transform(scope, trans.dest, transforms))
            for trans in transforms
            if trans.source == feed_type and trans.scope <= scope)
        # Hacky: we modify this in place.
        # Walk backwards so deleting an entry does not shift the
        # indexes that still have to be visited.
        for i in reversed(xrange(len(good_sinks))):
            sink = good_sinks[i]
            # Compare against the scope of the pipeline being built.
            # This used to read the enclosing function's leftover
            # "source" variable, which is whatever source the search
            # loops above touched last.
            if sink.feed_type == feed_type and sink.scope <= scope:
                children.append(sink)
                del good_sinks[i]
        return CheckRunner(children)

    result = list(
        (source, build_transform(source.scope, source.feed_type, transforms))
        for source, transforms in pipes_to_run)

    assert not good_sinks, 'sinks left: %r' % (good_sinks,)
    return bad_sinks, result
Note: See TracBrowser for help on using the browser.