root/pkgcore-checks/pkgcore_checks/metadata_xml.py @ ferringb%2540gmail.com-20080624174036-q45fk4fxguj9kjlv

Revision ferringb%2540gmail.com-20080624174036-q45fk4fxguj9kjlv, 8.1 kB (checked in by Brian Harring <ferringb@…>, 7 months ago)

punt trailing whitespace

Line 
1# Copyright: 2006 Brian Harring <ferringb@gmail.com>
2# License: GPL2
3
4import os
5from pkgcore_checks import base
6from snakeoil.demandload import demandload
7demandload(globals(),
8    'urllib:urlopen',
9    'tempfile:NamedTemporaryFile',
10    'pkgcore.log:logger',
11    'pkgcore.spawn:spawn,find_binary',
12)
13
14
class base_MissingXml(base.Result):
    """required xml file is missing"""

    # Result for an xml file that should exist but doesn't.  `package` stays
    # None when the result describes a whole category.
    __slots__ = ('category', 'package', 'filename')
    __attrs__ = __slots__

    def __init__(self, filename, category, package=None):
        base.Result.__init__(self)
        self.filename = filename
        self.category = category
        self.package = package

    @property
    def _label(self):
        """Return "category/package", or just the category if no package."""
        if self.package is None:
            return self.category
        return "%s/%s" % (self.category, self.package)

    @property
    def short_desc(self):
        """One line summary naming the label and the missing file."""
        label = self._label
        missing = os.path.basename(self.filename)
        return "%s is missing %s" % (label, missing)
36
37
class base_BadlyFormedXml(base.Result):
    """xml isn't well formed"""

    # Result for an xml file that could not be parsed.  `package` stays None
    # when the result describes a whole category.
    __slots__ = ("category", "package", "filename")
    __attrs__ = __slots__

    def __init__(self, filename, category, package=None):
        base.Result.__init__(self)
        self.filename = filename
        self.category = category
        self.package = package

    @property
    def _label(self):
        """Return "category/package", or just the category if no package."""
        if self.package is None:
            return self.category
        return "%s/%s" % (self.category, self.package)

    @property
    def short_desc(self):
        """One line summary naming the label and the offending file."""
        label = self._label
        offender = os.path.basename(self.filename)
        return "%s %s is not well formed xml" % (label, offender)
59
60
class base_InvalidXml(base.Result):
    """xml fails dtd validation"""

    # Result for an xml file that parses but violates metadata.dtd.
    # `package` stays None when the result describes a whole category.
    __slots__ = ("category", "package", "filename")
    __attrs__ = __slots__

    def __init__(self, filename, category, package=None):
        # The base initializer is invoked without arguments, exactly as
        # base_MissingXml and base_BadlyFormedXml do; the fields are stored
        # on this instance below rather than forwarded upwards.
        base.Result.__init__(self)
        self.category = category
        self.package = package
        self.filename = filename

    @property
    def _label(self):
        """Return "category/package", or just the category if no package."""
        if self.package is not None:
            return "%s/%s" % (self.category, self.package)
        return self.category

    @property
    def short_desc(self):
        """One line summary naming the label and the offending file."""
        return "%s %s violates metadata.dtd" % (
            self._label, os.path.basename(self.filename))
82
83
class PkgMissingMetadataXml(base_MissingXml):
    # base_MissingXml reported at the base.package_feed threshold; the empty
    # __slots__ means no fields are added beyond the parent's.
    threshold = base.package_feed
    __slots__ = ()
87
class CatMissingMetadataXml(base_MissingXml):
    # base_MissingXml reported at the base.category_feed threshold; the empty
    # __slots__ means no fields are added beyond the parent's.
    threshold = base.category_feed
    __slots__ = ()
91
class PkgInvalidXml(base_InvalidXml):
    # base_InvalidXml reported at the base.package_feed threshold; the empty
    # __slots__ means no fields are added beyond the parent's.
    threshold = base.package_feed
    __slots__ = ()
95
class CatInvalidXml(base_InvalidXml):
    # base_InvalidXml reported at the base.category_feed threshold; the empty
    # __slots__ means no fields are added beyond the parent's.
    threshold = base.category_feed
    __slots__ = ()
99
class PkgBadlyFormedXml(base_BadlyFormedXml):
    # base_BadlyFormedXml reported at the base.package_feed threshold; the
    # empty __slots__ means no fields are added beyond the parent's.
    threshold = base.package_feed
    __slots__ = ()
103
class CatBadlyFormedXml(base_BadlyFormedXml):
    # base_BadlyFormedXml reported at the base.category_feed threshold; the
    # empty __slots__ means no fields are added beyond the parent's.
    threshold = base.category_feed
    __slots__ = ()
107
108
class base_check(base.Template):
    """base class for metadata.xml scans"""

    # Subclasses point the three *_error attributes at the result classes
    # that check_file() should hand back, and implement feed().
    dtd_url = "http://www.gentoo.org/dtd/metadata.dtd"
    misformed_error = None
    invalid_error = None
    missing_error = None

    @classmethod
    def mangle_option_parser(cls, parser):
        """Register --metadata-dtd on parser unless it already exists."""
        if not parser.has_option('--metadata-dtd'):
            parser.add_option(
                '--metadata-dtd', help='location to cache %s' % (cls.dtd_url,))

    def __init__(self, options):
        base.Template.__init__(self, options)
        # None when the source repo exposes no `base` attribute.
        self.base = getattr(options.src_repo, "base", None)
        # Only set when start() has to fall back to a temporary dtd copy.
        self.dtd_file = None

    def _fetch_dtd(self):
        """Download dtd_url and return the raw content."""
        handle = urlopen(self.dtd_url)
        try:
            return handle.read()
        finally:
            handle.close()

    def _repo_dtd(self):
        """Return the path of the dtd inside the repo, or None if absent."""
        if self.base is None:
            return None
        loc = os.path.join(self.base, "metadata", "dtd", "metadata.dtd")
        if os.path.exists(loc):
            return loc
        return None

    def start(self):
        """Locate (or download) metadata.dtd and build the validator.

        Lookup order: the copy inside the repo, then the --metadata-dtd
        cache location (refetched if it doesn't exist), and finally a
        temporary file holding a fresh download.
        """
        # Function scope imports: these names are only needed here.
        import stat
        import sys

        loc = self._repo_dtd()
        if loc is None:
            dtd = None
            loc = self.options.metadata_dtd
            if loc is not None and not os.path.exists(loc):
                logger.warn('metadata.dtd cannot be opened, refetching')
                dtd = self._fetch_dtd()
                try:
                    # Binary mode: the download is written back verbatim.
                    cache = open(loc, 'wb')
                    try:
                        cache.write(dtd)
                    finally:
                        cache.close()
                except (IOError, OSError):
                    # sys.exc_info() instead of an `except ..., e` binding
                    # keeps this clause valid on every python version.
                    logger.warn(
                        'metadata.dtd could not be written (%s)',
                        sys.exc_info()[1])
                    loc = None
            if loc is None:
                if dtd is None:
                    dtd = self._fetch_dtd()
                # Hold the file object on self: a NamedTemporaryFile is
                # removed from disk as soon as it is closed.
                self.dtd_file = NamedTemporaryFile()
                loc = self.dtd_file.name
                # rw-r--r-- (0644)
                os.chmod(loc, stat.S_IRUSR | stat.S_IWUSR |
                    stat.S_IRGRP | stat.S_IROTH)
                self.dtd_file.write(dtd)
                self.dtd_file.flush()

        self.dtd_loc = loc
        self.validator = get_validator(self.dtd_loc)
        self.last_seen = None

    def feed(self, thing, reporter):
        """Subclasses must override this."""
        raise NotImplementedError(self.feed)

    def finish(self, reporter):
        """Forget the last seen key."""
        self.last_seen = None

    def check_file(self, loc):
        """Validate the xml file at loc.

        @param loc: path of the xml file to check
        @return: None if the file is fine, else the class attribute
            (missing_error, misformed_error or invalid_error) describing
            the problem
        @raise AssertionError: if the validator returns an unknown code
        """
        if not os.path.exists(loc):
            return self.missing_error
        ret = self.validator(loc)
        if ret == 0:
            return None
        elif ret == 1:
            return self.misformed_error
        elif ret == 2:
            return self.invalid_error
        # Wrapped in a tuple so a tuple return value can't break formatting.
        raise AssertionError("got %r from validator, which isn't "
            "valid" % (ret,))
178
179
class PackageMetadataXmlCheck(base_check):
    """package level metadata.xml scans"""

    feed_type = base.versioned_feed
    scope = base.package_scope
    misformed_error = PkgBadlyFormedXml
    invalid_error = PkgInvalidXml
    missing_error = PkgMissingMetadataXml

    known_results = (PkgBadlyFormedXml, PkgInvalidXml, PkgMissingMetadataXml)

    def feed(self, pkg, reporter):
        """Check the metadata.xml next to pkg's ebuild, once per pkg.key."""
        key = pkg.key
        if self.last_seen == key:
            # consecutive pkgs with the same key share one metadata.xml
            return
        self.last_seen = key
        ebuild_dir = os.path.dirname(pkg.ebuild.get_path())
        loc = os.path.join(ebuild_dir, "metadata.xml")
        result_cls = self.check_file(loc)
        if result_cls is None:
            return
        reporter.add_report(result_cls(loc, pkg.category, pkg.package))
200
201
class CategoryMetadataXmlCheck(base_check):
    """metadata.xml scans"""
    feed_type = base.versioned_feed
    scope = base.category_scope
    misformed_error = CatBadlyFormedXml
    invalid_error = CatInvalidXml
    missing_error = CatMissingMetadataXml

    known_results = (CatBadlyFormedXml, CatInvalidXml, CatMissingMetadataXml)

    def feed(self, pkg, reporter):
        """Check <base>/<category>/metadata.xml, once per category.

        @param pkg: package being fed; only its category is used
        @param reporter: receives a result via add_report() on failure
        """
        if self.base is None:
            # base_check.__init__ leaves base as None when the repo has no
            # on disk location; there is then no category directory to look
            # in, and os.path.join() would blow up on the None.
            return
        if self.last_seen == pkg.category:
            return
        self.last_seen = pkg.category
        loc = os.path.join(self.base, pkg.category, "metadata.xml")
        ret = self.check_file(loc)
        if ret is not None:
            reporter.add_report(ret(loc, pkg.category))
222
223
# Import cache: None until the first lookup, afterwards either the libxml2
# module or False if importing it failed.
_libxml2_module = None
def get_validator(loc):
    """Return a validate(path) callable bound to the dtd at loc.

    The libxml2 bindings are used when they can be imported; otherwise the
    returned callable comes from xmllint_parser.
    """
    global _libxml2_module
    if _libxml2_module is None:
        try:
            import libxml2 as found
        except ImportError:
            found = False
        _libxml2_module = found

    if not _libxml2_module:
        return xmllint_parser(loc).validate
    return libxml_parser(_libxml2_module, loc).validate
237
238
class libxml_parser(object):
    """Validate xml files against a dtd via the libxml2 python bindings."""

    def __init__(self, module, loc):
        """
        @param module: the imported libxml2 module
        @param loc: location of the dtd to validate against
        """
        self.libxml2 = module
        self.parsed_dtd = self.libxml2.parseDTD(None, loc)
        self.validator = self.libxml2.newValidCtxt()

    def validate(self, loc):
        """
        @param loc: location to verify
        @return: 0 no issue
                 1 badly formed
                 2 invalid xml
        """
        xml = self.libxml2.createFileParserCtxt(loc)
        xml.parseDocument()
        # A failure while parsing means the document itself is broken (1);
        # only a document that parsed can go on to fail the dtd (2).  These
        # are the codes base_check.check_file maps to misformed_error and
        # invalid_error respectively.
        # NOTE(review): the parser context state is read through isValid();
        # confirm this is the intended well-formedness test.
        if not xml.isValid():
            return 1
        elif not xml.doc().validateDtd(self.validator, self.parsed_dtd):
            return 2
        return 0
260
261
class xmllint_parser(object):
    """Validate xml files against a dtd by spawning the xmllint binary."""

    # xmllint exit status -> validate() return code; any status not listed
    # here is reported as 0.
    _status_map = {1: 1, 3: 2}

    def __init__(self, loc):
        self.dtd_loc = loc
        self.bin_loc = find_binary("xmllint")

    def validate(self, loc):
        """
        @param loc: location to verify
        @return: 0 no issue
                 1 badly formed
                 2 invalid xml
        """
        cmd = [self.bin_loc, "--nonet", "--noout", "--dtdvalid",
            self.dtd_loc, loc]
        status = spawn(cmd, fd_pipes={})
        return self._status_map.get(status, 0)
Note: See TracBrowser for help on using the browser.