diff options
Diffstat (limited to 'import-layers/yocto-poky/bitbake/lib/bs4/builder/_lxml.py')
-rw-r--r-- | import-layers/yocto-poky/bitbake/lib/bs4/builder/_lxml.py | 248 |
1 files changed, 0 insertions, 248 deletions
# Reconstructed from a flattened diff view of
# import-layers/yocto-poky/bitbake/lib/bs4/builder/_lxml.py
# (blob 9c6c14ee6; the diff records this 248-line file as deleted).
"""Tree builders that let bs4 parse XML and HTML through lxml."""

__all__ = [
    'LXMLTreeBuilderForXML',
    'LXMLTreeBuilder',
    ]

from io import BytesIO
from io import StringIO
import collections  # kept for importers of this module; no longer used here
from lxml import etree
from bs4.element import (
    Comment,
    Doctype,
    NamespacedAttribute,
    ProcessingInstruction,
)
from bs4.builder import (
    FAST,
    HTML,
    HTMLTreeBuilder,
    PERMISSIVE,
    ParserRejectedMarkup,
    TreeBuilder,
    XML)
from bs4.dammit import EncodingDetector

LXML = 'lxml'


class LXMLTreeBuilderForXML(TreeBuilder):
    """Tree builder that feeds markup to lxml's XML parser.

    The builder hands itself to lxml as the parser ``target``, so the
    ``start``/``end``/``data``/``pi``/``doctype``/``comment``/``close``
    methods below are the parser-event callbacks; each one forwards the
    event to ``self.soup``.
    """

    DEFAULT_PARSER_CLASS = etree.XMLParser

    is_xml = True

    NAME = "lxml-xml"
    ALTERNATE_NAMES = ["xml"]

    # Well, it's permissive by XML parser standards.
    features = [NAME, LXML, XML, FAST, PERMISSIVE]

    # Number of bytes/characters handed to the parser per feed() call.
    CHUNK_SIZE = 512

    # This namespace mapping is specified in the XML Namespace
    # standard.
    DEFAULT_NSMAPS = {'http://www.w3.org/XML/1998/namespace' : "xml"}

    def default_parser(self, encoding):
        """Return the parser (or parser class) to use for `encoding`.

        This can either return a parser object or a class, which
        will be instantiated with default arguments by `parser_for`.
        A parser passed to the constructor takes precedence.
        """
        if self._default_parser is not None:
            return self._default_parser
        return etree.XMLParser(
            target=self, strip_cdata=False, recover=True, encoding=encoding)

    def parser_for(self, encoding):
        """Return a ready-to-use parser object for `encoding`."""
        # Use the default parser.
        parser = self.default_parser(encoding)

        # The builtin callable() replaces collections.Callable, an alias
        # that was removed from the `collections` namespace in Python 3.10.
        if callable(parser):
            # Instantiate the parser with default arguments
            parser = parser(target=self, strip_cdata=False, encoding=encoding)
        return parser

    def __init__(self, parser=None, empty_element_tags=None):
        """Set up the builder.

        :param parser: Optional parser object or parser class used in
            place of the default one.
        :param empty_element_tags: Optional iterable of tag names; when
            given it is stored as a set on ``self.empty_element_tags``.
        """
        # TODO: Issue a warning if parser is present but not a
        # callable, since that means there's no way to create new
        # parsers for different encodings.
        self._default_parser = parser
        if empty_element_tags is not None:
            self.empty_element_tags = set(empty_element_tags)
        self.soup = None
        # Stack of inverted namespace maps (URI -> prefix). A None entry
        # is a placeholder for a tag that declared no namespaces itself.
        self.nsmaps = [self.DEFAULT_NSMAPS]

    def _getNsTag(self, tag):
        """Split a fully-qualified lxml name into ``(namespace, name)``.

        ``"{uri}local"`` becomes ``("uri", "local")``; a name without a
        namespace becomes ``(None, name)``.
        Copied from lxml's src/lxml/sax.py.
        """
        if tag[0] == '{':
            return tuple(tag[1:].split('}', 1))
        else:
            return (None, tag)

    def prepare_markup(self, markup, user_specified_encoding=None,
                       exclude_encodings=None,
                       document_declared_encoding=None):
        """
        :yield: A series of 4-tuples.
         (markup, encoding, declared encoding,
          has undergone character replacement)

        Each 4-tuple represents a strategy for parsing the document.
        """
        if isinstance(markup, str):
            # We were given Unicode. Maybe lxml can parse Unicode on
            # this system?
            yield markup, None, document_declared_encoding, False

            # No, apparently not. Convert the Unicode to UTF-8 and
            # tell lxml to parse it as UTF-8.
            yield (markup.encode("utf8"), "utf8",
                   document_declared_encoding, False)

        # Instead of using UnicodeDammit to convert the bytestring to
        # Unicode using different encodings, use EncodingDetector to
        # iterate over the encodings, and tell lxml to try to parse
        # the document as each one in turn.
        is_html = not self.is_xml
        try_encodings = [user_specified_encoding, document_declared_encoding]
        detector = EncodingDetector(
            markup, try_encodings, is_html, exclude_encodings)
        for encoding in detector.encodings:
            yield (detector.markup, encoding, document_declared_encoding, False)

    def feed(self, markup):
        """Parse `markup`, feeding it to lxml in CHUNK_SIZE pieces.

        :param markup: ``bytes``, ``str``, or an object with a
            ``read(size)`` method.
        :raises ParserRejectedMarkup: if lxml or the codec machinery
            rejects the document.
        """
        if isinstance(markup, bytes):
            markup = BytesIO(markup)
        elif isinstance(markup, str):
            markup = StringIO(markup)

        # Call feed() at least once, even if the markup is empty,
        # or the parser won't be initialized.
        data = markup.read(self.CHUNK_SIZE)
        try:
            self.parser = self.parser_for(self.soup.original_encoding)
            self.parser.feed(data)
            while data:
                # Now call feed() on the rest of the data, chunk by chunk.
                data = markup.read(self.CHUNK_SIZE)
                if data:
                    self.parser.feed(data)
            self.parser.close()
        except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
            # Chain the original error so the underlying cause is kept.
            raise ParserRejectedMarkup(str(e)) from e

    def close(self):
        """Reset the namespace stack to its initial state."""
        self.nsmaps = [self.DEFAULT_NSMAPS]

    def start(self, name, attrs, nsmap=None):
        """Handle a start tag.

        :param name: Tag name, possibly in ``{uri}local`` form.
        :param attrs: Mapping of attribute names to values.
        :param nsmap: Mapping of prefix -> namespace URI newly declared
            on this tag; omitted or empty when there are none.
        """
        # Make sure attrs is a mutable dict--lxml may send an immutable dictproxy.
        attrs = dict(attrs)
        if nsmap is None:
            nsmap = {}

        # Invert each namespace map as it comes in.
        if len(nsmap) == 0 and len(self.nsmaps) > 1:
            # There are no new namespaces for this tag, but
            # non-default namespaces are in play, so we need a
            # separate tag stack to know when they end.
            self.nsmaps.append(None)
        elif len(nsmap) > 0:
            # A new namespace mapping has come into play. This branch
            # must also run when namespaces are already active, or
            # declarations on nested tags would be lost.
            inverted_nsmap = {uri: prefix for prefix, uri in nsmap.items()}
            self.nsmaps.append(inverted_nsmap)
            # Also treat the namespace mapping as a set of attributes on the
            # tag, so we can recreate it later.
            for prefix, namespace in nsmap.items():
                attribute = NamespacedAttribute(
                    "xmlns", prefix, "http://www.w3.org/2000/xmlns/")
                attrs[attribute] = namespace

        # Namespaces are in play. Find any attributes that came in
        # from lxml with namespaces attached to their names, and
        # turn them into NamespacedAttribute objects.
        new_attrs = {}
        for attr, value in attrs.items():
            namespace, attr = self._getNsTag(attr)
            if namespace is None:
                new_attrs[attr] = value
            else:
                nsprefix = self._prefix_for_namespace(namespace)
                attr = NamespacedAttribute(nsprefix, attr, namespace)
                new_attrs[attr] = value
        attrs = new_attrs

        namespace, name = self._getNsTag(name)
        nsprefix = self._prefix_for_namespace(namespace)
        self.soup.handle_starttag(name, namespace, nsprefix, attrs)

    def _prefix_for_namespace(self, namespace):
        """Find the currently active prefix for the given namespace."""
        if namespace is None:
            return None
        # Search innermost-first so the closest declaration wins.
        for inverted_nsmap in reversed(self.nsmaps):
            if inverted_nsmap is not None and namespace in inverted_nsmap:
                return inverted_nsmap[namespace]
        return None

    def end(self, name):
        """Handle an end tag and unwind the namespace stack."""
        self.soup.endData()
        namespace, name = self._getNsTag(name)
        nsprefix = self._prefix_for_namespace(namespace)
        self.soup.handle_endtag(name, nsprefix)
        if len(self.nsmaps) > 1:
            # This tag, or one of its parents, introduced a namespace
            # mapping, so pop it off the stack.
            self.nsmaps.pop()

    def pi(self, target, data):
        """Handle a processing instruction as a ProcessingInstruction."""
        self.soup.endData()
        self.soup.handle_data(target + ' ' + data)
        self.soup.endData(ProcessingInstruction)

    def data(self, content):
        """Forward character data to the soup."""
        self.soup.handle_data(content)

    def doctype(self, name, pubid, system):
        """Handle a doctype declaration as a Doctype object."""
        self.soup.endData()
        doctype = Doctype.for_name_and_ids(name, pubid, system)
        self.soup.object_was_parsed(doctype)

    def comment(self, content):
        "Handle comments as Comment objects."
        self.soup.endData()
        self.soup.handle_data(content)
        self.soup.endData(Comment)

    def test_fragment_to_document(self, fragment):
        """See `TreeBuilder`."""
        return '<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment


class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
    """Tree builder that feeds markup to lxml's HTML parser."""

    NAME = LXML
    ALTERNATE_NAMES = ["lxml-html"]

    features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE]
    is_xml = False

    def default_parser(self, encoding):
        """Return the HTML parser class; `parser_for` instantiates it."""
        return etree.HTMLParser

    def feed(self, markup):
        """Parse `markup` in a single feed() call.

        :raises ParserRejectedMarkup: if lxml or the codec machinery
            rejects the document.
        """
        encoding = self.soup.original_encoding
        try:
            self.parser = self.parser_for(encoding)
            self.parser.feed(markup)
            self.parser.close()
        except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
            # Chain the original error so the underlying cause is kept.
            raise ParserRejectedMarkup(str(e)) from e

    def test_fragment_to_document(self, fragment):
        """See `TreeBuilder`."""
        return '<html><body>%s</body></html>' % fragment