From d3448388d6f91db10cfad22e0308d0aafcf3eba2 Mon Sep 17 00:00:00 2001
From: Mindiell
Date: Mon, 31 Jul 2023 07:48:04 +0200
Subject: [PATCH] First commit

---
 .gitignore                |   2 +
 __init__.py               |   0
 domtree.py                | 551 ++++++++++++++++++++++++++++++++++++++
 ordered_set.py            |  20 ++
 requirements.txt          |   3 +
 tests/__init__.py         |   0
 tests/test_ordered_set.py |  24 ++
 tests/test_tree.py        | 101 +++++++
 tree.py                   | 197 ++++++++++++++
 9 files changed, 898 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 __init__.py
 create mode 100644 domtree.py
 create mode 100644 ordered_set.py
 create mode 100644 requirements.txt
 create mode 100644 tests/__init__.py
 create mode 100644 tests/test_ordered_set.py
 create mode 100644 tests/test_tree.py
 create mode 100644 tree.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..372c13e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+__pycache__/
+
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/domtree.py b/domtree.py
new file mode 100644
index 0000000..0569470
--- /dev/null
+++ b/domtree.py
@@ -0,0 +1,551 @@
+# encoding: utf-8
+"""
+https://dom.spec.whatwg.org/
+
+Other URL:
+- https://fr.wikipedia.org/wiki/Document_Object_Model
+"""
+
+from enum import Enum
+
+from tree import Node, Tree
+
+
+# Move exceptions in a specific file ?
+class DOMException(Exception):
+    """Base class of the DOM-specific errors defined in this module."""
+
+
+class IndexSizeError(DOMException):
+    """Raised when an offset is bigger than the length of the targeted data."""
+
+
+# Move Enums in a specific file ?
+class NodeFilter(Enum):
+    SHOW_ALL = 0xffffffff
+
+
+class ShadowRootMode(Enum):
+    open = 1
+    closed = 2
+
+
+class SlotAssignmentMode(Enum):
+    manual = 1
+    named = 2
+
+
+class Document(Node):
+    def __init__(self):
+        super().__init__()
+        self._implementation = None
+        self._URL = None
+        self._documentURI = None
+        self._compatMode = None
+        self._characterSet = None
+        self._charset = None
+        self._inputEncoding = None
+        self._contentType = None
+        self._doctype = None
+        self._documentElement = None
+
+    @property
+    def implementation(self):
+        return self._implementation
+
+    @property
+    def URL(self):
+        return self._URL
+
+    @property
+    def documentURI(self):
+        return self._documentURI
+
+    @property
+    def compatMode(self):
+        return self._compatMode
+
+    @property
+    def characterSet(self):
+        return self._characterSet
+
+    @property
+    def charset(self):
+        return self._charset
+
+    @property
+    def inputEncoding(self):
+        return self._inputEncoding
+
+    @property
+    def contentType(self):
+        return self._contentType
+
+    @property
+    def doctype(self):
+        return self._doctype
+
+    @property
+    def documentElement(self):
+        return self._documentElement
+
+    def getElementsByTagName(self, qualifiedName):
+        # TODO
+        pass
+
+    def getElementsByTagNameNS(self, namespace, localName):
+        # TODO
+        pass
+
+    def getElementsByClassName(self, classNames):
+        # TODO
+        pass
+
+    def createElement(self, localName, options=None):
+        # TODO
+        pass
+
+    def createElementNS(self, namespace, qualifiedName, options=None):
+        # TODO
+        pass
+
+    def createDocumentFragment(self):
+        # TODO
+        pass
+
+    def createTextNode(self, data):
+        # TODO
+        pass
+
+    def createCDATASection(self, data):
+        # TODO
+        pass
+
+    def createComment(self, data):
+        # TODO
+        pass
+
+    def createProcessingInstruction(self, target, data):
+        # TODO
+        pass
+
+    def importNode(self, node, deep=False):
+        # TODO
+        pass
+
+    def adoptNode(self, node):
+        # TODO
+        pass
+
+    def createAttribute(self, localName):
+        # TODO
+        pass
+
+    def createAttributeNS(self, namespace, qualifiedName):
+        # TODO
+        pass
+
+    def createEvent(self, interface):
+        # TODO
+        pass
+
+    def createRange(self):
+        # TODO
+        pass
+
+    def createNodeIterator(self, root, whatToShow=0xffffffff, filter=None):
+        # TODO
+        pass
+
+    def createTreeWalker(self, root, whatToShow=0xffffffff, filter=None):
+        # TODO
+        pass
+
+
+class DocumentType(Node):
+    def __init__(self):
+        super().__init__()
+        self._name = None
+        self._publicId = None
+        self._systemId = None
+
+    @property
+    def length(self):
+        return 0
+
+    @property
+    def name(self):
+        return self._name
+
+    @property
+    def publicId(self):
+        return self._publicId
+
+    @property
+    def systemId(self):
+        return self._systemId
+
+
+class DocumentFragment(Node):
+    def __init__(self):
+        super().__init__()
+
+
+class Element(Node):
+    def __init__(self):
+        super().__init__()
+        self._namespaceURI = None
+        self._prefix = None
+        self._localName = None
+        self._tagName = None
+        self._classList = None
+        self._attributes = []
+        self._shadowRoot = None
+        self.id = None
+        self.className = None
+        self.slot = None
+
+    @property
+    def namespaceURI(self):
+        return self._namespaceURI
+
+    @property
+    def prefix(self):
+        return self._prefix
+
+    @property
+    def localName(self):
+        return self._localName
+
+    @property
+    def tagName(self):
+        return self._tagName
+
+    @property
+    def classList(self):
+        return self._classList
+
+    @property
+    def attributes(self):
+        return self._attributes
+
+    @property
+    def shadowRoot(self):
+        return self._shadowRoot
+
+    def hasAttributes(self):
+        return bool(self._attributes)
+
+    def getAttributeNames(self):
+        # TODO
+        pass
+
+    def getAttribute(self, qualifiedName):
+        # TODO
+        pass
+
+    def getAttributeNS(self, namespace, localName):
+        # TODO
+        pass
+
+    def setAttribute(self, qualifiedName, value):
+        # TODO
+        pass
+
+    def setAttributeNS(self, namespace, localName, value):
+        # TODO
+        pass
+
+    def removeAttribute(self, qualifiedName):
+        # TODO
+        pass
+
+    def removeAttributeNS(self, namespace, localName):
+        # TODO
+        pass
+
+    def toggleAttribute(self, qualifiedName, force):
+        # TODO
+        pass
+
+    def hasAttribute(self, qualifiedName):
+        # TODO
+        pass
+
+    def hasAttributeNS(self, namespace, localName):
+        # TODO
+        pass
+
+    def getAttributeNode(self, qualifiedName):
+        # TODO
+        pass
+
+    def getAttributeNodeNS(self, namespace, localName):
+        # TODO
+        pass
+
+    def setAttributeNode(self, qualifiedName, value):
+        # TODO
+        pass
+
+    def setAttributeNodeNS(self, namespace, localName, value):
+        # TODO
+        pass
+
+    def removeAttributeNode(self, qualifiedName):
+        # TODO
+        pass
+
+    def attachShadow(self, init):
+        # TODO
+        pass
+
+    def closest(self, namespace, selectors):
+        # TODO
+        pass
+
+    def matches(self, namespace, selectors):
+        # TODO
+        pass
+
+    def webkitMatchesSelector(self, selectors):
+        # TODO
+        pass
+
+    def getElementsByTagName(self, qualifiedName):
+        # TODO
+        pass
+
+    def getElementsByTagNameNS(self, namespace, localName):
+        # TODO
+        pass
+
+    def getElementsByClassName(self, classNames):
+        # TODO
+        pass
+
+    def insertAdjacentElement(self, where, element):
+        # TODO
+        pass
+
+    def insertAdjacentText(self, where, data):
+        # TODO
+        pass
+
+    def in_a_document_tree(self):
+        """
+        An element is in a document tree if its root is a document.
+        """
+        return isinstance(self.root, Document)
+
+    def in_a_document(self):
+        """
+        An element is in a document if it is in a document tree.
+        """
+        return self.in_a_document_tree()
+
+    def connected(self):
+        """
+        An element is connected if its shadow-including root is a document.
+
+        The shadow-including root of an object is its root’s host’s shadow-including
+        root, if the object’s root is a shadow root; otherwise its root.
+        """
+        root = self.root
+        if isinstance(root, ShadowRoot):
+            # Continue from the host's tree; a shadow root without a host is
+            # attached to nothing, so the element cannot be connected.
+            return root.host is not None and root.host.connected()
+        return isinstance(root, Document)
+
+
+class CharacterData(Node):
+    def __init__(self):
+        super().__init__()
+        self.data = ""
+
+    @property
+    def length(self):
+        return len(self.data)
+
+    def _verify_offset(self, offset):
+        if offset > self.length:
+            raise IndexSizeError(
+                f"Given offset ({offset}) is bigger than data length ({self.length})."
+            )
+
+    def sub_string(self, offset, count):
+        # TODO
+        self._verify_offset(offset)
+
+    def append_data(self, data):
+        # TODO
+        self.data += data
+
+    def insert_data(self, offset, data):
+        # TODO
+        self._verify_offset(offset)
+
+    def delete_data(self, offset, count):
+        # TODO
+        self._verify_offset(offset)
+
+    def replace_data(self, offset, count, data):
+        # TODO
+        self._verify_offset(offset)
+
+
+class Attr(Node):
+    def __init__(self):
+        super().__init__()
+        self._namespaceURI = None
+        self._prefix = None
+        self._localName = None
+        self._name = None
+        self._value = None
+        self._ownerElement = None
+        self._specified = None
+
+    @property
+    def length(self):
+        return 0
+
+    @property
+    def namespaceURI(self):
+        return self._namespaceURI
+
+    @property
+    def prefix(self):
+        return self._prefix
+
+    @property
+    def localName(self):
+        return self._localName
+
+    @property
+    def name(self):
+        return self._name
+
+    @property
+    def value(self):
+        return self._value
+
+    @property
+    def ownerElement(self):
+        return self._ownerElement
+
+    @property
+    def specified(self):
+        return self._specified
+
+
+class ShadowRoot(DocumentFragment):
+    def __init__(self):
+        super().__init__()
+        self._mode = None
+        self._delegateFocus = None
+        self._slotAssignment = None
+        self._host = None
+        self._namespaceURI = None
+        self.onslotchange = None
+
+    @property
+    def mode(self):
+        return self._mode
+
+    @property
+    def delegateFocus(self):
+        return self._delegateFocus
+
+    @property
+    def slotAssignment(self):
+        return self._slotAssignment
+
+    @property
+    def host(self):
+        return self._host
+
+    @property
+    def namespaceURI(self):
+        return self._namespaceURI
+
+
+class Text(CharacterData):
+    def __init__(self, data=""):
+        super().__init__()
+        self.data = data
+
+    @property
+    def wholeText(self):
+        # TODO
+        pass
+
+    def splitText(self, offset):
+        # TODO
+        pass
+
+
+class CDATASection(Text):
+    pass
+
+
+class ProcessingInstruction(CharacterData):
+    def __init__(self):
+        super().__init__()
+        self._target = None
+
+    @property
+    def target(self):
+        return self._target
+
+
+class Comment(CharacterData):
+    def __init__(self, data=""):
+        super().__init__()
+        self.data = data
+
+
+class NodeTree(Tree):
+    """
+    A node tree is constrained as follows, expressed as a relationship between a node
+    and its potential children:
+        Document
+            In tree order
+                1. Zero or more ProcessingInstruction or Comment nodes.
+                2. Optionally one DocumentType node.
+                3. Zero or more ProcessingInstruction or Comment nodes.
+                4. Optionally one Element node.
+                5. Zero or more ProcessingInstruction or Comment nodes.
+        DocumentFragment
+        Element
+            Zero or more Element or CharacterData nodes.
+        DocumentType
+        CharacterData
+        Attr
+            No children.
+    """
+    pass
+
+
+class DocumentTree(NodeTree):
+    """
+    A document tree is a node tree whose root is a document.
+    """
+    def __init__(self):
+        super().__init__()
+        # Tree.__init__ installs a plain Node as root; swap in a Document.
+        self.root = Document()
+
+
+class ShadowTree(NodeTree):
+    """
+    A shadow tree is a node tree whose root is a shadow root.
+
+    A shadow root is always attached to another node tree through its host. A shadow
+    tree is therefore never alone. The node tree of a shadow root’s host is sometimes
+    referred to as the light tree.
+    """
+    def __init__(self):
+        super().__init__()
+        # Tree.__init__ installs a plain Node as root; swap in a ShadowRoot.
+        self.root = ShadowRoot()
diff --git a/ordered_set.py b/ordered_set.py
new file mode 100644
index 0000000..8f33099
--- /dev/null
+++ b/ordered_set.py
@@ -0,0 +1,20 @@
+# encoding: utf-8
+"""
+https://dom.spec.whatwg.org/
+
+Other URL:
+- https://fr.wikipedia.org/wiki/Document_Object_Model
+"""
+
+
+def unique(sequence):
+    seen = set()
+    return [x for x in sequence if not (x in seen or seen.add(x))]
+
+
+class OrderedSet:
+    def __init__(self, string=""):
+        self.elements = unique(string.split())
+
+    def serialize(self):
+        return " ".join(self.elements)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..1c466e0
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+requests
+beautifulsoup4
+pytest
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_ordered_set.py b/tests/test_ordered_set.py
new file mode 100644
index 0000000..3aba0c5
--- /dev/null
+++ b/tests/test_ordered_set.py
@@ -0,0 +1,24 @@
+# encoding: utf-8
+
+from ordered_set import OrderedSet
+
+
+class TestOrderedSet:
+    def setup_method(self):
+        self.simple_string = "This is a simple sentence."
+        self.peer_string = "foo bar baz foo baz bar bar bar bas"
+        self.simple_os = OrderedSet(self.simple_string)
+        self.peer_os = OrderedSet(self.peer_string)
+
+    def test_simple_string(self):
+        assert len(self.simple_os.elements) == 5
+        assert self.simple_os.elements[0] == "This"
+        assert self.simple_os.elements[-1] == "sentence."
+
+    def test_peer_string(self):
+        assert len(self.peer_os.elements) == 4
+        assert self.peer_os.elements[0] == "foo"
+        assert self.peer_os.elements[-1] == "bas"
+
+    def test_serializer(self):
+        assert self.simple_os.serialize() == self.simple_string
diff --git a/tests/test_tree.py b/tests/test_tree.py
new file mode 100644
index 0000000..5244205
--- /dev/null
+++ b/tests/test_tree.py
@@ -0,0 +1,101 @@
+# encoding: utf-8
+r"""
+Testing by creating a simple Tree :
+            Tree
+           0root0
+          /      \
+       1A0        4C1
+      /   \
+   2B0     3D1
+"""
+
+from tree import Node, Tree
+
+
+class TestTree:
+    def setup_method(self):
+        self.tree = Tree()
+        self.node_A = Node()
+        self.node_B = Node()
+        self.node_C = Node()
+        self.node_D = Node()
+        # Building tree
+        self.tree.root.add_child(self.node_A)
+        self.node_A.add_child(self.node_B)
+        self.tree.root.add_child(self.node_C)
+        self.node_A.add_child(self.node_D)
+
+    def test_root(self):
+        assert self.tree.root.parent is None
+        assert self.node_A.root is self.tree.root
+        assert self.node_B.root is self.tree.root
+        assert self.node_C.root is self.tree.root
+        assert self.node_D.root is self.tree.root
+
+    def test_parent(self):
+        assert self.tree.root.parent is None
+        assert self.node_A.parent is self.tree.root
+        assert self.node_B.parent is self.node_A
+        assert self.node_C.parent is self.tree.root
+        assert self.node_D.parent is self.node_A
+
+    def test_descendant(self):
+        assert self.node_B.is_descendant(self.node_A)
+        assert not self.node_B.is_descendant(self.node_C)
+        assert self.node_B.is_inclusive_descendant(self.node_A)
+        assert not self.node_A.is_descendant(self.node_A)
+        assert self.node_A.is_inclusive_descendant(self.node_A)
+
+    def test_ancestor(self):
+        assert self.node_A.is_ancestor(self.node_B)
+        assert not self.node_C.is_ancestor(self.node_B)
+        assert self.node_A.is_inclusive_ancestor(self.node_B)
+        assert not self.node_B.is_ancestor(self.node_B)
+        assert self.node_B.is_inclusive_ancestor(self.node_B)
+
+    def test_first_child(self):
+        assert self.node_A.first_child is self.node_B
+        assert self.node_A.first_child is not self.node_C
+        assert self.node_C.first_child is None
+
+    def test_last_child(self):
+        assert self.node_A.last_child is self.node_D
+        assert self.node_C.last_child is None
+
+    def test_sibling(self):
+        assert self.node_A.is_sibling(self.node_B) is False
+        assert self.node_A.is_sibling(self.node_C) is True
+        assert self.node_B.is_sibling(self.node_D) is True
+        assert self.node_A.is_inclusive_sibling(self.node_C) is True
+        assert self.node_A.is_sibling(self.node_A) is True
+        assert self.node_A.is_inclusive_sibling(self.node_A) is True
+        assert self.node_A.previous_sibling is None
+        assert self.node_C.previous_sibling is self.node_A
+        assert self.node_B.previous_sibling is None
+        assert self.node_D.previous_sibling is self.node_B
+        assert self.node_A.next_sibling is self.node_C
+        assert self.node_C.next_sibling is None
+        assert self.node_B.next_sibling is self.node_D
+        assert self.node_D.next_sibling is None
+        assert self.tree.root.previous_sibling is None
+        assert self.tree.root.next_sibling is None
+
+    def test_index(self):
+        assert self.tree.root.index == 0
+        assert self.node_A.index == 0
+        assert self.node_B.index == 0
+        assert self.node_C.index == 1
+        assert self.node_D.index == 1
+
+    def test_order(self):
+        assert self.node_A.order == 1
+        assert self.node_B.order == 2
+        assert self.node_C.order == 4
+        assert self.node_D.order == 3
+
+    def test_tree_order(self):
+        assert self.node_A.is_preceding(self.node_C) is True
+        assert self.node_C.is_preceding(self.node_A) is False
+        assert self.node_D.is_following(self.node_B) is True
+        assert self.node_B.is_following(self.node_D) is False
+        assert self.node_A.is_preceding(Node()) is False
diff --git a/tree.py b/tree.py
new file mode 100644
index 0000000..442ac98
--- /dev/null
+++ b/tree.py
@@ -0,0 +1,197 @@
+# encoding: utf-8
+"""
+https://dom.spec.whatwg.org/
+
+Other URL:
+- https://fr.wikipedia.org/wiki/Document_Object_Model
+"""
+
+
+class Node:
+    """
+    An object that participates in a tree has a parent, which is either null or an
+    object, and has children, which is an ordered set of objects. An object A whose
+    parent is object B is a child of B.
+    """
+    def __init__(self, parent=None):
+        self.parent = parent
+        self.children = []
+        self.order = 0
+
+    def add_child(self, child):
+        """
+        Adding a child to Node after its last known child.
+        """
+        # Compute new order for modified tree
+        if not self.children:
+            order = self.order + 1
+        else:
+            for node in self.nodes:
+                order = node.order + 1
+        # Add child to node
+        self.children.append(child)
+        child.parent = self
+        # Reorder tree
+        self.root.reorder(order)
+        child.order = order
+
+    @property
+    def root(self):
+        """
+        The root of an object is itself, if its parent is null, or else it is the root
+        of its parent. The root of a tree is any object participating in that tree whose
+        parent is null.
+        """
+        if self.parent is None:
+            return self
+        else:
+            return self.parent.root
+
+    @property
+    def first_child(self):
+        """
+        The first child of an object is its first child or null if it has no children.
+        """
+        try:
+            return self.children[0]
+        except IndexError:
+            return None
+
+    @property
+    def last_child(self):
+        """
+        The last child of an object is its last child or null if it has no children.
+        """
+        try:
+            return self.children[-1]
+        except IndexError:
+            return None
+
+    @property
+    def index(self):
+        """
+        The index of an object is its number of preceding siblings, or 0 if it has none.
+        """
+        if self.parent is None:
+            # A parentless node has no siblings, hence no preceding ones.
+            return 0
+        return self.parent.children.index(self)
+
+    @property
+    def previous_sibling(self):
+        """
+        The previous sibling of an object is its first preceding sibling or null if it
+        has no preceding sibling.
+        """
+        if self.index == 0:
+            return None
+        return self.parent.children[self.index - 1]
+
+    @property
+    def next_sibling(self):
+        """
+        The next sibling of an object is its first following sibling or null if it has
+        no following sibling.
+        """
+        if self.parent is None or self.parent.last_child is self:
+            return None
+        return self.parent.children[self.index + 1]
+
+    @property
+    def nodes(self):
+        """
+        List Node and all of its nodes through depth-first traversal
+        See: https://en.wikipedia.org/wiki/Depth-first_search
+        """
+        yield self
+        for child in self.children:
+            yield from child.nodes
+
+    def reorder(self, value):
+        """
+        Increment all nodes with order equal or greater than value
+        """
+        for node in self.root.nodes:
+            if node.order >= value:
+                node.order += 1
+
+    def is_descendant(self, node):
+        """
+        An object A is called a descendant of an object B, if either A is a child of B
+        or A is a child of an object C that is a descendant of B.
+        """
+        if (
+            self.parent == node
+            or self.parent is not None and self.parent.is_descendant(node)
+        ):
+            return True
+        return False
+
+    def is_inclusive_descendant(self, node):
+        """
+        An inclusive descendant is an object or one of its descendants.
+        """
+        if node is self:
+            return True
+        return self.is_descendant(node)
+
+    def is_ancestor(self, node):
+        """
+        An object A is called an ancestor of an object B if and only if B is a
+        descendant of A.
+        """
+        return node.is_descendant(self)
+
+    def is_inclusive_ancestor(self, node):
+        """
+        An inclusive ancestor is an object or one of its ancestors.
+        """
+        if node is self:
+            return True
+        return self.is_ancestor(node)
+
+    def is_sibling(self, node):
+        """
+        An object A is called a sibling of an object B, if and only if B and A share the
+        same non-null parent.
+        """
+        if self.parent is not None and self.parent == node.parent:
+            return True
+        return False
+
+    def is_inclusive_sibling(self, node):
+        """
+        An inclusive sibling is an object or one of its siblings.
+        """
+        if node is self:
+            return True
+        return self.is_sibling(node)
+
+    def is_preceding(self, node):
+        """
+        An object A is preceding an object B if A and B are in the same tree and A comes
+        before B in tree order.
+        """
+        if node is self or self.root is not node.root:
+            return False
+        # Whichever of the two nodes shows up first in preorder precedes the other.
+        for candidate in self.root.nodes:
+            if candidate is self or candidate is node:
+                return candidate is self
+        return False
+
+    def is_following(self, node):
+        """
+        An object A is following an object B if A and B are in the same tree and A comes
+        after B in tree order.
+        """
+        return node.is_preceding(self)
+
+
+class Tree:
+    """
+    A tree is a finite hierarchical tree structure. In tree order is preorder,
+    depth-first traversal of a tree.
+    """
+    def __init__(self):
+        self.root = Node()