First commit

This commit is contained in:
Mindiell 2023-07-31 07:48:04 +02:00
commit d3448388d6
9 changed files with 881 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
__pycache__/

0
__init__.py Normal file
View File

546
domtree.py Normal file
View File

@ -0,0 +1,546 @@
# encoding: utf-8
"""
https://dom.spec.whatwg.org/
Other URL:
- https://fr.wikipedia.org/wiki/Document_Object_Model
"""
from enum import Enum
from tree import Node, Tree
# TODO: move the exceptions into a dedicated module?
class DOMException(Exception):
    """Base class for every error raised by this DOM implementation."""
class IndexSizeError(DOMException):
    """DOM error raised when an offset lies outside the allowed range."""
# TODO: move the enums into a dedicated module?
class NodeFilter(Enum):
    """NodeFilter constants; only SHOW_ALL is defined so far."""

    # All 32 bits set; same value as the default ``whatToShow`` used by
    # Document.createNodeIterator / Document.createTreeWalker.
    SHOW_ALL = 0xFFFF_FFFF
class ShadowRootMode(Enum):
    """Possible values for the ``mode`` of a shadow root."""

    # Member names are lowercase on purpose: they are kept exactly as declared.
    open = 1
    closed = 2
class SlotAssignmentMode(Enum):
    """Possible values for the ``slotAssignment`` of a shadow root."""

    # Member names are lowercase on purpose: they are kept exactly as declared.
    manual = 1
    named = 2
class Document(Node):
    """
    Document node of the DOM tree.

    The read-only attributes are exposed as properties backed by private fields
    which all start out as ``None``. Every method marked ``TODO`` is a placeholder
    that is not implemented yet and currently returns ``None``.
    """

    def __init__(self):
        # A new document has no parent. Passing ``self`` as the parent (as was
        # done before) makes the node its own parent, which sends ``Node.root``
        # into infinite recursion.
        super().__init__()
        self._implementation = None
        self._URL = None
        self._documentURI = None
        self._compatMode = None
        self._characterSet = None
        self._charset = None
        self._inputEncoding = None
        self._contentType = None
        self._doctype = None
        self._documentElement = None

    @property
    def implementation(self):
        return self._implementation

    @property
    def URL(self):
        return self._URL

    @property
    def documentURI(self):
        return self._documentURI

    @property
    def compatMode(self):
        return self._compatMode

    @property
    def characterSet(self):
        return self._characterSet

    @property
    def charset(self):
        return self._charset

    @property
    def inputEncoding(self):
        return self._inputEncoding

    @property
    def contentType(self):
        return self._contentType

    @property
    def doctype(self):
        return self._doctype

    @property
    def documentElement(self):
        return self._documentElement

    def getElementsByTagName(self, qualifiedName):
        # TODO: not implemented yet
        pass

    def getElementsByTagNameNS(self, namespace, localName):
        # TODO: not implemented yet
        pass

    def getElementsByClassName(self, classNames):
        # TODO: not implemented yet
        pass

    def createElement(self, localName, options=None):
        # ``options`` defaults to None instead of a shared mutable ``{}``;
        # treat None as "no options" once this is implemented.
        # TODO: not implemented yet
        pass

    def createElementNS(self, namespace, qualifiedName, options=None):
        # ``options`` defaults to None instead of a shared mutable ``{}``;
        # treat None as "no options" once this is implemented.
        # TODO: not implemented yet
        pass

    def createDocumentFragment(self):
        # TODO: not implemented yet
        pass

    def createTextNode(self, data):
        # TODO: not implemented yet
        pass

    def createCDATASection(self, data):
        # TODO: not implemented yet
        pass

    def createComment(self, data):
        # TODO: not implemented yet
        pass

    def createProcessingInstruction(self, target, data):
        # TODO: not implemented yet
        pass

    def importNode(self, node, deep=False):
        # TODO: not implemented yet
        pass

    def adoptNode(self, node):
        # TODO: not implemented yet
        pass

    def createAttribute(self, localName):
        # TODO: not implemented yet
        pass

    def createAttributeNS(self, namespace, qualifiedName):
        # TODO: not implemented yet
        pass

    def createEvent(self, interface):
        # TODO: not implemented yet
        pass

    def createRange(self):
        # TODO: not implemented yet
        pass

    def createNodeIterator(self, root, whatToShow=0xffffffff, filter=None):
        # The default ``whatToShow`` has the same value as NodeFilter.SHOW_ALL.
        # TODO: not implemented yet
        pass

    def createTreeWalker(self, root, whatToShow=0xffffffff, filter=None):
        # The default ``whatToShow`` has the same value as NodeFilter.SHOW_ALL.
        # TODO: not implemented yet
        pass
class DocumentType(Node):
    """
    Doctype node. ``name``, ``publicId`` and ``systemId`` are read-only properties
    backed by private fields that start out as ``None``.
    """

    def __init__(self):
        # No parent at creation time. Passing ``self`` here would make the node
        # its own parent and break ``Node.root`` with infinite recursion.
        super().__init__()
        self._name = None
        self._publicId = None
        self._systemId = None

    @property
    def length(self):
        """Length of the node; always 0 for a doctype."""
        return 0

    @property
    def name(self):
        return self._name

    @property
    def publicId(self):
        return self._publicId

    @property
    def systemId(self):
        return self._systemId
class DocumentFragment(Node):
    """Document fragment node; adds no state of its own on top of Node."""

    def __init__(self):
        # No parent at creation time. Passing ``self`` here would make the node
        # its own parent and break ``Node.root`` with infinite recursion.
        super().__init__()
class Element(Node):
    """
    Element node.

    Read-only attributes are exposed as properties backed by private fields;
    ``id``, ``className`` and ``slot`` are plain writable attributes. Every method
    marked ``TODO`` is a placeholder that is not implemented yet and currently
    returns ``None``.
    """

    def __init__(self):
        # No parent at creation time. Passing ``self`` here would make the node
        # its own parent and break ``Node.root`` with infinite recursion.
        super().__init__()
        self._namespaceURI = None
        self._prefix = None
        self._localName = None
        self._tagName = None
        self._classList = None
        self._attributes = []
        self._shadowRoot = None
        self.id = None
        self.className = None
        self.slot = None

    @property
    def namespaceURI(self):
        # Must read the private field: returning ``self.namespaceURI`` would call
        # this property again and recurse forever.
        return self._namespaceURI

    @property
    def prefix(self):
        return self._prefix

    @property
    def localName(self):
        return self._localName

    @property
    def tagName(self):
        return self._tagName

    @property
    def classList(self):
        return self._classList

    @property
    def attributes(self):
        return self._attributes

    @property
    def shadowRoot(self):
        return self._shadowRoot

    def hasAttributes(self):
        """Return True if the element has at least one attribute."""
        return bool(self._attributes)

    def getAttributeNames(self):
        # TODO: not implemented yet
        pass

    def getAttribute(self, qualifiedName):
        # TODO: not implemented yet
        pass

    def getAttributeNS(self, namespace, localName):
        # TODO: not implemented yet
        pass

    def setAttribute(self, qualifiedName, value):
        # TODO: not implemented yet
        pass

    def setAttributeNS(self, namespace, localName, value):
        # TODO: not implemented yet
        pass

    def removeAttribute(self, qualifiedName):
        # TODO: not implemented yet
        pass

    def removeAttributeNS(self, namespace, localName):
        # TODO: not implemented yet
        pass

    def toggleAttribute(self, qualifiedName, force):
        # TODO: not implemented yet
        pass

    def hasAttribute(self, qualifiedName):
        # TODO: not implemented yet
        pass

    def hasAttributeNS(self, namespace, localName):
        # TODO: not implemented yet
        pass

    def getAttributeNode(self, qualifiedName):
        # TODO: not implemented yet
        pass

    def getAttributeNodeNS(self, namespace, localName):
        # TODO: not implemented yet
        pass

    def setAttributeNode(self, qualifiedName, value):
        # TODO: not implemented yet
        pass

    def setAttributeNodeNS(self, namespace, localName, value):
        # TODO: not implemented yet
        pass

    def removeAttributeNode(self, qualifiedName):
        # TODO: not implemented yet
        pass

    def attachShadow(self, init):
        # TODO: not implemented yet
        pass

    def closest(self, namespace, selectors):
        # TODO: not implemented yet
        pass

    def matches(self, namespace, selectors):
        # TODO: not implemented yet
        pass

    def webkitMatchesSelector(self, selectors):
        # TODO: not implemented yet
        pass

    def getElementsByTagName(self, qualifiedName):
        # TODO: not implemented yet
        pass

    def getElementsByTagNameNS(self, namespace, localName):
        # TODO: not implemented yet
        pass

    def getElementsByClassName(self, classNames):
        # TODO: not implemented yet
        pass

    def insertAdjacentElement(self, where, element):
        # TODO: not implemented yet
        pass

    def insertAdjacentText(self, where, data):
        # TODO: not implemented yet
        pass

    def in_a_document_tree(self):
        """
        An element is in a document tree if its root is a document.
        """
        return isinstance(self.root, Document)

    def in_a_document(self):
        """
        An element is in a document if it is in a document tree.
        """
        return self.in_a_document_tree()

    def _shadow_including_root(self):
        """
        The shadow-including root of an object is its root's host's
        shadow-including root, if the object's root is a shadow root; otherwise
        its root.
        """
        root = self.root
        # Climb through nested shadow trees. A shadow root without a host cannot
        # be climbed any further and is returned as is.
        while isinstance(root, ShadowRoot) and root.host is not None:
            root = root.host.root
        return root

    def connected(self):
        """
        An element is connected if its shadow-including root is a document.

        Returns a boolean, like ``in_a_document_tree``. Previously this returned
        the (partially resolved) root node itself, which is always truthy.
        """
        return isinstance(self._shadow_including_root(), Document)
class CharacterData(Node):
    """
    Base class for nodes carrying character data in a ``data`` string.

    The data manipulation methods are still mostly placeholders (``TODO``): apart
    from ``append_data`` they only validate the offset and return ``None``.
    """

    def __init__(self):
        # No parent at creation time. Passing ``self`` here would make the node
        # its own parent and break ``Node.root`` with infinite recursion.
        super().__init__()
        self.data = ""

    @property
    def length(self):
        """Number of characters in ``data``."""
        return len(self.data)

    def _verify_offset(self, offset):
        """Raise IndexSizeError if *offset* is greater than the data length."""
        if offset > self.length:
            raise IndexSizeError(
                f"Given offset ({offset}) is bigger than data length ({self.length})."
            )

    # The methods below used to call ``self.verify_offset``, which does not
    # exist (the helper is private: ``_verify_offset``), so every call raised
    # AttributeError.

    def sub_string(self, offset, count):
        # TODO: not implemented yet, only validates the offset
        self._verify_offset(offset)

    def append_data(self, data):
        """Append *data* at the end of the current data."""
        # TODO
        self.data += data

    def insert_data(self, offset, data):
        # TODO: not implemented yet, only validates the offset
        self._verify_offset(offset)

    def delete_data(self, offset, count):
        # TODO: not implemented yet, only validates the offset
        self._verify_offset(offset)

    def replace_data(self, offset, count, data):
        # TODO: not implemented yet, only validates the offset
        self._verify_offset(offset)
class Attr(Node):
    """
    Attribute node. All attributes are read-only properties backed by private
    fields that start out as ``None``.
    """

    def __init__(self):
        # No parent at creation time. Passing ``self`` here would make the node
        # its own parent and break ``Node.root`` with infinite recursion.
        super().__init__()
        self._namespaceURI = None
        self._prefix = None
        self._localName = None
        self._name = None
        self._value = None
        self._ownerElement = None
        self._specified = None

    @property
    def length(self):
        """Length of the node; always 0 for an attribute."""
        return 0

    @property
    def namespaceURI(self):
        return self._namespaceURI

    @property
    def prefix(self):
        return self._prefix

    @property
    def localName(self):
        return self._localName

    @property
    def name(self):
        return self._name

    @property
    def value(self):
        return self._value

    @property
    def ownerElement(self):
        return self._ownerElement

    @property
    def specified(self):
        return self._specified
class ShadowRoot(DocumentFragment):
    """
    Root node of a shadow tree, attached to another tree through its ``host``.

    Read-only attributes are properties backed by private fields that start out
    as ``None``; ``onslotchange`` is a plain writable attribute.
    """

    def __init__(self):
        # DocumentFragment.__init__ takes no argument besides the instance:
        # passing ``self`` explicitly raised TypeError on every instantiation.
        super().__init__()
        self._mode = None
        self._delegateFocus = None
        self._slotAssignment = None
        self._host = None
        # Backing field for the ``namespaceURI`` property below; it was never
        # initialised, so reading the property raised AttributeError.
        self._namespaceURI = None
        self.onslotchange = None

    @property
    def mode(self):
        return self._mode

    @property
    def delegateFocus(self):
        return self._delegateFocus

    @property
    def slotAssignment(self):
        return self._slotAssignment

    @property
    def host(self):
        return self._host

    @property
    def namespaceURI(self):
        return self._namespaceURI
class Text(CharacterData):
    """Text node holding *data*. ``wholeText`` and ``splitText`` are still stubs."""

    def __init__(self, data=""):
        # CharacterData.__init__ takes no argument besides the instance:
        # passing ``self`` explicitly raised TypeError on every instantiation.
        super().__init__()
        self.data = data

    @property
    def wholeText(self):
        # TODO: not implemented yet
        pass

    def splitText(self, offset):
        # TODO: not implemented yet
        pass
class CDATASection(Text):
    """CDATA section node; behaves exactly like Text, nothing is overridden."""
class ProcessingInstruction(CharacterData):
    """Processing instruction node with a read-only ``target`` (initially None)."""

    def __init__(self):
        # CharacterData.__init__ takes no argument besides the instance:
        # passing ``self`` explicitly raised TypeError on every instantiation.
        super().__init__()
        self._target = None

    @property
    def target(self):
        return self._target
class Comment(CharacterData):
    """Comment node holding *data*."""

    def __init__(self, data=""):
        # CharacterData.__init__ takes no argument besides the instance:
        # passing ``self`` explicitly raised TypeError on every instantiation.
        super().__init__()
        self.data = data
class NodeTree(Tree):
    """
    Tree whose participants are DOM nodes.

    Nothing is enforced by this class yet; the constraints a node tree has to
    respect, expressed per kind of parent node, are:

    Document
        Children, in tree order:
        1. Zero or more ProcessingInstruction or Comment nodes.
        2. Optionally one DocumentType node.
        3. Zero or more ProcessingInstruction or Comment nodes.
        4. Optionally one Element node.
        5. Zero or more ProcessingInstruction or Comment nodes.
    DocumentFragment, Element
        Zero or more Element or CharacterData nodes.
    DocumentType, CharacterData, Attr
        No children.
    """
class DocumentTree(NodeTree):
    """
    A document tree is a node tree whose root is a document.
    """

    def __init__(self):
        # The class used to have no base class while calling
        # ``super().__init__(self)``, which ended up in ``object.__init__`` with
        # an extra argument and raised TypeError. It is now a real node tree.
        super().__init__()
        # Replace the generic root created by Tree with a Document.
        self.root = Document()
class ShadowTree(NodeTree):
    """
    A shadow tree is a node tree whose root is a shadow root.

    A shadow root is always attached to another node tree through its host. A
    shadow tree is therefore never alone. The node tree of a shadow root's host is
    sometimes referred to as the light tree.
    """

    def __init__(self):
        # The class used to have no base class while calling
        # ``super().__init__(self)``, which ended up in ``object.__init__`` with
        # an extra argument and raised TypeError. It is now a real node tree.
        super().__init__()
        # Replace the generic root created by Tree with a ShadowRoot.
        self.root = ShadowRoot()

20
ordered_set.py Normal file
View File

@ -0,0 +1,20 @@
# encoding: utf-8
"""
https://dom.spec.whatwg.org/
Other URL:
- https://fr.wikipedia.org/wiki/Document_Object_Model
"""
def unique(sequence):
    """
    Return the items of *sequence* as a list without duplicates.

    Only the first occurrence of each item is kept and the original order is
    preserved. Items must be hashable.
    """
    # Dict keys are unique and keep insertion order, which is exactly the
    # "first occurrence wins" behaviour needed here.
    return list(dict.fromkeys(sequence))
class OrderedSet:
    """
    Ordered set of whitespace-separated tokens parsed from a string.

    ``elements`` holds the distinct tokens in order of first appearance.
    """

    def __init__(self, string=""):
        # str.split() without argument splits on any run of whitespace.
        tokens = string.split()
        self.elements = unique(tokens)

    def serialize(self):
        """Return the elements joined by single spaces."""
        separator = " "
        return separator.join(self.elements)

3
requirements.txt Normal file
View File

@ -0,0 +1,3 @@
requests
beautifulsoup4
pytest

0
tests/__init__.py Normal file
View File

24
tests/test_ordered_set.py Normal file
View File

@ -0,0 +1,24 @@
# encoding: utf-8
from ordered_set import OrderedSet
class TestOrderedSet:
    """Checks for OrderedSet parsing, de-duplication and serialization."""

    def setup_method(self):
        # One sentence without duplicates, one with several repeated tokens.
        self.simple_string = "This is a simple sentence."
        self.peer_string = "foo bar baz foo baz bar bar bar bas"
        self.simple_os = OrderedSet(self.simple_string)
        self.peer_os = OrderedSet(self.peer_string)

    def test_simple_string(self):
        elements = self.simple_os.elements
        assert len(elements) == 5
        assert elements[0] == "This"
        assert elements[-1] == "sentence."

    def test_peer_string(self):
        # Duplicates collapse to their first occurrence: foo, bar, baz, bas.
        elements = self.peer_os.elements
        assert len(elements) == 4
        assert elements[0] == "foo"
        assert elements[-1] == "bas"

    def test_serializer(self):
        # Round trip holds here because the sentence has no duplicate token
        # and uses single spaces only.
        serialized = self.simple_os.serialize()
        assert serialized == self.simple_string

91
tests/test_tree.py Normal file
View File

@ -0,0 +1,91 @@
# encoding: utf-8
"""
Testing by creating a simple Tree :
Tree
0root0
/ \
1A0 4C1
/ \
2B0 3D1
"""
from tree import Node, Tree
class TestTree:
    """
    Checks Node relations on a small fixture: root has children A and C, and A
    has children B and D (added in the order A, B, C, D).
    """

    def setup_method(self):
        self.tree = Tree()
        self.node_A = Node()
        self.node_B = Node()
        self.node_C = Node()
        self.node_D = Node()
        # Building tree
        root = self.tree.root
        root.add_child(self.node_A)
        self.node_A.add_child(self.node_B)
        root.add_child(self.node_C)
        self.node_A.add_child(self.node_D)

    def test_root(self):
        root = self.tree.root
        assert root.parent is None
        for node in (self.node_A, self.node_B, self.node_C, self.node_D):
            assert node.root is root

    def test_parent(self):
        root = self.tree.root
        assert root.parent is None
        expected_parents = (
            (self.node_A, root),
            (self.node_B, self.node_A),
            (self.node_C, root),
            (self.node_D, self.node_A),
        )
        for node, parent in expected_parents:
            assert node.parent is parent

    def test_descendant(self):
        a, b, c = self.node_A, self.node_B, self.node_C
        assert b.is_descendant(a)
        assert not b.is_descendant(c)
        assert b.is_inclusive_descendant(a)
        assert not a.is_descendant(a)
        assert a.is_inclusive_descendant(a)

    def test_ancestor(self):
        a, b, c = self.node_A, self.node_B, self.node_C
        assert a.is_ancestor(b)
        assert not c.is_ancestor(b)
        assert a.is_inclusive_ancestor(b)
        assert not b.is_ancestor(b)
        assert b.is_inclusive_ancestor(b)

    def test_first_child(self):
        first_of_a = self.node_A.first_child
        assert first_of_a is self.node_B
        assert first_of_a is not self.node_C
        assert self.node_C.first_child is None

    def test_last_child(self):
        assert self.node_A.last_child is self.node_D
        assert self.node_C.last_child is None

    def test_sibling(self):
        a, b, c, d = self.node_A, self.node_B, self.node_C, self.node_D
        # Sibling predicates
        assert a.is_sibling(b) is False
        assert a.is_sibling(c) is True
        assert b.is_sibling(d) is True
        assert a.is_inclusive_sibling(c) is True
        assert a.is_sibling(a) is True
        assert a.is_inclusive_sibling(a) is True
        # Previous siblings
        assert a.previous_sibling is None
        assert c.previous_sibling is a
        assert b.previous_sibling is None
        assert d.previous_sibling is b
        # Next siblings
        assert a.next_sibling is c
        assert c.next_sibling is None
        assert b.next_sibling is d
        assert d.next_sibling is None

    def test_index(self):
        expected_indexes = (
            (self.node_A, 0),
            (self.node_B, 0),
            (self.node_C, 1),
            (self.node_D, 1),
        )
        for node, index in expected_indexes:
            assert node.index == index

    def test_order(self):
        # Tree order is root, A, B, D, C.
        expected_orders = (
            (self.node_A, 1),
            (self.node_B, 2),
            (self.node_C, 4),
            (self.node_D, 3),
        )
        for node, order in expected_orders:
            assert node.order == order

195
tree.py Normal file
View File

@ -0,0 +1,195 @@
# encoding: utf-8
"""
https://dom.spec.whatwg.org/
Other URL:
- https://fr.wikipedia.org/wiki/Document_Object_Model
"""
class Node:
    """
    An object that participates in a tree has a parent, which is either null or an
    object, and has children, which is an ordered set of objects. An object A whose
    parent is object B is a child of B.

    ``order`` is the position of the node in tree order (preorder, depth-first);
    it is maintained by ``add_child``.
    """

    def __init__(self, parent=None):
        # NOTE: only the back reference is stored; the node is not appended to
        # ``parent.children``. Use ``parent.add_child(node)`` to link both ways.
        self.parent = parent
        self.children = []
        self.order = 0

    def add_child(self, child):
        """
        Add a child to this node, after its last known child.

        The child gets the tree-order slot right after the last node of this
        node's current subtree; every node at or past that slot is shifted by one.
        Only ``child.order`` is assigned: descendants that ``child`` may already
        have are not renumbered.
        """
        # The depth-first traversal ends on the last node of the subtree (which is
        # this node itself when it has no children yet).
        *_, last_in_subtree = self.nodes
        order = last_in_subtree.order + 1
        # Add child to node
        self.children.append(child)
        child.parent = self
        # Make room for the new node, then give it its slot
        self.root.reorder(order)
        child.order = order

    @property
    def root(self):
        """
        The root of an object is itself, if its parent is null, or else it is the root
        of its parent. The root of a tree is any object participating in that tree whose
        parent is null.
        """
        if self.parent is None:
            return self
        return self.parent.root

    @property
    def first_child(self):
        """
        The first child of an object is its first child or null if it has no children.
        """
        return self.children[0] if self.children else None

    @property
    def last_child(self):
        """
        The last child of an object is its last child or null if it has no children.
        """
        return self.children[-1] if self.children else None

    @property
    def index(self):
        """
        The index of an object is its number of preceding siblings, or 0 if it has none.

        A node without parent has no sibling at all, hence index 0 (this used to
        return None, contradicting the definition above).
        """
        if self.parent is None:
            return 0
        return self.parent.children.index(self)

    @property
    def previous_sibling(self):
        """
        The previous sibling of an object is its first preceding sibling or null if it
        has no preceding sibling.
        """
        position = self.index
        if position == 0:
            # Covers first children as well as parentless nodes.
            return None
        return self.parent.children[position - 1]

    @property
    def next_sibling(self):
        """
        The next sibling of an object is its first following sibling or null if it has
        no following sibling.
        """
        if self.parent is None:
            # A parentless node has no sibling (this used to raise AttributeError).
            return None
        siblings = self.parent.children
        following = self.index + 1
        if following >= len(siblings):
            return None
        return siblings[following]

    @property
    def nodes(self):
        """
        List Node and all of its nodes through depth-first traversal
        See: https://en.wikipedia.org/wiki/Depth-first_search
        """
        yield self
        for child in self.children:
            yield from child.nodes

    def reorder(self, value):
        """
        Increment the order of every node of the tree whose order is equal to or
        greater than value.
        """
        for node in self.root.nodes:
            if node.order >= value:
                node.order += 1

    def is_descendant(self, node):
        """
        An object A is called a descendant of an object B, if either A is a child of B
        or A is a child of an object C that is a descendant of B.
        """
        parent = self.parent
        if parent is None:
            # A root is nobody's descendant (not even of ``None``).
            return False
        return parent is node or parent.is_descendant(node)

    def is_inclusive_descendant(self, node):
        """
        An inclusive descendant is an object or one of its descendants.
        """
        return node is self or self.is_descendant(node)

    def is_ancestor(self, node):
        """
        An object A is called an ancestor of an object B if and only if B is a
        descendant of A.
        """
        return node.is_descendant(self)

    def is_inclusive_ancestor(self, node):
        """
        An inclusive ancestor is an object or one of its ancestors.
        """
        return node is self or self.is_ancestor(node)

    def is_sibling(self, node):
        """
        An object A is called a sibling of an object B, if and only if B and A share the
        same non-null parent.

        Note that, with this definition, a node that has a parent is a sibling of
        itself.
        """
        return self.parent is not None and self.parent == node.parent

    def is_inclusive_sibling(self, node):
        """
        An inclusive sibling is an object or one of its siblings.
        """
        return node is self or self.is_sibling(node)

    def is_preceding(self, node):
        """
        An object A is preceding an object B if A and B are in the same tree and A comes
        before B in tree order.

        This used to return True for any two nodes of the same tree, whatever
        their relative position.
        """
        if node is self or self.root is not node.root:
            return False
        # Walk the tree in tree order: whichever node shows up first precedes
        # the other one.
        for candidate in self.root.nodes:
            if candidate is self:
                return True
            if candidate is node:
                return False
        return False

    def is_following(self, node):
        """
        An object A is following an object B if A and B are in the same tree and A comes
        after B in tree order.
        """
        # A follows B exactly when B precedes A.
        return node.is_preceding(self)
class Tree:
    """
    Finite hierarchical structure made of nodes.

    Tree order is the preorder, depth-first traversal of the tree.
    """

    def __init__(self):
        # Every tree starts as a single root node without parent.
        self.root = Node(parent=None)