# Python source, 197 lines (5.7 KiB)
from copy import copy
|
|
from typing import BinaryIO, Callable, Iterable, Protocol, Self, TextIO, TypeAlias, runtime_checkable
|
|
from bs4 import BeautifulSoup, Tag
|
|
from dataclasses import InitVar, dataclass, field
|
|
|
|
Parsable: TypeAlias = "BeautifulSoup | Tag | str | bytes | TextIO | BinaryIO"
|
|
ElementLike: TypeAlias = "BeautifulSoup | Tag | str | Element"
|
|
ElementLikeList: TypeAlias = Iterable[ElementLike]
|
|
RenderableElement: TypeAlias = "Element | Iterable[Element]"
|
|
Templatable: TypeAlias = "Template | TemplateInstance"
|
|
TemplateDict: TypeAlias = "dict[str, Template]"
|
|
|
|
class Clonable[T = Self](Protocol):
|
|
def clone(self) -> T:
|
|
return copy(self)
|
|
|
|
# tag names
PgSlotName = "puppygirl-slot"
TemplateName = "template"
SlotName = "slot"

# attribute names
IdAttr = "id"
NameAttr = "name"
TemplateAttr = "template"

# key under which a <slot> without a name attribute is stored
UnnamedSlotId = "unnamed"
@dataclass
class Element(Clonable):
    """Wrapper around a parsed node that proxies attribute and item access.

    ``value`` is an init-only argument: a markup string is parsed with
    ``html.parser``, a ``BeautifulSoup`` document is reduced to its first
    child, and the result is stored on ``self.value``.
    """

    value: InitVar[ElementLike]

    def __post_init__(self, value: ElementLike):
        """Normalise *value* and store it on ``self.value``.

        Raises:
            ValueError: if *value* is (or parses to) an empty document.
        """
        # Unwrap an existing Element so proxies are never nested.
        if isinstance(value, Element):
            value = value.value

        if isinstance(value, str):
            value = BeautifulSoup(value, features="html.parser")

        if isinstance(value, BeautifulSoup):
            first_child = next(iter(value), None)
            if first_child is None:
                # A bare next() would leak StopIteration out of the constructor.
                raise ValueError("cannot create an Element from an empty document")
            value = first_child

        self.value = value

    @staticmethod
    def from_element_like(value: ElementLike) -> "Element":
        """Return *value* itself if it is already an Element, else wrap it."""
        if isinstance(value, Element):
            return value
        return Element(value)

    def clone(self) -> Self:
        """Return a new instance of this object's class wrapping a copy of the value."""
        return type(self)(copy(self.value))

    # proxy all other attribute access to the wrapped value
    def __getattr__(self, name):
        # __getattr__ only runs when normal lookup fails. If "value" itself is
        # missing (instance not initialised yet), reading self.value below would
        # re-enter this method forever, so fail fast instead.
        if name == "value":
            raise AttributeError(name)
        return getattr(self.value, name)

    def __getitem__(self, index):
        """Delegate ``element[index]`` to the wrapped value."""
        return self.value[index]
@dataclass
class Template(Element, Clonable["TemplateInstance"]):
    """An Element that must be a ``<template>`` tag carrying an ``id`` attribute."""

    def __post_init__(self, value: ElementLike):
        """Normalise *value* via Element, then validate the resulting node.

        Raises:
            TypeError: if the node is not a ``template`` tag or has no ``id``.
        """
        Element.__post_init__(self, value)

        # Validate the normalised node, not the raw argument: the argument may
        # be a markup string or a whole document, which have no usable
        # ``name`` / ``has_attr`` of their own.
        node = self.value

        if node.name != TemplateName:
            raise TypeError(f"{node} is not a template")

        if not node.has_attr(IdAttr):
            raise TypeError(f"{node} missing id attribute")

    def clone(self) -> "TemplateInstance":
        """Return a TemplateInstance built from a copy of the wrapped value."""
        return TemplateInstance(copy(self.value))
class TemplateSlot(Element):
    """Element wrapping a slot; tracks whether it still holds its initial content."""

    # True until the first append() call on this slot.
    is_default: bool = True

    def append(self, value):
        """Append *value* to the slot, clearing the slot first on the first call."""
        still_default = self.is_default
        if still_default:
            self.is_default = False
            self.value.clear()

        self.value.append(value)
@dataclass
class TemplateInstance(Element, Clonable):
    """An Element whose ``<slot>`` descendants can be filled with content."""

    # Slot tags found in the wrapped value, keyed by their name attribute
    # (UnnamedSlotId for a slot without one). Built in __post_init__.
    slots: dict[str, Tag] = field(init=False)
    # TemplateSlot wrappers created on demand by _get_slot, keyed like ``slots``.
    slot_instances: dict[str, TemplateSlot] = field(default_factory=dict)

    def __post_init__(self, value: ElementLike):
        """Normalise *value* via Element and index its ``<slot>`` tags."""
        Element.__post_init__(self, value)

        # Search the normalised node; the raw argument may be a markup string.
        slots = {}
        for slot in self.value.find_all(SlotName):
            key = slot[NameAttr] if slot.has_attr(NameAttr) else UnnamedSlotId
            slots[key] = slot

        self.slots = slots

    def _get_slot(self, slot_name: str) -> "TemplateSlot | None":
        """Return the TemplateSlot for *slot_name*, or None if there is no such slot.

        Wrappers are cached in ``slot_instances`` so that the slot's
        ``is_default`` state persists across calls.
        """
        cached = self.slot_instances.get(slot_name)
        if cached is not None:
            return cached

        tag = self.slots.get(slot_name)
        if tag is None:
            # No such slot: wrapping None would only fail later, when the
            # wrapper is appended to.
            return None

        slot = TemplateSlot(tag)
        self.slot_instances[slot_name] = slot
        return slot

    def insert_content(self, content: ElementLike):
        """Append *content* to the slot named by its ``slot`` attribute.

        Content without a ``slot`` attribute goes to the unnamed slot. Content
        naming a slot that does not exist is left where it is.
        """
        slot_name = content.get(SlotName)

        if slot_name is None:
            slot_name = UnnamedSlotId

        slot = self._get_slot(slot_name)

        if slot is not None:
            slot.append(content)

    def remove_slots(self):
        """Unwrap every indexed slot tag, leaving its children in place."""
        for slot in self.slots.values():
            slot.unwrap()
@runtime_checkable
class Renderable(Protocol):
    """Runtime-checkable protocol for objects exposing ``render``."""

    def render(self, element: Element, templates: TemplateDict) -> RenderableElement:
        """Default implementation: hand *element* back unchanged."""
        unchanged = element
        return unchanged
@dataclass
class PuppygirlTag(Renderable):
    """Renderable dataclass that stores a reference to a Puppygirl."""

    # The Puppygirl this tag was created with.
    puppygirl: "Puppygirl"
@dataclass
class PuppygirlSlot(PuppygirlTag):
    """Renders a tag by instantiating the template named in its ``template`` attribute."""

    # Tag name this renderer is applied to (read by Puppygirl.parse_tree).
    name: str = PgSlotName

    def apply_template(self, element: Element, template: Clonable[TemplateInstance]) -> Iterable[Element]:
        """Clone *template*, move *element*'s direct children into its slots, and return the result.

        Each child is offered to the instance's slot lookup and then has its
        ``slot`` attribute removed. Finally the slot tags themselves are
        unwrapped and the instance's wrapped value is returned.
        """
        instance = template.clone()

        for content in element.find_all(recursive=False):
            instance.insert_content(content)
            del content[SlotName]

        instance.remove_slots()
        return instance.value

    def render(self, element: Element, templates: TemplateDict) -> RenderableElement:
        """Return *element* unchanged if it has no ``template`` attribute, else the filled template.

        Raises:
            KeyError: if the referenced template id is not in *templates*.
        """
        if not element.has_attr(TemplateAttr):
            return element

        template_id = element[TemplateAttr]
        template = templates.get(template_id)
        if template is None:
            # Fail here with the offending id rather than later with an opaque
            # AttributeError on None.
            raise KeyError(f"no template with id {template_id!r}")

        return self.apply_template(element, template)
class Puppygirl:
    """Parses markup and applies a list of renderers to matching tags."""

    # Renderers applied, in order, by parse_tree.
    elements: list[Renderable]
    # Templates registered on this instance, keyed by id attribute.
    templates: TemplateDict

    def __init__(
        self,
        elements: Iterable[Callable[[Self], Renderable] | Renderable] = (),
        templates: ElementLikeList = (),
    ):
        """Register *templates* and instantiate *elements*.

        Args:
            elements: renderers, or callables that take this Puppygirl and
                return a renderer.
            templates: values each convertible to a Template.
        """
        # Immutable defaults: a shared mutable default list is a latent hazard.
        # Templates are set first so element factories see them on ``self``.
        self.templates = Puppygirl._create_template_dict(templates)
        self.elements = [self._instantiate(el) for el in elements]

    def _instantiate(self, value: Callable[[Self], Renderable] | Renderable) -> Renderable:
        """Call *value* with this Puppygirl if it is callable, else return it as is."""
        if callable(value):
            return value(self)
        return value

    @staticmethod
    def _create_template_dict(templates: Iterable[ElementLike]) -> TemplateDict:
        """Wrap each item in a Template and key the results by id attribute."""
        wrapped = (Template(t) for t in templates)
        return {t[IdAttr]: t for t in wrapped}

    def add_template(self, template: ElementLike):
        """Wrap *template* in a Template and register it under its id attribute."""
        template = Template(template)
        # The registry attribute is ``templates``; ``_templates`` is never defined.
        self.templates[template[IdAttr]] = template

    @staticmethod
    def _find_local_templates(tree: BeautifulSoup) -> TemplateDict:
        """Collect the ``<template>`` tags in *tree* that carry an id attribute."""
        with_id = (t for t in tree.find_all(TemplateName) if t.has_attr(IdAttr))
        return Puppygirl._create_template_dict(with_id)

    def fetch(self, path: str) -> BeautifulSoup:
        """Open the file at *path* in text mode and parse its contents."""
        with open(path, "r") as f:
            return self.parse(f)

    def parse(self, value: Parsable) -> BeautifulSoup:
        """Render *value*, first parsing it with ``html.parser`` unless it is already a tree or tag."""
        if isinstance(value, (BeautifulSoup, Tag)):
            return self.parse_tree(value)

        return self.parse_tree(BeautifulSoup(value, features='html.parser'))

    def parse_tree(self, tree: BeautifulSoup) -> BeautifulSoup:
        """Apply every named renderer to its matching tags in *tree*, in place, and return *tree*.

        Templates found in the tree are merged with the registered ones; on an
        id clash the registered template wins (right operand of ``|``).
        """
        templates = Puppygirl._find_local_templates(tree) | self.templates

        for element in self.elements:
            # Renderers without a ``name`` have no tag to match and are skipped.
            if not hasattr(element, "name"):
                continue

            for tag in tree.find_all(element.name):
                new_tag = element.render(tag, templates)
                if isinstance(new_tag, Iterable):
                    # Append the rendered nodes to the tag, then drop the tag
                    # itself while keeping its children.
                    tag.extend(new_tag)
                    tag.unwrap()
                else:
                    tag.replace_with(new_tag)

        return tree