From 86f1023583da66473ffbde3aac92045efeb387b4 Mon Sep 17 00:00:00 2001 From: rowan Date: Mon, 6 Oct 2025 05:05:27 -0400 Subject: [PATCH] initial commit --- .gitignore | 3 + requirements.txt | 1 + src/puppygirl/__init__.py | 201 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 205 insertions(+) create mode 100644 .gitignore create mode 100644 requirements.txt create mode 100644 src/puppygirl/__init__.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c5e7d4d --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +__pycache__/ +.venv +*.html diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..530985c --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +beautifulsoup4~=4.14.2 \ No newline at end of file diff --git a/src/puppygirl/__init__.py b/src/puppygirl/__init__.py new file mode 100644 index 0000000..0198dd9 --- /dev/null +++ b/src/puppygirl/__init__.py @@ -0,0 +1,201 @@ +from copy import copy +from typing import BinaryIO, Callable, Iterable, Protocol, Self, TextIO, TypeAlias, runtime_checkable +from bs4 import BeautifulSoup, Tag +from dataclasses import InitVar, dataclass, field + +Parsable: TypeAlias = "BeautifulSoup | Tag | str | bytes | TextIO | BinaryIO" +ElementLike: TypeAlias = "BeautifulSoup | Tag | str | Element" +ElementLikeList: TypeAlias = Iterable[ElementLike] +RenderableElement: TypeAlias = "Element | Iterable[Element]" +Templatable: TypeAlias = "Template | TemplateInstance" +TemplateDict: TypeAlias = "dict[str, Template]" + +class Clonable[T = Self](Protocol): + def clone(self) -> T: + return copy(self) + +def to_element(value: ElementLike) -> "Element": + if isinstance(value, Element): + return value + return Element(value) + +# tag names +PgSlotName = "puppygirl-slot" +TemplateName = "template" +SlotName = "slot" + +IdAttr = "id" +NameAttr = "name" +TemplateAttr = "template" +UnnamedSlotId = "unnamed" + +@dataclass +class Element(Clonable): + value: InitVar[ElementLike] + + def __post_init__(self, value: ElementLike): + if isinstance(value, str): + value = BeautifulSoup(value, features = "html.parser") + + if isinstance(value, BeautifulSoup): + value = next(iter(value)) + + self.value = value + + def clone(self) -> Self: + return Element(copy(self.value)) + + # proxy all calls to inner template + def __getattr__(self, name): + return getattr(self.value, name) + + def __getitem__(self, index): + return self.value[index] + +@dataclass +class Template(Element, Clonable["TemplateInstance"]): + def __post_init__(self, value: ElementLike): + Element.__post_init__(self, value) + + if value.name != TemplateName: + raise TypeError(f"{value} is not a template") + + if not value.has_attr(IdAttr): + raise TypeError(f"{value} missing id attribute") + + def clone(self) -> "TemplateInstance": + return TemplateInstance(copy(self.value)) + +class TemplateSlot(Element): + is_default: bool = True + + def append(self, value): + if self.is_default: + self.is_default = False + self.value.clear() + self.value.append(value) + +@dataclass +class TemplateInstance(Element, Clonable): + slots: dict[str, Tag] = field(init = False) + slot_instances: dict[str, TemplateSlot] = field(default_factory=dict) + + def __post_init__(self, value: ElementLike): + Element.__post_init__(self, value) + + slots = {} + for slot in value.find_all(SlotName): + if slot.has_attr(NameAttr): + slots[slot[NameAttr]] = slot + else: + slots[UnnamedSlotId] = slot + + self.slots = slots + + def _get_slot(self, slot_name: str) -> TemplateSlot: + slot = self.slot_instances.get(slot_name) + if slot is not None: return slot + + slot = TemplateSlot(self.slots.get(slot_name)) + self.slot_instances[slot_name] = slot + return slot + + def insert_content(self, content: ElementLike): + slot_name = content.get(SlotName) + + if slot_name is None: + slot_name = UnnamedSlotId + + slot = self._get_slot(slot_name) + + if slot is not None: + slot.append(content) + + def remove_slots(self): + for slot in self.slots.values(): + slot.unwrap() + +@runtime_checkable +class Renderable(Protocol): + def render(self, element: Element, templates: TemplateDict) -> RenderableElement: + return element + +@dataclass +class PuppygirlTag(Renderable): + puppygirl: "Puppygirl" + +@dataclass +class PuppygirlSlot(PuppygirlTag): + name: str = "puppygirl-slot" + + def apply_template(self, element: Element, template: Clonable[TemplateInstance]) -> Iterable[Element]: + instance = template.clone() + + for content in element.find_all(recursive=False): + instance.insert_content(content) + del content[SlotName] + + instance.remove_slots() + return instance.value + + def render(self, element: Element, templates: TemplateDict) -> RenderableElement: + if not element.has_attr(TemplateAttr): + return element + + template = templates.get(element[TemplateAttr]) + return self.apply_template(element, template) + +class Puppygirl: + elements: list[Renderable] + templates: TemplateDict + + def __init__(self, elements: list[Callable[[Self], Renderable]] = [], templates: ElementLikeList = []): + self.templates = Puppygirl._create_template_dict(templates) + self.elements = [self._instantiate(el) for el in elements] + + def _instantiate(self, value: Callable[[Self], Renderable] | Renderable) -> Renderable: + if(isinstance(value, Callable)): + return value(self) + return value + + def _create_template_dict(templates: Iterable[ElementLike]) -> TemplateDict: + templates = [Template(t) for t in templates] + return {t[IdAttr]: t for t in templates} + + def add_template(self, template: ElementLike): + template = Template(template) + self._templates[template[IdAttr]] = template + + def _find_local_templates(tree: BeautifulSoup) -> TemplateDict: + templates = tree.find_all(TemplateName) + templates = filter(lambda t: t.has_attr(IdAttr), templates) + return Puppygirl._create_template_dict(templates) + + def fetch(self, path: str) -> BeautifulSoup: + with open(path, "r") as f: + return self.parse(f) + + def parse(self, value: Parsable) -> BeautifulSoup: + if isinstance(value, BeautifulSoup) or isinstance(value, Tag): + return self.parse_tree(value) + + return self.parse_tree(BeautifulSoup(value, features='html.parser')) + + def parse_tree(self, tree: BeautifulSoup) -> BeautifulSoup: + templates = Puppygirl._find_local_templates(tree) | self.templates + + for element in self.elements: + if hasattr(element, "name"): + for tag in tree.find_all(element.name): + new_tag = element.render(tag, templates) + if isinstance(new_tag, Iterable): + tag.extend(new_tag) + tag.unwrap() + else: + tag.replace_with(new_tag) + + return tree + +pg = Puppygirl([PuppygirlSlot]) +tree = pg.fetch("mdn.html") +print(tree.prettify())