initial commit

This commit is contained in:
Rowan 2025-10-06 05:05:27 -04:00
commit 86f1023583
3 changed files with 205 additions and 0 deletions

3
.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
__pycache__/
.venv
*.html

1
requirements.txt Normal file
View file

@ -0,0 +1 @@
beautifulsoup4~=4.14.2

201
src/puppygirl/__init__.py Normal file
View file

@ -0,0 +1,201 @@
from copy import copy
from typing import BinaryIO, Callable, Iterable, Protocol, Self, TextIO, TypeAlias, runtime_checkable
from bs4 import BeautifulSoup, Tag
from dataclasses import InitVar, dataclass, field
# Input accepted by `Puppygirl.parse`: an existing tree or tag, raw markup, or an open file.
Parsable: TypeAlias = "BeautifulSoup | Tag | str | bytes | TextIO | BinaryIO"
# Anything an `Element` wrapper can be constructed from.
ElementLike: TypeAlias = "BeautifulSoup | Tag | str | Element"
# Any iterable of element-like values (e.g. the `templates` argument of `Puppygirl`).
ElementLikeList: TypeAlias = Iterable[ElementLike]
# What `Renderable.render` may return: a single element or several.
RenderableElement: TypeAlias = "Element | Iterable[Element]"
# Either a template definition or a clone of one that is being filled in.
Templatable: TypeAlias = "Template | TemplateInstance"
# Templates keyed by the value of their `id` attribute.
TemplateDict: TypeAlias = "dict[str, Template]"
class Clonable[T = Self](Protocol):
def clone(self) -> T:
return copy(self)
def to_element(value: ElementLike) -> "Element":
    """Return *value* as an ``Element``, wrapping it only when necessary.

    An existing ``Element`` is returned as-is (same object); anything else
    is passed to the ``Element`` constructor.
    """
    already_wrapped = isinstance(value, Element)
    return value if already_wrapped else Element(value)
# Tag names.
PgSlotName = "puppygirl-slot"
TemplateName = "template"
# Used both as the <slot> tag name inside templates and as the `slot`
# attribute on content that targets a named slot.
SlotName = "slot"
# Attribute names.
IdAttr = "id"
NameAttr = "name"
TemplateAttr = "template"
# Dictionary key under which a <slot> without a `name` attribute is stored.
UnnamedSlotId = "unnamed"
@dataclass
class Element(Clonable):
    """Thin wrapper around a single BeautifulSoup node.

    The wrapped node is stored in ``self.value``. Attribute access and
    item access that the wrapper does not define itself are forwarded to
    that node, so an ``Element`` can be used much like the underlying tag.

    Args:
        value: The node to wrap. A ``str`` is parsed with the
            ``html.parser`` backend; for a ``BeautifulSoup`` document the
            first child is used; another ``Element`` is unwrapped so
            wrappers never nest; anything else is stored unchanged.

    Raises:
        ValueError: If ``value`` is (or parses to) an empty document.
    """

    value: InitVar[ElementLike]

    def __post_init__(self, value: ElementLike):
        # Unwrap an existing wrapper so we always hold the raw node.
        if isinstance(value, Element):
            value = value.value
        if isinstance(value, str):
            value = BeautifulSoup(value, features="html.parser")
        if isinstance(value, BeautifulSoup):
            first_child = next(iter(value), None)
            if first_child is None:
                # Avoid leaking a bare StopIteration to the caller.
                raise ValueError("cannot build an Element from an empty document")
            value = first_child
        self.value = value

    def clone(self) -> Self:
        """Return a new wrapper of the same class around a copy of the node."""
        return type(self)(copy(self.value))

    # Proxy everything the wrapper does not define to the wrapped node.
    def __getattr__(self, name):
        # `__getattr__` only runs when normal lookup fails. If `value` itself
        # is missing (e.g. on an instance created without running __init__),
        # reading `self.value` below would re-enter this method forever.
        if name == "value":
            raise AttributeError(name)
        return getattr(self.value, name)

    def __getitem__(self, index):
        return self.value[index]
@dataclass
class Template(Element, Clonable["TemplateInstance"]):
    """An ``Element`` that wraps a ``<template id="...">`` tag.

    Raises:
        TypeError: If the wrapped node is not a ``template`` tag, or if it
            has no ``id`` attribute.
    """

    def __post_init__(self, value: ElementLike):
        Element.__post_init__(self, value)
        # Validate the normalized node, not the raw argument: `value` may
        # still be a str or a whole BeautifulSoup document at this point.
        node = self.value
        if getattr(node, "name", None) != TemplateName:
            raise TypeError(f"{node} is not a template")
        if not node.has_attr(IdAttr):
            raise TypeError(f"{node} missing id attribute")

    def clone(self) -> "TemplateInstance":
        """Return a fillable ``TemplateInstance`` built from a copy of the tag."""
        return TemplateInstance(copy(self.value))
class TemplateSlot(Element):
    """A ``<slot>`` tag inside a template instance that can receive content.

    Until something is appended, the slot still holds whatever the template
    author placed inside it. The first append discards that content; later
    appends accumulate.
    """

    # True while the slot still contains only its original template content.
    is_default: bool = True

    def append(self, value):
        """Add *value* to the slot, clearing the original content first."""
        holds_default_content = self.is_default
        if holds_default_content:
            self.is_default = False
            self.value.clear()
        target = self.value
        target.append(value)
@dataclass
class TemplateInstance(Element, Clonable):
    """A working copy of a template whose slots can be filled with content.

    On construction every ``<slot>`` tag below the wrapped node is indexed
    by its ``name`` attribute; a slot without a name is stored under
    ``UnnamedSlotId``. If several slots share a key, the last one found wins.
    """

    # Raw <slot> tags found in the instance, keyed by slot name.
    slots: dict[str, Tag] = field(init=False)
    # Lazily created wrappers for the slots that have received content.
    slot_instances: dict[str, TemplateSlot] = field(default_factory=dict)

    def __post_init__(self, value: ElementLike):
        Element.__post_init__(self, value)
        # Search the normalized node: `value` may still be a str or a whole
        # document here, while `self.value` is always the wrapped node.
        slots = {}
        for slot in self.value.find_all(SlotName):
            slots[slot.get(NameAttr, UnnamedSlotId)] = slot
        self.slots = slots

    def _get_slot(self, slot_name: str) -> TemplateSlot | None:
        """Return the wrapper for *slot_name*, or ``None`` if no such slot exists."""
        slot = self.slot_instances.get(slot_name)
        if slot is not None:
            return slot
        slot_tag = self.slots.get(slot_name)
        if slot_tag is None:
            # Unknown slot: report it instead of wrapping None, which would
            # only fail later when content is appended.
            return None
        slot = TemplateSlot(slot_tag)
        self.slot_instances[slot_name] = slot
        return slot

    def insert_content(self, content: ElementLike):
        """Place *content* into the slot named by its ``slot`` attribute.

        Content without a ``slot`` attribute goes to the unnamed slot.
        Content that targets a slot the template does not define is ignored.
        """
        slot_name = content.get(SlotName)
        if slot_name is None:
            slot_name = UnnamedSlotId
        slot = self._get_slot(slot_name)
        if slot is not None:
            slot.append(content)

    def remove_slots(self):
        """Replace every ``<slot>`` tag with its current children."""
        for slot in self.slots.values():
            slot.unwrap()
@runtime_checkable
class Renderable(Protocol):
    """Protocol for objects that can turn a matched tag into its rendered form."""

    def render(self, element: Element, templates: TemplateDict) -> RenderableElement:
        """Return the replacement for *element*.

        Args:
            element: The tag being rendered.
            templates: Templates available for this render, keyed by id.

        Returns:
            A single element or an iterable of elements. The default
            implementation returns *element* unchanged.
        """
        return element
@dataclass
class PuppygirlTag(Renderable):
    """Base class for renderable custom tags that are bound to a ``Puppygirl``."""

    # The Puppygirl instance this tag handler was created for.
    puppygirl: "Puppygirl"
@dataclass
class PuppygirlSlot(PuppygirlTag):
    """Renders ``<puppygirl-slot template="...">`` by filling in a template.

    The direct child tags of the element are distributed into the slots of
    a fresh copy of the referenced template.
    """

    # Tag name this renderer handles.
    name: str = PgSlotName

    def apply_template(self, element: Element, template: Clonable[TemplateInstance]) -> Iterable[Element]:
        """Fill a clone of *template* with the child tags of *element*.

        Each direct child tag is inserted into the slot named by its
        ``slot`` attribute, and that attribute is then removed from the
        child. Finally the ``<slot>`` tags themselves are unwrapped.

        Returns:
            The filled-in template node; iterating it yields its children.
        """
        instance = template.clone()
        # find_all returns a list, so moving children out of `element`
        # while looping does not disturb the iteration.
        for content in element.find_all(recursive=False):
            instance.insert_content(content)
            del content[SlotName]
        instance.remove_slots()
        return instance.value

    def render(self, element: Element, templates: TemplateDict) -> RenderableElement:
        """Render *element* using the template named by its ``template`` attribute.

        An element without a ``template`` attribute is returned unchanged.

        Raises:
            KeyError: If the referenced template id is not in *templates*.
        """
        if not element.has_attr(TemplateAttr):
            return element
        template_id = element[TemplateAttr]
        template = templates.get(template_id)
        if template is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise KeyError(f"unknown template {template_id!r}")
        return self.apply_template(element, template)
class Puppygirl:
    """HTML preprocessor that expands custom tags using ``<template>`` definitions.

    Attributes:
        elements: The tag renderers applied, in order, by :meth:`parse_tree`.
        templates: Templates registered on this instance, keyed by id.
    """

    elements: list[Renderable]
    templates: TemplateDict

    def __init__(
        self,
        elements: Iterable[Callable[[Self], Renderable] | Renderable] = (),
        templates: ElementLikeList = (),
    ):
        """Create a processor.

        Args:
            elements: Renderers, or callables (typically renderer classes)
                that are called with this instance to build one.
            templates: Template definitions to register up front.
        """
        self.templates = Puppygirl._create_template_dict(templates)
        self.elements = [self._instantiate(el) for el in elements]

    def _instantiate(self, value: Callable[[Self], Renderable] | Renderable) -> Renderable:
        """Call *value* with this instance if it is callable, else return it."""
        if callable(value):
            return value(self)
        return value

    @staticmethod
    def _create_template_dict(templates: Iterable[ElementLike]) -> TemplateDict:
        """Wrap each item as a ``Template`` and index the result by id."""
        wrapped = [t if isinstance(t, Template) else Template(t) for t in templates]
        return {t[IdAttr]: t for t in wrapped}

    def add_template(self, template: ElementLike):
        """Register *template* under its id, replacing any previous entry."""
        template = Template(template)
        self.templates[template[IdAttr]] = template

    @staticmethod
    def _find_local_templates(tree: BeautifulSoup) -> TemplateDict:
        """Collect the ``<template>`` tags in *tree* that carry an id."""
        candidates = tree.find_all(TemplateName)
        return Puppygirl._create_template_dict(
            t for t in candidates if t.has_attr(IdAttr)
        )

    def fetch(self, path: str) -> BeautifulSoup:
        """Read the file at *path* and return its processed tree."""
        # Binary mode hands the raw bytes to BeautifulSoup so that it detects
        # the document encoding itself instead of relying on the platform's
        # default text encoding.
        with open(path, "rb") as f:
            return self.parse(f)

    def parse(self, value: Parsable) -> BeautifulSoup:
        """Process *value*, parsing it with ``html.parser`` first if needed."""
        if isinstance(value, (BeautifulSoup, Tag)):
            return self.parse_tree(value)
        return self.parse_tree(BeautifulSoup(value, features="html.parser"))

    def parse_tree(self, tree: BeautifulSoup) -> BeautifulSoup:
        """Expand every registered custom tag in *tree*, in place.

        Templates defined inside the tree are merged with the ones
        registered on this instance; on an id clash the instance's template
        is used. Returns the same *tree* object.
        """
        templates = Puppygirl._find_local_templates(tree) | self.templates
        for element in self.elements:
            if not hasattr(element, "name"):
                continue
            for tag in tree.find_all(element.name):
                new_tag = element.render(tag, templates)
                if isinstance(new_tag, Iterable):
                    # Move the rendered nodes into the tag, then drop the
                    # tag itself so only its contents remain.
                    tag.extend(new_tag)
                    tag.unwrap()
                else:
                    tag.replace_with(new_tag)
        return tree
# Demo: render a local sample page. Guarded so that importing the package
# does not read a file from the working directory or print to stdout.
if __name__ == "__main__":
    pg = Puppygirl([PuppygirlSlot])
    tree = pg.fetch("mdn.html")
    print(tree.prettify())