initial commit
This commit is contained in:
commit
86f1023583
3 changed files with 205 additions and 0 deletions
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
__pycache__/
|
||||||
|
.venv
|
||||||
|
*.html
|
1
requirements.txt
Normal file
1
requirements.txt
Normal file
|
@ -0,0 +1 @@
|
||||||
|
beautifulsoup4~=4.14.2
|
201
src/puppygirl/__init__.py
Normal file
201
src/puppygirl/__init__.py
Normal file
|
@ -0,0 +1,201 @@
|
||||||
|
from copy import copy
|
||||||
|
from typing import BinaryIO, Callable, Iterable, Protocol, Self, TextIO, TypeAlias, runtime_checkable
|
||||||
|
from bs4 import BeautifulSoup, Tag
|
||||||
|
from dataclasses import InitVar, dataclass, field
|
||||||
|
|
||||||
|
Parsable: TypeAlias = "BeautifulSoup | Tag | str | bytes | TextIO | BinaryIO"
|
||||||
|
ElementLike: TypeAlias = "BeautifulSoup | Tag | str | Element"
|
||||||
|
ElementLikeList: TypeAlias = Iterable[ElementLike]
|
||||||
|
RenderableElement: TypeAlias = "Element | Iterable[Element]"
|
||||||
|
Templatable: TypeAlias = "Template | TemplateInstance"
|
||||||
|
TemplateDict: TypeAlias = "dict[str, Template]"
|
||||||
|
|
||||||
|
class Clonable[T = Self](Protocol):
|
||||||
|
def clone(self) -> T:
|
||||||
|
return copy(self)
|
||||||
|
|
||||||
|
def to_element(value: ElementLike) -> "Element":
    """Return *value* as an Element, wrapping it only if it is not one already."""
    return value if isinstance(value, Element) else Element(value)
|
||||||
|
|
||||||
|
# --- tag names --------------------------------------------------------------
PgSlotName = "puppygirl-slot"   # custom tag that pulls in a template
TemplateName = "template"       # tag that declares a template
SlotName = "slot"               # slot tag inside a template; also the attribute
                                # content uses to pick the slot it goes into

# --- attribute names --------------------------------------------------------
IdAttr = "id"                   # key under which a template is registered
NameAttr = "name"               # name of a slot tag
TemplateAttr = "template"       # attribute naming the template to apply

# Dictionary key used for a slot tag that carries no name attribute.
UnnamedSlotId = "unnamed"
|
||||||
|
|
||||||
|
@dataclass
class Element(Clonable):
    """Thin wrapper around a single BeautifulSoup node.

    ``value`` is an init-only argument: ``__post_init__`` normalises it and
    stores the resulting node on ``self.value``. Attribute and item access that
    the wrapper does not define itself is forwarded to that node.

    NOTE(review): because ``value`` is an ``InitVar`` the dataclass has no
    fields, so the generated ``__eq__`` treats any two instances of the same
    class as equal. Compare ``.value`` when the wrapped node matters.
    """

    value: InitVar[ElementLike]

    def __post_init__(self, value: ElementLike):
        """Normalise *value* to a single node and store it on ``self.value``.

        * an ``Element`` is unwrapped to the node it already holds;
        * a ``str`` is parsed with the ``html.parser`` backend;
        * a ``BeautifulSoup`` document is reduced to its first child;
        * anything else is stored unchanged.

        Raises:
            ValueError: if the markup/document has no child nodes at all.
        """
        # ElementLike includes Element itself: unwrap instead of nesting wrappers.
        if isinstance(value, Element):
            value = value.value

        if isinstance(value, str):
            value = BeautifulSoup(value, features = "html.parser")

        if isinstance(value, BeautifulSoup):
            first_child = next(iter(value), None)
            if first_child is None:
                # A bare StopIteration escaping from here would be confusing.
                raise ValueError("cannot build an Element from empty markup")
            value = first_child

        self.value = value

    def clone(self) -> Self:
        """Return a new wrapper of the same class around a copy of the node."""
        # type(self) rather than Element, so subclasses that inherit this
        # method get an instance of their own class back, as `-> Self` says.
        return type(self)(copy(self.value))

    # proxy everything the wrapper does not define to the wrapped node
    def __getattr__(self, name):
        """Forward unknown attribute lookups to the wrapped node."""
        # __getattr__ only runs after normal lookup failed. If "value" itself
        # is missing (instance created without __init__, e.g. by copy/pickle),
        # reading self.value below would re-enter this method forever.
        if name == "value":
            raise AttributeError(name)
        return getattr(self.value, name)

    def __getitem__(self, index):
        """Forward ``element[index]`` to the wrapped node."""
        return self.value[index]
|
||||||
|
|
||||||
|
@dataclass
class Template(Element, Clonable["TemplateInstance"]):
    """An Element that must be a ``<template>`` tag carrying an ``id``.

    Cloning a Template does not yield another Template but a
    ``TemplateInstance`` built from a copy of the wrapped node.
    """

    def __post_init__(self, value: ElementLike):
        """Normalise *value* like ``Element`` does, then validate the node.

        Raises:
            TypeError: if the node is not a ``template`` tag, or has no ``id``
                attribute.
        """
        Element.__post_init__(self, value)

        # Validate the normalised node, not the raw argument: the raw argument
        # may be markup text or a whole document, which have no tag name/attrs.
        node = self.value

        if getattr(node, "name", None) != TemplateName:
            raise TypeError(f"{value} is not a template")

        if not node.has_attr(IdAttr):
            raise TypeError(f"{value} missing id attribute")

    def clone(self) -> "TemplateInstance":
        """Return a TemplateInstance wrapping a copy of this template's node."""
        return TemplateInstance(copy(self.value))
|
||||||
|
|
||||||
|
class TemplateSlot(Element):
    """Element wrapping a slot tag that tracks whether it still holds its
    original (default) content."""

    # True until the first append(); flipped to False when the default
    # content is cleared out.
    is_default: bool = True

    def append(self, value):
        """Append *value* to the slot, clearing the slot's existing content
        the first time this is called."""
        first_insert = self.is_default
        if first_insert:
            self.is_default = False
            self.value.clear()

        self.value.append(value)
|
||||||
|
|
||||||
|
@dataclass
class TemplateInstance(Element, Clonable):
    """A working copy of a template whose slots can be filled with content."""

    # slot tags found in the wrapped node, keyed by their name attribute
    # (UnnamedSlotId for a slot tag without one); filled in __post_init__
    slots: dict[str, Tag] = field(init = False)
    # TemplateSlot wrappers created on demand by _get_slot, keyed like `slots`
    slot_instances: dict[str, TemplateSlot] = field(default_factory=dict)

    def __post_init__(self, value: ElementLike):
        """Normalise *value* like ``Element`` does, then index its slot tags.

        If several slot tags share a key, the last one found wins.
        """
        Element.__post_init__(self, value)

        # Search the normalised node rather than the raw argument, which may be
        # markup text (no find_all) or a document wider than the stored node.
        slots = {}
        for slot in self.value.find_all(SlotName):
            slots[slot.get(NameAttr, UnnamedSlotId)] = slot

        self.slots = slots

    def _get_slot(self, slot_name: str) -> TemplateSlot | None:
        """Return the TemplateSlot for *slot_name*, creating and caching it on
        first use, or ``None`` when the template has no such slot."""
        slot = self.slot_instances.get(slot_name)
        if slot is not None:
            return slot

        slot_tag = self.slots.get(slot_name)
        if slot_tag is None:
            # Wrapping None would only fail later, inside TemplateSlot.append.
            return None

        slot = TemplateSlot(slot_tag)
        self.slot_instances[slot_name] = slot
        return slot

    def insert_content(self, content: ElementLike):
        """Append *content* to the slot named by its ``slot`` attribute.

        Content without that attribute goes to the unnamed slot. Content whose
        target slot does not exist in this template is left untouched.
        """
        slot_name = content.get(SlotName)

        if slot_name is None:
            slot_name = UnnamedSlotId

        slot = self._get_slot(slot_name)

        if slot is not None:
            slot.append(content)

    def remove_slots(self):
        """Unwrap every indexed slot tag, leaving its children in its place."""
        for slot in self.slots.values():
            slot.unwrap()
|
||||||
|
|
||||||
|
@runtime_checkable
class Renderable(Protocol):
    """Protocol for objects that can turn a matched element into its
    rendered replacement."""

    def render(self, element: Element, templates: TemplateDict) -> RenderableElement:
        """Return the replacement for *element*.

        The default implementation ignores *templates* and hands *element*
        back unchanged.
        """
        rendered = element
        return rendered
|
||||||
|
|
||||||
|
@dataclass
class PuppygirlTag(Renderable):
    """Base class for renderers that keep a reference to their Puppygirl."""

    # the Puppygirl instance this renderer was created for
    puppygirl: "Puppygirl"
|
||||||
|
|
||||||
|
@dataclass
class PuppygirlSlot(PuppygirlTag):
    """Renderer for ``<puppygirl-slot template="...">`` tags.

    The tag's direct child tags are distributed into the slots of the
    referenced template, and the filled template is returned.
    """

    # tag name this renderer is applied to (shared module constant instead of
    # a second copy of the literal)
    name: str = PgSlotName

    def apply_template(self, element: Element, template: Clonable[TemplateInstance]) -> Iterable[Element]:
        """Fill a fresh clone of *template* with the child tags of *element*.

        Each direct child tag is offered to the clone's ``insert_content`` and
        then has its ``slot`` attribute removed. Afterwards the clone's slot
        tags are unwrapped and the clone's node is returned.
        """
        instance = template.clone()

        # recursive=False: only direct child tags are distributed into slots
        for content in element.find_all(recursive=False):
            instance.insert_content(content)
            del content[SlotName]

        instance.remove_slots()
        return instance.value

    def render(self, element: Element, templates: TemplateDict) -> RenderableElement:
        """Render *element* through the template named by its ``template``
        attribute.

        Returns *element* unchanged when it has no ``template`` attribute.

        Raises:
            KeyError: if the attribute names a template missing from
                *templates*.
        """
        if not element.has_attr(TemplateAttr):
            return element

        template_id = element[TemplateAttr]
        template = templates.get(template_id)
        if template is None:
            # Fail with the offending id instead of an AttributeError on None.
            raise KeyError(f"unknown template {template_id!r}")

        return self.apply_template(element, template)
|
||||||
|
|
||||||
|
class Puppygirl:
    """Applies a set of renderers and templates to parsed HTML trees."""

    # renderer instances, applied in order by parse_tree
    elements: list[Renderable]
    # globally registered templates, keyed by their id attribute
    templates: TemplateDict

    def __init__(
        self,
        elements: list[Callable[[Self], Renderable]] | None = None,
        templates: ElementLikeList | None = None,
    ):
        """Create a processor.

        Args:
            elements: renderers, or callables (such as renderer classes) that
                are called with this Puppygirl and return a renderer.
                ``None`` means no renderers.
            templates: values to register as global templates. ``None`` means
                no global templates.
        """
        # None instead of a shared mutable [] default.
        if elements is None:
            elements = []
        if templates is None:
            templates = []

        self.templates = Puppygirl._create_template_dict(templates)
        self.elements = [self._instantiate(el) for el in elements]

    def _instantiate(self, value: Callable[[Self], Renderable] | Renderable) -> Renderable:
        """Return ``value(self)`` if *value* is callable, else *value* itself."""
        if callable(value):
            return value(self)
        return value

    @staticmethod
    def _create_template_dict(templates: Iterable[ElementLike]) -> TemplateDict:
        """Wrap each item in a Template and key the results by template id."""
        wrapped = [Template(t) for t in templates]
        return {t[IdAttr]: t for t in wrapped}

    def add_template(self, template: ElementLike):
        """Register *template* as a global template under its id."""
        template = Template(template)
        # The registry attribute is `templates`; there is no `_templates`.
        self.templates[template[IdAttr]] = template

    @staticmethod
    def _find_local_templates(tree: BeautifulSoup) -> TemplateDict:
        """Collect the template tags in *tree* that carry an id attribute."""
        candidates = tree.find_all(TemplateName)
        with_id = filter(lambda t: t.has_attr(IdAttr), candidates)
        return Puppygirl._create_template_dict(with_id)

    def fetch(self, path: str) -> BeautifulSoup:
        """Open the file at *path* in text mode, parse and render it."""
        with open(path, "r") as f:
            return self.parse(f)

    def parse(self, value: Parsable) -> BeautifulSoup:
        """Render *value*, parsing it with ``html.parser`` first unless it is
        already a BeautifulSoup document or a Tag."""
        if isinstance(value, (BeautifulSoup, Tag)):
            return self.parse_tree(value)

        return self.parse_tree(BeautifulSoup(value, features='html.parser'))

    def parse_tree(self, tree: BeautifulSoup) -> BeautifulSoup:
        """Apply every renderer to *tree* in place and return *tree*.

        Templates declared inside the tree are merged with the global ones;
        on an id clash the global template wins. Renderers without a ``name``
        attribute are skipped.
        """
        # dict union: entries of the right-hand operand take precedence
        templates = Puppygirl._find_local_templates(tree) | self.templates

        for element in self.elements:
            if not hasattr(element, "name"):
                continue

            for tag in tree.find_all(element.name):
                new_tag = element.render(tag, templates)

                if isinstance(new_tag, Iterable):
                    # Splice the rendered nodes into the tag, then drop the
                    # tag itself so only its children remain.
                    tag.extend(new_tag)
                    tag.unwrap()
                else:
                    tag.replace_with(new_tag)

        return tree
|
||||||
|
|
||||||
|
# Ad-hoc demo run. Guarded so that merely importing the package does not try
# to read "mdn.html" from the current working directory and print it.
if __name__ == "__main__":
    pg = Puppygirl([PuppygirlSlot])
    tree = pg.fetch("mdn.html")
    print(tree.prettify())
|
Loading…
Add table
Reference in a new issue