initial commit
This commit is contained in:
commit
86f1023583
3 changed files with 205 additions and 0 deletions
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
__pycache__/
|
||||
.venv
|
||||
*.html
|
1
requirements.txt
Normal file
1
requirements.txt
Normal file
|
@ -0,0 +1 @@
|
|||
beautifulsoup4~=4.14.2
|
201
src/puppygirl/__init__.py
Normal file
201
src/puppygirl/__init__.py
Normal file
|
@ -0,0 +1,201 @@
|
|||
from copy import copy
|
||||
from typing import BinaryIO, Callable, Iterable, Protocol, Self, TextIO, TypeAlias, runtime_checkable
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
from dataclasses import InitVar, dataclass, field
|
||||
|
||||
Parsable: TypeAlias = "BeautifulSoup | Tag | str | bytes | TextIO | BinaryIO"
|
||||
ElementLike: TypeAlias = "BeautifulSoup | Tag | str | Element"
|
||||
ElementLikeList: TypeAlias = Iterable[ElementLike]
|
||||
RenderableElement: TypeAlias = "Element | Iterable[Element]"
|
||||
Templatable: TypeAlias = "Template | TemplateInstance"
|
||||
TemplateDict: TypeAlias = "dict[str, Template]"
|
||||
|
||||
class Clonable[T = Self](Protocol):
|
||||
def clone(self) -> T:
|
||||
return copy(self)
|
||||
|
||||
def to_element(value: ElementLike) -> "Element":
    """Coerce *value* into an ``Element``.

    An existing ``Element`` is handed back as-is (same object); anything
    else is passed to the ``Element`` constructor.
    """
    return value if isinstance(value, Element) else Element(value)
|
||||
|
||||
# Tag names.
PgSlotName = "puppygirl-slot"
TemplateName = "template"
# Used both as the <slot> tag name inside templates and as the attribute on
# content that selects which slot the content is inserted into.
SlotName = "slot"

# Attribute names.
IdAttr = "id"
NameAttr = "name"
TemplateAttr = "template"

# Key under which a <slot> without a name attribute is stored, and which
# content without a slot attribute is routed to.
UnnamedSlotId = "unnamed"
|
||||
|
||||
@dataclass
class Element(Clonable):
    """Thin wrapper around a single BeautifulSoup node.

    The constructor accepts markup as a string, a parsed ``BeautifulSoup``
    document, a ``Tag``, or another ``Element``. The wrapped node is stored
    in ``self.value``. Attribute lookups that fail on the wrapper itself and
    ``element[key]`` item access are forwarded to that node.
    """

    # Init-only: normalised in __post_init__ and then stored as the instance
    # attribute of the same name.
    value: InitVar[ElementLike]

    def __post_init__(self, value: ElementLike):
        """Normalise *value* to a single node and store it in ``self.value``.

        Raises:
            ValueError: if a string or document contains no usable node.
        """
        # Wrapping an Element again would stack proxies; share its node.
        if isinstance(value, Element):
            value = value.value

        if isinstance(value, str):
            value = BeautifulSoup(value, features="html.parser")

        if isinstance(value, BeautifulSoup):
            value = self._first_node(value)

        self.value = value

    @staticmethod
    def _first_node(document):
        """Return the first child of *document* that is not blank text."""
        for node in document:
            # Text nodes are str subclasses; markup that starts with a newline
            # or indentation would otherwise yield that whitespace instead of
            # the tag that follows it.
            if isinstance(node, str) and not node.strip():
                continue
            return node
        raise ValueError("cannot create an Element from an empty document")

    def clone(self) -> Self:
        """Return a new wrapper of the same class around a copy of the node."""
        return type(self)(copy(self.value))

    # proxy all calls to inner template
    def __getattr__(self, name):
        """Forward unknown attribute lookups to the wrapped node.

        ``value`` itself and dunder names are never forwarded. While an
        instance is being rebuilt by ``copy``/``pickle``, ``self.value`` does
        not exist yet, so forwarding would re-enter this method without end;
        and protocol hooks of the wrapped node must not be mistaken for hooks
        of the wrapper.
        """
        if name == "value" or (name.startswith("__") and name.endswith("__")):
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            )
        return getattr(self.value, name)

    def __getitem__(self, index):
        """Forward ``element[index]`` to the wrapped node."""
        return self.value[index]
|
||||
|
||||
@dataclass
class Template(Element, Clonable["TemplateInstance"]):
    """An ``Element`` that must be a ``<template>`` tag carrying an ``id``.

    Cloning a template does not yield another ``Template`` but a
    ``TemplateInstance`` built from a copy of the wrapped node.
    """

    def __post_init__(self, value: ElementLike):
        """Normalise *value* like ``Element`` does, then validate it.

        Raises:
            TypeError: if the resulting node is not a ``template`` tag, or
                has no ``id`` attribute.
        """
        Element.__post_init__(self, value)

        # Validate the normalised node, not the raw argument: the argument
        # may be a markup string, which has neither .name nor .has_attr.
        node = self.value

        if getattr(node, "name", None) != TemplateName:
            raise TypeError(f"{node} is not a template")

        if not node.has_attr(IdAttr):
            raise TypeError(f"{node} missing id attribute")

    def clone(self) -> "TemplateInstance":
        """Return a fresh ``TemplateInstance`` built from a copy of the node."""
        return TemplateInstance(copy(self.value))
|
||||
|
||||
class TemplateSlot(Element):
    """A slot node that drops its existing content on the first append."""

    # True until something has been appended through this wrapper.
    is_default: bool = True

    def _discard_default_content(self):
        """Empty the slot once, the first time content is supplied."""
        if not self.is_default:
            return
        self.is_default = False
        self.value.clear()

    def append(self, value):
        """Append *value* to the slot, replacing its initial content first."""
        self._discard_default_content()
        self.value.append(value)
|
||||
|
||||
@dataclass
class TemplateInstance(Element, Clonable):
    """A template node that content can be distributed into.

    On construction every ``<slot>`` tag inside the node is indexed by its
    ``name`` attribute; a slot without one is stored under ``UnnamedSlotId``.
    If several slots share a key, the last one found wins.
    """

    # Raw <slot> tags found in the node, keyed by slot name.
    slots: dict[str, Tag] = field(init=False)
    # TemplateSlot wrappers, created lazily by _get_slot.
    slot_instances: dict[str, TemplateSlot] = field(default_factory=dict)

    def __post_init__(self, value: ElementLike):
        """Normalise *value* like ``Element`` does, then index its slots."""
        Element.__post_init__(self, value)

        # Search the normalised node, not the raw argument, which may be a
        # markup string without find_all().
        self.slots = {
            slot.get(NameAttr, UnnamedSlotId): slot
            for slot in self.value.find_all(SlotName)
        }

    def _get_slot(self, slot_name: str) -> TemplateSlot | None:
        """Return the wrapper for *slot_name*, or ``None`` if there is no such slot."""
        wrapper = self.slot_instances.get(slot_name)
        if wrapper is not None:
            return wrapper

        tag = self.slots.get(slot_name)
        if tag is None:
            # Without this check a wrapper around None would be cached and
            # the first append to it would fail.
            return None

        wrapper = TemplateSlot(tag)
        self.slot_instances[slot_name] = wrapper
        return wrapper

    def insert_content(self, content: ElementLike):
        """Append *content* to the slot named by its ``slot`` attribute.

        Content without that attribute goes to the unnamed slot. Content
        whose target slot does not exist in this instance is left alone.
        """
        slot_name = content.get(SlotName)

        if slot_name is None:
            slot_name = UnnamedSlotId

        slot = self._get_slot(slot_name)

        if slot is not None:
            slot.append(content)

    def remove_slots(self):
        """Unwrap every indexed ``<slot>`` tag, keeping its children in place."""
        for slot in self.slots.values():
            slot.unwrap()
|
||||
|
||||
@runtime_checkable
class Renderable(Protocol):
    """Structural interface for objects that can render a tag.

    Decorated with ``runtime_checkable`` so it can be used with
    ``isinstance``.
    """

    def render(self, element: Element, templates: TemplateDict) -> RenderableElement:
        """Return the rendered form of *element*.

        This default implementation ignores *templates* and returns
        *element* unchanged.
        """
        return element
|
||||
|
||||
@dataclass
class PuppygirlTag(Renderable):
    """Base class for renderables that hold a reference to a ``Puppygirl``."""

    # The Puppygirl object passed in when the tag handler is constructed.
    puppygirl: "Puppygirl"
|
||||
|
||||
@dataclass
class PuppygirlSlot(PuppygirlTag):
    """Renders a tag by filling the template named in its ``template`` attribute."""

    # Tag name this renderer is applied to (read by Puppygirl.parse_tree).
    name: str = PgSlotName

    def apply_template(self, element: Element, template: Clonable[TemplateInstance]) -> Iterable[Element]:
        """Fill a clone of *template* with the direct child tags of *element*.

        Each child tag is offered to the instance, which places it according
        to the child's ``slot`` attribute; that attribute is removed
        afterwards. The ``<slot>`` tags are then unwrapped and the filled
        node is returned.
        """
        instance = template.clone()

        for content in element.find_all(recursive=False):
            instance.insert_content(content)
            del content[SlotName]

        instance.remove_slots()
        return instance.value

    def render(self, element: Element, templates: TemplateDict) -> RenderableElement:
        """Render *element* using the template it refers to.

        An element without a ``template`` attribute is returned unchanged.

        Raises:
            KeyError: if the attribute names a template that is not in
                *templates*.
        """
        if not element.has_attr(TemplateAttr):
            return element

        template_id = element[TemplateAttr]
        template = templates.get(template_id)

        if template is None:
            # Fail here with the offending id rather than later with an
            # AttributeError on None inside apply_template.
            raise KeyError(f"unknown template {template_id!r}")

        return self.apply_template(element, template)
|
||||
|
||||
class Puppygirl:
    """Expands custom tags in an HTML tree using registered renderers and templates."""

    # Renderer objects; those with a ``name`` attribute are applied to tags
    # of that name in parse_tree.
    elements: list[Renderable]
    # Globally registered templates, keyed by their id attribute.
    templates: TemplateDict

    def __init__(
        self,
        elements: list[Callable[[Self], Renderable]] | None = None,
        templates: ElementLikeList | None = None,
    ):
        """Create a processor.

        Args:
            elements: renderers, or callables (such as renderer classes) that
                are called with this object to produce a renderer.
            templates: template definitions to register up front.
        """
        # None instead of a shared mutable [] default.
        if elements is None:
            elements = []
        if templates is None:
            templates = []

        self.templates = Puppygirl._create_template_dict(templates)
        self.elements = [self._instantiate(el) for el in elements]

    def _instantiate(self, value: Callable[[Self], Renderable] | Renderable) -> Renderable:
        """Call *value* with this object if it is callable, else return it as is."""
        if callable(value):
            return value(self)
        return value

    @staticmethod
    def _create_template_dict(templates: Iterable[ElementLike]) -> TemplateDict:
        """Wrap each item in ``Template`` and key the result by its id attribute."""
        wrapped = [Template(t) for t in templates]
        return {t[IdAttr]: t for t in wrapped}

    def add_template(self, template: ElementLike):
        """Register *template*, replacing any template with the same id."""
        template = Template(template)
        # Must be the attribute set in __init__; there is no ``_templates``.
        self.templates[template[IdAttr]] = template

    @staticmethod
    def _find_local_templates(tree: BeautifulSoup) -> TemplateDict:
        """Collect the ``<template>`` tags in *tree* that carry an id."""
        candidates = tree.find_all(TemplateName)
        with_id = filter(lambda t: t.has_attr(IdAttr), candidates)
        return Puppygirl._create_template_dict(with_id)

    def fetch(self, path: str) -> BeautifulSoup:
        """Open the file at *path* in text mode and return its processed tree."""
        with open(path, "r") as f:
            return self.parse(f)

    def parse(self, value: Parsable) -> BeautifulSoup:
        """Process *value*, parsing it with ``html.parser`` unless it is already a tree."""
        if isinstance(value, (BeautifulSoup, Tag)):
            return self.parse_tree(value)

        return self.parse_tree(BeautifulSoup(value, features='html.parser'))

    def parse_tree(self, tree: BeautifulSoup) -> BeautifulSoup:
        """Apply every named renderer to the matching tags of *tree*, in place.

        Templates found in the tree are merged with the registered ones;
        on an id clash the registered template is used. Returns *tree*.
        """
        templates = Puppygirl._find_local_templates(tree) | self.templates

        for element in self.elements:
            if not hasattr(element, "name"):
                continue

            for tag in tree.find_all(element.name):
                new_tag = element.render(tag, templates)

                if isinstance(new_tag, Iterable):
                    # Move the rendered nodes into the tag, then drop the
                    # tag itself while keeping its children.
                    tag.extend(new_tag)
                    tag.unwrap()
                else:
                    tag.replace_with(new_tag)

        return tree
|
||||
|
||||
# Demo run: build a processor with the PuppygirlSlot renderer, process
# "mdn.html" (resolved against the current working directory) and print the
# result.
# NOTE(review): these statements sit at module level of a package
# __init__.py, so they run on every import and fail when mdn.html is absent.
# Consider moving them behind an ``if __name__ == "__main__":`` guard or into
# a separate script.
pg = Puppygirl([PuppygirlSlot])
tree = pg.fetch("mdn.html")
print(tree.prettify())
|
Loading…
Add table
Reference in a new issue