modm_data.html.text

 1# Copyright 2022, Niklas Hauser
 2# SPDX-License-Identifier: MPL-2.0
 3
 4import re
 5
 6
 7def replace(html, **substitutions) -> str:
 8    subs = {
 9        "u": "*", "i": "*", "b": "*",
10        "sub": "*", "sup": "*",
11        "br": "*", "p": "*"
12    }
13    subs.update(substitutions)
14    for tag, replacement in subs.items():
15        if tag in {"u", "i", "b", "p", "br", "sup", "sub"}:
16            if replacement == "*":
17                try:
18                    html = re.sub(f"</?{tag}>", "", html)
19                except:
20                    print(html)
21                    raise
22            else:
23                html = re.sub(f"<{tag}>(.*?)</{tag}>", replacement, html)
24        else:
25            html = re.sub(tag, replacement, html)
26    return html
27
28
def listify(text, pattern=None, strip=True) -> list[str]:
    """Split ``text`` on a regular expression and drop empty pieces.

    :param text: the string to split.
    :param pattern: regex used as separator; when ``None`` the text is split
        on spaces, commas, slashes and ``<br>``.
    :param strip: when true, surrounding whitespace is removed from every
        piece and pieces that end up empty are dropped; otherwise only
        pieces that are already empty are dropped.
    :return: the list of remaining pieces in their original order.
    """
    separator = " |,|/|<br>" if pattern is None else pattern
    pieces = re.split(separator, text)
    if not strip:
        return [piece for piece in pieces if piece]
    trimmed = (piece.strip() for piece in pieces)
    return [piece for piece in trimmed if piece]
36
37
class ReDict(dict):
    """Dictionary whose entries can be looked up by regular expression.

    Every key is passed through ``replace`` (with the given ``subs``) and the
    pattern is then searched in the result, ignoring case.
    """

    def match_value(self, pattern, default=None, **subs):
        """Return the value of the single key matching ``pattern``.

        :return: ``default`` if no key matches.
        :raises ValueError: if more than one key matches.
        """
        keys = self.match_keys(pattern, **subs)
        if len(keys) > 1:
            raise ValueError(f"Multiple key matches for {pattern}: {keys}!")
        return self[keys[0]] if keys else default

    def match_values(self, pattern, **subs) -> list:
        """Return the values of all keys matching ``pattern``."""
        return [self[key] for key in self.match_keys(pattern, **subs)]

    def match_key(self, pattern, default=None, **subs) -> str:
        """Return the single key matching ``pattern``.

        :return: the matching key, or ``default`` if there is not exactly one
            match and ``default`` is not ``None``.
        :raises AssertionError: if there is not exactly one match and
            ``default`` is ``None``.
        """
        keys = self.match_keys(pattern, **subs)
        if len(keys) == 1:
            return keys[0]
        if default is None:
            # Raised explicitly (not via `assert`) so the check also runs
            # under `python -O`; the exception type is kept for callers.
            raise AssertionError(
                f"Expected exactly one key match for {pattern}, got {len(keys)}: {keys}!")
        return default

    def match_keys(self, pattern, **subs) -> list:
        """Return all keys matching ``pattern``, in dictionary order."""
        return [key for key in self
                if re.search(pattern, replace(key, **subs), re.IGNORECASE)]
58
59
class Text:
    """Holds a fragment of HTML and renders it as text on request."""

    def __init__(self, html=None):
        """Store ``html``; any falsy value (including ``None``) becomes ``""``."""
        self.html = html if html else ""

    def text(self, **filters) -> str:
        """Return the stored HTML passed through ``replace`` with ``filters``."""
        return replace(self.html, **filters)

    def __repr__(self) -> str:
        """Show at most the first 70 characters of the stored HTML."""
        preview = self.html[:70]
        return f"Text({preview})"
69
70
class Heading(Text):
    """A ``Text`` whose representation is labelled as a heading."""

    def __repr__(self) -> str:
        """Show at most the first 70 characters of the stored HTML."""
        preview = self.html[:70]
        return f"Heading({preview})"
def replace(html, **substitutions) -> str:
 8def replace(html, **substitutions) -> str:
 9    subs = {
10        "u": "*", "i": "*", "b": "*",
11        "sub": "*", "sup": "*",
12        "br": "*", "p": "*"
13    }
14    subs.update(substitutions)
15    for tag, replacement in subs.items():
16        if tag in {"u", "i", "b", "p", "br", "sup", "sub"}:
17            if replacement == "*":
18                try:
19                    html = re.sub(f"</?{tag}>", "", html)
20                except:
21                    print(html)
22                    raise
23            else:
24                html = re.sub(f"<{tag}>(.*?)</{tag}>", replacement, html)
25        else:
26            html = re.sub(tag, replacement, html)
27    return html
def listify(text, pattern=None, strip=True) -> list[str]:
def listify(text, pattern=None, strip=True) -> list[str]:
    """Split ``text`` on a regular expression and drop empty pieces.

    :param text: the string to split.
    :param pattern: regex used as separator; when ``None`` the text is split
        on spaces, commas, slashes and ``<br>``.
    :param strip: when true, surrounding whitespace is removed from every
        piece and pieces that end up empty are dropped; otherwise only
        pieces that are already empty are dropped.
    :return: the list of remaining pieces in their original order.
    """
    separator = " |,|/|<br>" if pattern is None else pattern
    pieces = re.split(separator, text)
    if not strip:
        return [piece for piece in pieces if piece]
    trimmed = (piece.strip() for piece in pieces)
    return [piece for piece in trimmed if piece]
class ReDict(builtins.dict):
class ReDict(dict):
    """Dictionary whose entries can be looked up by regular expression.

    Every key is passed through ``replace`` (with the given ``subs``) and the
    pattern is then searched in the result, ignoring case.
    """

    def match_value(self, pattern, default=None, **subs):
        """Return the value of the single key matching ``pattern``.

        :return: ``default`` if no key matches.
        :raises ValueError: if more than one key matches.
        """
        keys = self.match_keys(pattern, **subs)
        if len(keys) > 1:
            raise ValueError(f"Multiple key matches for {pattern}: {keys}!")
        return self[keys[0]] if keys else default

    def match_values(self, pattern, **subs) -> list:
        """Return the values of all keys matching ``pattern``."""
        return [self[key] for key in self.match_keys(pattern, **subs)]

    def match_key(self, pattern, default=None, **subs) -> str:
        """Return the single key matching ``pattern``.

        :return: the matching key, or ``default`` if there is not exactly one
            match and ``default`` is not ``None``.
        :raises AssertionError: if there is not exactly one match and
            ``default`` is ``None``.
        """
        keys = self.match_keys(pattern, **subs)
        if len(keys) == 1:
            return keys[0]
        if default is None:
            # Raised explicitly (not via `assert`) so the check also runs
            # under `python -O`; the exception type is kept for callers.
            raise AssertionError(
                f"Expected exactly one key match for {pattern}, got {len(keys)}: {keys}!")
        return default

    def match_keys(self, pattern, **subs) -> list:
        """Return all keys matching ``pattern``, in dictionary order."""
        return [key for key in self
                if re.search(pattern, replace(key, **subs), re.IGNORECASE)]
def match_value(self, pattern, default=None, **subs):
40    def match_value(self, pattern, default=None, **subs):
41        keys = self.match_keys(pattern, **subs)
42        if not keys:
43            return default
44        if len(keys) > 1:
45            raise ValueError(f"Multiple key matches for {pattern}: {keys}!")
46        return self[keys[0]]
def match_values(self, pattern, **subs) -> list:
48    def match_values(self, pattern, **subs) -> list:
49        return [self[k] for k in self.match_keys(pattern, **subs)]
def match_key(self, pattern, default=None, **subs) -> str:
51    def match_key(self, pattern, default=None, **subs) -> str:
52        keys = self.match_keys(pattern, **subs)
53        if default is None: assert len(keys) == 1
54        if len(keys) != 1: return default
55        return keys[0]
def match_keys(self, pattern, **subs) -> list:
57    def match_keys(self, pattern, **subs) -> list:
58        return [k for k in self.keys() if re.search(pattern, replace(k, **subs), re.IGNORECASE)]
Inherited Members
builtins.dict
get
setdefault
pop
popitem
keys
items
values
update
fromkeys
clear
copy
class Text:
class Text:
    """Holds a fragment of HTML and renders it as text on request."""

    def __init__(self, html=None):
        """Store ``html``; any falsy value (including ``None``) becomes ``""``."""
        self.html = html if html else ""

    def text(self, **filters) -> str:
        """Return the stored HTML passed through ``replace`` with ``filters``."""
        return replace(self.html, **filters)

    def __repr__(self) -> str:
        """Show at most the first 70 characters of the stored HTML."""
        preview = self.html[:70]
        return f"Text({preview})"
Text(html=None)
62    def __init__(self, html=None):
63        self.html = html or ""
html
def text(self, **filters) -> str:
65    def text(self, **filters) -> str:
66        return replace(self.html, **filters)
class Heading(Text):
class Heading(Text):
    """A ``Text`` whose representation is labelled as a heading."""

    def __repr__(self) -> str:
        """Show at most the first 70 characters of the stored HTML."""
        preview = self.html[:70]
        return f"Heading({preview})"
Inherited Members
Text
Text
html
text