modm_data.html.text
# Copyright 2022, Niklas Hauser
# SPDX-License-Identifier: MPL-2.0

import re


def replace(html, **substitutions) -> str:
    """Strip or rewrite simple HTML tags and apply regex substitutions.

    The tags ``u``, ``i``, ``b``, ``sub``, ``sup``, ``br`` and ``p`` are
    handled specially. By default each of them maps to ``"*"``, which removes
    the opening and closing tag while keeping the enclosed content.

    :param html: the string to process.
    :param substitutions: overrides and additions, applied in order after the
        defaults. If the key is one of the tag names above, a value of ``"*"``
        removes the tags; any other value is used as the ``re.sub``
        replacement for ``<tag>(.*?)</tag>`` and may reference the content as
        ``\\1``. Any other key is treated as a regular expression and replaced
        by its value.
    :return: the processed string.
    :raises TypeError: if ``html`` is not a string.
    """
    # Every default pattern is a str pattern, so anything but a str would fail
    # inside re.sub with a TypeError anyway. Fail early with a message that
    # shows the offending value instead of printing it to stdout.
    if not isinstance(html, str):
        raise TypeError(
            f"replace() expects html to be a str, "
            f"got {type(html).__name__}: {html!r}")

    tags = ("u", "i", "b", "sub", "sup", "br", "p")
    subs = dict.fromkeys(tags, "*")
    subs.update(substitutions)

    for key, replacement in subs.items():
        if key not in tags:
            # Not a known tag name: the key itself is the regex pattern.
            html = re.sub(key, replacement, html)
        elif replacement == "*":
            # Drop the tag markers only, keep whatever they enclose.
            html = re.sub(f"</?{key}>", "", html)
        else:
            # NOTE: this form only matches when a closing tag is present.
            html = re.sub(f"<{key}>(.*?)</{key}>", replacement, html)
    return html


def listify(text, pattern=None, strip=True) -> list[str]:
    """Split ``text`` into a list of non-empty items.

    :param text: the string to split.
    :param pattern: regex to split on. Defaults to a space, comma, slash or a
        literal ``<br>``.
    :param strip: if true, surrounding whitespace is removed from every item
        and items that are empty after stripping are dropped. Otherwise only
        empty items are dropped.
    :return: the list of items.
    """
    if pattern is None:
        pattern = " |,|/|<br>"
    items = re.split(pattern, text)
    if strip:
        return [item.strip() for item in items if item.strip()]
    return [item for item in items if item]


class ReDict(dict):
    """A ``dict`` whose keys can be looked up by regular expression.

    Before matching, every key is passed through :func:`replace` with the
    given ``subs``. Matching uses ``re.search`` and ignores case.
    """

    def match_value(self, pattern, default=None, **subs):
        """Return the value of the only key matching ``pattern``.

        :return: ``default`` if no key matches.
        :raises ValueError: if more than one key matches.
        """
        keys = self.match_keys(pattern, **subs)
        if not keys:
            return default
        if len(keys) > 1:
            raise ValueError(f"Multiple key matches for {pattern}: {keys}!")
        return self[keys[0]]

    def match_values(self, pattern, **subs) -> list:
        """Return the values of all keys matching ``pattern``."""
        return [self[key] for key in self.match_keys(pattern, **subs)]

    def match_key(self, pattern, default=None, **subs) -> str:
        """Return the only key matching ``pattern``.

        :return: the matching key, or ``default`` if ``default`` is not
            ``None`` and the number of matches is not exactly one.
        :raises AssertionError: if ``default`` is ``None`` and the number of
            matches is not exactly one.
        """
        keys = self.match_keys(pattern, **subs)
        if len(keys) == 1:
            return keys[0]
        if default is None:
            # Raised explicitly rather than via `assert`, so the check is not
            # stripped when Python runs with -O.
            raise AssertionError(
                f"Expected exactly one key matching {pattern!r}, "
                f"found {len(keys)}: {keys}")
        return default

    def match_keys(self, pattern, **subs) -> list:
        """Return all keys matching ``pattern``, in dictionary order."""
        return [key for key in self
                if re.search(pattern, replace(key, **subs), re.IGNORECASE)]


class Text:
    """A piece of text stored as an HTML string."""

    def __init__(self, html=None):
        # A missing (or otherwise falsy) value is stored as an empty string.
        self.html = html or ""

    def text(self, **filters) -> str:
        """Return the HTML passed through :func:`replace` with ``filters``."""
        return replace(self.html, **filters)

    def __repr__(self) -> str:
        # Only the first 70 characters of the HTML are shown.
        return f"Text({self.html[:70]})"


class Heading(Text):
    """A :class:`Text` that represents itself as a heading."""

    def __repr__(self) -> str:
        # Only the first 70 characters of the HTML are shown.
        return f"Heading({self.html[:70]})"
def
replace(html, **substitutions) -> str:
def replace(html, **substitutions) -> str:
    """Strip or rewrite simple HTML tags and apply regex substitutions.

    The tags ``u``, ``i``, ``b``, ``sub``, ``sup``, ``br`` and ``p`` are
    handled specially. By default each of them maps to ``"*"``, which removes
    the opening and closing tag while keeping the enclosed content.

    :param html: the string to process.
    :param substitutions: overrides and additions, applied in order after the
        defaults. If the key is one of the tag names above, a value of ``"*"``
        removes the tags; any other value is used as the ``re.sub``
        replacement for ``<tag>(.*?)</tag>`` and may reference the content as
        ``\\1``. Any other key is treated as a regular expression and replaced
        by its value.
    :return: the processed string.
    :raises TypeError: if ``html`` is not a string.
    """
    # Every default pattern is a str pattern, so anything but a str would fail
    # inside re.sub with a TypeError anyway. Fail early with a message that
    # shows the offending value instead of printing it to stdout.
    if not isinstance(html, str):
        raise TypeError(
            f"replace() expects html to be a str, "
            f"got {type(html).__name__}: {html!r}")

    tags = ("u", "i", "b", "sub", "sup", "br", "p")
    subs = dict.fromkeys(tags, "*")
    subs.update(substitutions)

    for key, replacement in subs.items():
        if key not in tags:
            # Not a known tag name: the key itself is the regex pattern.
            html = re.sub(key, replacement, html)
        elif replacement == "*":
            # Drop the tag markers only, keep whatever they enclose.
            html = re.sub(f"</?{key}>", "", html)
        else:
            # NOTE: this form only matches when a closing tag is present.
            html = re.sub(f"<{key}>(.*?)</{key}>", replacement, html)
    return html
def
listify(text, pattern=None, strip=True) -> list[str]:
class
ReDict(builtins.dict):
class ReDict(dict):
    """A ``dict`` whose keys can be looked up by regular expression.

    Before matching, every key is passed through ``replace()`` with the given
    ``subs``. Matching uses ``re.search`` and ignores case.
    """

    def match_value(self, pattern, default=None, **subs):
        """Return the value of the only key matching ``pattern``.

        :return: ``default`` if no key matches.
        :raises ValueError: if more than one key matches.
        """
        keys = self.match_keys(pattern, **subs)
        if not keys:
            return default
        if len(keys) > 1:
            raise ValueError(f"Multiple key matches for {pattern}: {keys}!")
        return self[keys[0]]

    def match_values(self, pattern, **subs) -> list:
        """Return the values of all keys matching ``pattern``."""
        return [self[key] for key in self.match_keys(pattern, **subs)]

    def match_key(self, pattern, default=None, **subs) -> str:
        """Return the only key matching ``pattern``.

        :return: the matching key, or ``default`` if ``default`` is not
            ``None`` and the number of matches is not exactly one.
        :raises AssertionError: if ``default`` is ``None`` and the number of
            matches is not exactly one.
        """
        keys = self.match_keys(pattern, **subs)
        if len(keys) == 1:
            return keys[0]
        if default is None:
            # Raised explicitly rather than via `assert`, so the check is not
            # stripped when Python runs with -O.
            raise AssertionError(
                f"Expected exactly one key matching {pattern!r}, "
                f"found {len(keys)}: {keys}")
        return default

    def match_keys(self, pattern, **subs) -> list:
        """Return all keys matching ``pattern``, in dictionary order."""
        return [key for key in self
                if re.search(pattern, replace(key, **subs), re.IGNORECASE)]
Inherited Members
- builtins.dict
- get
- setdefault
- pop
- popitem
- keys
- items
- values
- update
- fromkeys
- clear
- copy
class
Text: