modm_data.pdf2html.stmicro.ast

  1# Copyright 2022, Niklas Hauser
  2# SPDX-License-Identifier: MPL-2.0
  3
  4import logging
  5from lxml import etree
  6import anytree
  7from anytree import RenderTree
  8from collections import defaultdict
  9from ...utils import list_strip, Rectangle, ReversePreOrderIter
 10from .table import VirtualTable, TableCell
 11
 12LOGGER = logging.getLogger(__name__)
 13
 14
 15def _normalize_area(area):
 16    for child in ReversePreOrderIter(area):
 17        if child.name.startswith("list"):
 18            # We need to normalize the xpos back to the first character
 19            child.xpos = int(child.obj.bbox.left) - area.xpos
 20        else:
 21            # And then make the xpos relative to the area left for consistent comparisons
 22            child.xpos -= area.xpos
 23    area.xpos = 0
 24    return area
 25
 26
 27def merge_area(document, area, debug=False):
 28    if document is None:
 29        document = anytree.Node("document", xpos=0, _page=area.page, _doc=area.page.pdf, _end=None)
 30        document._end = document
 31    if not area.children:
 32        return document
 33    if debug: print()
 34
 35    def _find_end(node):
 36        # Find the last leaf node but skip lines, paragraphs, captions/tables/figures
 37        return next((c for c in ReversePreOrderIter(node)
 38                     if any(c.name.startswith(name) for name in {"head", "list", "note"})),
 39                    next(ReversePreOrderIter(node), node))
 40    def _find_ancestor(filter_):
 41        if filter_(document._end): return document._end
 42        return next((c for c in document._end.iter_path_reverse()
 43                     if filter_(c)), document.root)
 44
 45    area = _normalize_area(area)
 46    if debug: print(RenderTree(area))
 47    children = area.children
 48    # All area nodes up to the next top-level element must now be
 49    # xpos-aligned with the previous area's last leaf node
 50    connect_index = next((ii for ii, c in enumerate(children)
 51                          if c.name.startswith("head")), len(children))
 52    x_em = area.page._spacing["x_em"]
 53
 54    if debug: print("area=", area, "connect_index=", connect_index)
 55    # Align these children with the last leaf node xpos
 56    for child in children[:connect_index]:
 57        if any(child.name.startswith(name) for name in {"list"}):
 58            # Find the node that is left of the current node but not too far left
 59            host = _find_ancestor(lambda c: -4 * x_em < (c.xpos - child.xpos) < -x_em or
 60                                            c.name.startswith("head"))
 61        elif (child.name == "para" and document._end.name == "note" and
 62              child.children[0].obj.contains_font("Italic", "Oblique")):
 63            host = document._end
 64        else:
 65            # Insert underneath the next heading
 66            host = _find_ancestor(lambda c: c.name.startswith("head"))
 67
 68        child.parent = host
 69        document._end = _find_end(document)
 70        if debug:
 71            print("child=", child)
 72            print("host=", host)
 73            print("end=", document._end)
 74            print()
 75
 76    # Add the remaining top-level children to connect index node
 77    if connect_index < len(children):
 78        children[connect_index].parent = document
 79        for child in children[connect_index + 1:]:
 80            child.parent = children[connect_index]
 81
 82    document._end = _find_end(document)
 83
 84    if debug:
 85        print()
 86        print()
 87
 88    return document
 89
 90
 91def _normalize_lists(node):
 92    lists = []
 93    current = []
 94    current_name = None
 95    for child in node.children:
 96        # Normalize the lists from the leaves up
 97        _normalize_lists(child)
 98        # then split the children based on their names
 99        if current_name is None or child.name == current_name:
100            current.append(child)
101        else:
102            lists.append(current)
103            current = [child]
104        current_name = child.name
105    if current:
106        lists.append(current)
107
108    # Create a new list of children
109    new_children = []
110    for llist in lists:
111        # Insert a new list group node and redirect all children to it
112        if llist[0].name.startswith("list"):
113            nlist = anytree.Node(llist[0].name, obj=llist[0].obj,
114                                 start=llist[0].value, xpos=llist[0].xpos)
115            for lnode in llist:
116                lnode.name = "element"
117                lnode.parent = nlist
118
119            new_children.append(nlist)
120        else:
121            new_children.extend(llist)
122
123    # Set the new children which have the same order
124    node.children = new_children
125    return node
126
127
def _normalize_paragraphs(document):
    """Drop the ``para`` wrapper where a parent holds just one paragraph.

    Only parents named ``element``, ``caption``, ``document`` or ``cell`` are
    considered. For a caption (always) or a parent with exactly one ``para``
    child, each ``para`` child is replaced in position by its first child,
    and if that leaves several ``text`` children their lines are merged into
    the first one.

    :param document: tree root; modified in place.
    :return: the same ``document``.
    """
    paras = anytree.search.findall(document, filter_=lambda n: n.name == "para")
    hosts = {p.parent for p in paras
             if p.parent.name in {"element", "caption", "document", "cell"}}
    for host in hosts:
        para_count = sum(1 for c in host.children if c.name == "para")
        if host.name != "caption" and para_count != 1:
            continue
        # Substitute in place so the sibling order is preserved
        host.children = [c.children[0] if c.name == "para" else c
                         for c in host.children]
        texts = [c for c in host.children if c.name == "text"]
        if len(texts) < 2:
            continue
        keeper = texts[0]
        for extra in texts[1:]:
            for line in extra.children:
                line.parent = keeper
            extra.parent = None
    return document
145
146
def _normalize_lines(document):
    """Insert a ``text`` node between every ``para`` and its children.

    :param document: tree root; modified in place.
    :return: the same ``document``.
    """
    def _is_para(node):
        return node.name == "para"

    for para in anytree.search.findall(document, filter_=_is_para):
        wrapper = anytree.Node("text")
        for line in para.children:
            line.parent = wrapper
        para.children = [wrapper]
    return document
155
156
def _normalize_captions(document):
    """Move each ``caption`` node below the table/figure it belongs to.

    Starting at the caption's own position, the caption and the five siblings
    after it are searched for the first node whose name equals the caption's
    ``_type``. That node becomes the caption's parent and receives the
    caption's ``number``. A caption without a match is logged and detached.

    :param document: tree root; modified in place.
    :return: the same ``document``.
    """
    for caption in anytree.search.findall(document, filter_=lambda n: n.name == "caption"):
        siblings = caption.parent.children
        start = siblings.index(caption)
        target = next((s for s in siblings[start:start + 6]
                       if s.name == caption._type), None)
        if target is None:
            LOGGER.error(f"Discarding caption {caption}!\n{RenderTree(caption)}")
            caption.parent = None
        else:
            caption.parent = target
            target.number = caption.number
    return document
171
172
def _normalize_headings(document):
    """Turn every ``head*`` node into a ``section`` titled by its first child.

    The first child of the heading takes over the heading's name and
    ``marker``, unless that child's first child has no children, in which
    case it is detached. The original heading node is renamed to
    ``"section"`` either way.

    :param document: tree root; modified in place.
    :return: the same ``document``.
    """
    found = anytree.search.findall(document, filter_=lambda n: n.name.startswith("head"))
    for section in found:
        title = section.children[0]
        if title.children[0].children:
            # The title node now carries the heading level and marker
            title.__dict__["marker"] = section.marker
            title.name = section.name
        else:
            # An empty heading has nothing worth keeping
            title.parent = None
        section.name = "section"
    return document
186
187
def _normalize_registers(document):
    """Collect consecutive ``bit`` nodes into two-column ``bits`` tables.

    Within each section (and the document root), neighbouring ``bit``
    children on the same page are moved into a new ``table`` node with
    ``_type="bits"`` that takes their place. Each such node then gets a
    ``VirtualTable`` of kind ``"bitfield"`` holding one row per bit: a simple
    left cell and a right cell, both spanning the vertical range of the
    bit's lines.

    :param document: tree root; modified in place.
    :return: the same ``document``.
    """
    groups = []
    sections = anytree.search.findall(document, filter_=lambda n: n.name == "section")
    for section in (sections + (document,)):
        kept = []
        group = None
        for child in section.children:
            if child.name != "bit":
                # Any other node ends the current run of bits
                group = None
                kept.append(child)
                continue
            if group is None or group._page != child._page:
                group = anytree.Node("table", xpos=child.xpos, obj=None,
                                     _type="bits", _width=1, _page=child._page)
                kept.append(group)
                groups.append(group)
            child.parent = group
        # Same order as before, with the runs replaced by their group node
        section.children = kept

    for group in groups:
        cells = []
        for row, bit in enumerate(group.children):
            bit.parent = None
            lines = [c for c in bit.descendants if c.name == "line"]
            # Row height runs from the first line's top to the last line's bottom
            top = next(c.obj.bbox.top for c in lines)
            bottom = next(c.obj.bbox.bottom for c in reversed(lines))
            # Left column holds the bit position, right column the description
            bits_box = Rectangle(bit._left, bottom, bit._middle, top)
            cells.append(TableCell(None, (row, 0), bits_box, (1,1,1,1), is_simple=True))
            text_box = Rectangle(bit._middle, bottom, bit._right, top)
            cells.append(TableCell(None, (row, 1), text_box, (1,1,1,1)))
        outline = Rectangle(min(c.bbox.left for c in cells),
                            min(c.bbox.bottom for c in cells),
                            max(c.bbox.right for c in cells),
                            max(c.bbox.top for c in cells))
        group.obj = VirtualTable(group._page, outline, cells, "bitfield")

    return document
230
231
def _normalize_tables(document):
    """Merge tables that were split into several ``table`` nodes.

    Three kinds of groups are collected per section (and for the root):
    content tables sharing a caption number, directly adjacent ``register``
    tables, and directly adjacent ``bits`` tables. Content and bits tables
    are appended at the bottom of the group's first table, register tables
    at its side. Every table that was merged successfully is detached.

    :param document: tree root; modified in place.
    :return: the same ``document``.
    """
    numbered = defaultdict(list)
    register_runs = []
    bits_runs = []
    open_registers = []
    open_bits = []

    def _close_runs():
        # Store the pending register/bits runs and start fresh ones
        nonlocal open_registers, open_bits
        if open_registers:
            register_runs.append(open_registers)
            open_registers = []
        if open_bits:
            bits_runs.append(open_bits)
            open_bits = []

    sections = anytree.search.findall(document, filter_=lambda n: n.name == "section")
    last_number = 0
    for section in (sections + (document,)):
        open_registers = []
        open_bits = []
        for child in section.children:
            if child.name != "table":
                _close_runs()
                last_number = 0
                continue
            kind = child._type
            if kind == "table":
                if child.number > 0:
                    # Tables with the same caption number belong together
                    numbered[child.number].append(child)
                    if document._page._template == "blue_gray":
                        last_number = child.number
                elif last_number > 0:
                    # A caption-less table continues the previous numbered one
                    numbered[last_number].append(child)
                _close_runs()
            elif kind == "register":
                open_registers.append(child)
            elif kind == "bits":
                open_bits.append(child)
            else:
                last_number = 0
        _close_runs()
        last_number = 0
    _close_runs()

    # Content tables: continuation parts go underneath the first part
    for parts in numbered.values():
        primary = parts[0]
        for part in parts[1:]:
            print(f"T{part.obj._page.number} ", end="")
            if primary.obj.append_bottom(part.obj):
                part.parent = None
    # Register tables: continuation parts go to the right of the first part
    for parts in register_runs:
        primary = parts[0]
        for part in parts[1:]:
            if primary.obj.append_side(part.obj, expand=True):
                part.parent = None
    # Bits tables: continuation parts go underneath, headers are not merged
    for parts in bits_runs:
        primary = parts[0]
        for part in parts[1:]:
            if primary.obj.append_bottom(part.obj, merge_headers=False):
                part.parent = None

    return document
299
300
def _normalize_chapters(document):
    """Group the top-level nodes of ``document`` into ``chapter`` nodes.

    A chapter starts at every top-level node that directly contains a
    ``head1`` or ``head2`` node; nodes in front of the first such heading
    form a leading chapter of their own. Each chapter node carries the
    heading text as ``title`` and a sanitized, lower-case ``_filename``.

    NOTE: the call to this pass in ``normalize_document`` is commented out.

    :param document: tree root; modified in place.
    :return: the same ``document`` (the previous ``-> list`` annotation was
        wrong and has been removed).
    """
    headings = anytree.search.findall(document, filter_=lambda n: n.name in ["head1", "head2"], maxlevel=3)
    # Snapshot: the chapter nodes created below are appended to the document
    top_level = document.children
    # Chapter boundaries: every heading position plus the end of the document
    bounds = [top_level.index(h.parent) for h in headings] + [len(top_level)]
    if bounds[0] != 0:
        bounds = [0] + bounds

    cleaner = str.maketrans(" /()-,:", "_______")

    chapters = []
    for idx0, idx1 in zip(bounds, bounds[1:]):
        first = top_level[idx0]
        # A leading node without children would otherwise raise IndexError
        heading = first.children[0] if first.children else first
        lines = anytree.search.findall(heading, filter_=lambda n: n.name == "line")
        chapter_name = " ".join("".join(c.char for c in line.obj.chars).strip()
                                for line in lines)
        if heading.name == "head1":
            chapter_name = "0 " + chapter_name
        filename = chapter_name.lower().translate(cleaner)
        # Half-open range: the node at idx1 already starts the next chapter
        chapters.append((chapter_name, filename, top_level[idx0:idx1]))

    for title, filename, nodes in chapters:
        chapter = anytree.Node("chapter", title=title, _filename=filename, parent=document)
        for node in nodes:
            node.parent = chapter

    return document
329
330
def normalize_document(document):
    """Run all normalization passes over ``document`` in their fixed order.

    The name of each pass (without its leading underscore) is printed before
    the pass runs.

    :param document: the merged document tree.
    :return: the tree returned by the last pass.
    """
    def _run_pass(func, indata, debug=0):
        # debug == -1 dumps the tree before the pass, debug == 1 after it
        print(func.__name__[1:])
        if debug == -1:
            print(RenderTree(indata))
            print()
        outdata = func(indata)
        if debug == 1:
            print(RenderTree(outdata))
            print()
        return outdata

    passes = (
        _normalize_lines,
        _normalize_captions,
        _normalize_lists,
        _normalize_paragraphs,
        _normalize_headings,
        _normalize_registers,
        _normalize_tables,
        # _normalize_chapters is intentionally left out of the pipeline
    )
    for step in passes:
        document = _run_pass(step, document)
    return document
352
353
def _format_html_figure(xmlnode, figurenode):
    """Append a placeholder ``<table>`` for ``figurenode`` to ``xmlnode``.

    The table width is ``figurenode._width * 50`` percent. If the figure has
    a ``caption`` child, the table gets the id ``figure<number>`` and a
    rendered ``<caption>``. The single cell only contains ``(omitted)``.

    :param xmlnode: lxml element that receives the table.
    :param figurenode: figure node of the document tree.
    """
    table = etree.SubElement(xmlnode, "table")
    table.set("width", f"{int(figurenode._width * 50)}%")

    captionnode = next((c for c in figurenode.children if c.name == "caption"), None)
    if captionnode is not None:
        table.set("id", f"figure{captionnode.number}")
        caption = etree.SubElement(table, "caption")
        _format_html(caption, captionnode, with_newlines=True)

    row = etree.SubElement(table, "tr")
    cell = etree.SubElement(row, "td")
    cell.text = "(omitted)"
372
373
def _format_html_table(xmlnode, tablenode):
    """Render ``tablenode`` as an HTML ``<table>`` appended to ``xmlnode``.

    A ``caption`` child yields the id ``table<number>`` and a ``<caption>``
    element. Register and bitfield tables get the CSS class ``rt`` or ``bt``.
    Cells are emitted in the order of ``tablenode.obj.cells``; a new ``<tr>``
    is started whenever the cell's ``y`` changes. Non-simple cells are
    normalized as small documents of their own and rendered recursively.

    :param xmlnode: lxml element that receives the table.
    :param tablenode: table node whose ``obj`` provides ``_type``,
        ``header_rows`` and ``cells``.
    """
    table = etree.Element("table")
    xmlnode.append(table)

    captionnode = next((c for c in tablenode.children if c.name == "caption"), None)
    if captionnode is not None:
        table.set("id", f"table{captionnode.number}")
        caption = etree.Element("caption")
        table.append(caption)
        _format_html(caption, captionnode, with_newlines=True)

    kind = tablenode.obj._type
    table_class = {"register": "rt", "bitfield": "bt"}.get(kind)
    if table_class is not None:
        table.set("class", table_class)

    # Cells arrive ordered by (y, x), so a change in y starts a new row
    row = None
    row_y = -1
    header_rows = tablenode.obj.header_rows
    for cell in tablenode.obj.cells:
        if row is None or row_y != cell.y:
            row_y = cell.y
            row = etree.Element("tr")
            table.append(row)

        td = etree.Element("th" if cell.is_header else "td")
        row.append(td)
        if cell.xspan > 1:
            td.set("colspan", str(cell.xspan))
        if cell.yspan > 1:
            td.set("rowspan", str(cell.yspan))

        left = not cell.rotation and kind != "register" and cell.left_aligned
        classes = ["tl"] if left else []

        # Rotated content is wrapped in a span that carries the rotation class
        content = td
        if cell.rotation:
            content = etree.Element("span")
            content.set("class", "tv")
            td.append(content)

        # Cells ending on the last header row get the header border class
        if (cell.y + cell.yspan) == header_rows:
            classes.insert(0, "thb")
        if classes:
            td.set("class", " ".join(classes))

        if cell._is_simple:
            content.text = cell.content.strip()
            continue

        cell_doc = anytree.Node("document", _page=cell.ast.page)
        cell.ast.parent = cell_doc
        for step in (_normalize_lines, _normalize_lists, _normalize_paragraphs):
            cell_doc = step(cell_doc)
        _format_html(content, cell_doc, with_newlines=True,
                     ignore_formatting={"bold"} if cell.is_header else None)
430
431
def _format_char(node, state, chars, ignore):
    """Process the first entry of ``chars`` against the format tree at ``node``.

    If the character's formatting flags equal ``state`` (ignoring the keys in
    ``ignore``), the character is added below ``node`` and is reported as
    consumed. Otherwise one formatting level is closed (when any flag was
    switched off) or opened (first switched-on flag) and the character is
    left for the next call.

    :param node: current node of the format tree.
    :param state: dict of active formatting flags, or ``None`` for all off.
    :param chars: sequence of character property dicts; only ``chars[0]`` is read.
    :param ignore: formatting keys that must not trigger a change.
    :return: tuple ``(consumed, node, state)``.
    """
    keys = ("superscript", "subscript", "italic", "bold", "underline")
    if state is None:
        state = dict.fromkeys(keys, False)
    current = chars[0]
    symbol = current["char"]
    if symbol == '\r':
        # Carriage returns are consumed without leaving a trace
        return (True, node, state)

    changed = {key: current[key] for key in keys
               if state[key] != current[key] and key not in ignore}

    if changed:
        if not all(changed.values()):
            # Something was switched off: close the innermost format node
            state[node.name] = False
            return (False, node.parent, state)
        # Only switch-ons remain: open the first one as a new nested node
        turn_on = next(iter(changed))
        nested = anytree.Node(turn_on, parent=node)
        state[turn_on] = True
        return (False, nested, state)

    last = node.children[-1] if node.children else None
    last_name = None if last is None else last.name
    if symbol == '\n' and last_name != "newline":
        anytree.Node("newline", parent=node)
    elif last_name != "chars":
        anytree.Node("chars", parent=node, chars=symbol)
    else:
        last.chars += symbol
    return (True, node, state)
472
473
def _format_lines(textnode, ignore, with_newlines, with_start):
    """Build a tree of formatting nodes from the lines below ``textnode``.

    The characters of every ``line`` child are converted to property dicts
    via the page's ``_char_properties``. With ``with_start`` false, each line
    is read from its ``start`` offset only. With ``with_newlines`` false,
    line feed and carriage return characters are dropped; with it true, a
    ``'\\n'`` entry is added after each line that does not already end in
    one. Surrounding spaces and newlines are stripped before the characters
    are fed one by one to ``_format_char``.

    :param textnode: ``text`` node whose children are the lines.
    :param ignore: formatting keys passed on to ``_format_char``.
    :param with_newlines: keep/insert line breaks between lines.
    :param with_start: include the characters in front of ``line.start``.
    :return: the root ``format`` node of the new tree.
    """
    char_props = textnode.root._page._char_properties
    formatn = anytree.Node("format")
    chars = []
    for line in textnode.children:
        if line.name != "line":
            continue
        for char in line.obj.chars[0 if with_start else line.start:]:
            if not with_newlines and char.unicode in {0xa, 0xd}:
                continue
            chars.append(char_props(line.obj, char))
        # `chars` may still be empty here (e.g. a first line with nothing
        # after `line.start`); indexing it unguarded raised IndexError. A
        # leading newline would be stripped below anyway, so none is added.
        if with_newlines and chars and chars[-1]["char"] != '\n':
            char = char_props(line.obj, line.obj.chars[-1])
            char["char"] = '\n'
            chars.append(char)

    chars = list_strip(chars, lambda c: c["char"] in {' ', '\n'})
    state = None
    node = formatn
    while chars:
        # _format_char may need several calls per character to open or
        # close formatting levels before it finally consumes it
        popchar, node, state = _format_char(node, state, chars, ignore)
        if popchar:
            chars.pop(0)
    return formatn
496
497
def _format_html_fmt(xmlnode, treenode, tail=False):
    """Convert one node of the format tree into HTML below ``xmlnode``.

    A ``chars`` node adds its characters to the ``text`` of ``xmlnode``, or
    to its ``tail`` when ``tail`` is true. Any other node becomes a
    sub-element (``sup``, ``sub``, ``i``, ``b``, ``u`` or ``br``) whose
    children are converted recursively.

    :param xmlnode: lxml element the output is attached to.
    :param treenode: node of the tree built by ``_format_lines``.
    :param tail: whether following text belongs after ``xmlnode`` rather
        than inside it.
    :return: tuple ``(tail, element)`` to pass into the next sibling's call.
    """
    tags = {
        "superscript": "sup",
        "subscript": "sub",
        "italic": "i",
        "bold": "b",
        "underline": "u",
        "newline": "br",
    }
    if treenode.name == "chars":
        slot = "tail" if tail else "text"
        setattr(xmlnode, slot, (getattr(xmlnode, slot) or "") + treenode.chars)
        return (tail, xmlnode)

    # In tail mode the new element is a sibling of xmlnode, not a child
    parent = xmlnode.getparent() if tail else xmlnode
    element = etree.SubElement(parent, tags[treenode.name])
    cursor, in_tail = element, False
    for child in treenode.children:
        in_tail, cursor = _format_html_fmt(cursor, child, in_tail)
    # Whatever follows this element must go into its tail
    return (True, element)
524
525
def _format_html_text(xmlnode, treenode, ignore=None, with_newlines=False, with_start=True):
    """Render the lines of a ``text`` node as formatted HTML inside ``xmlnode``.

    :param xmlnode: lxml element that receives the text.
    :param treenode: ``text`` node of the document tree.
    :param ignore: formatting keys to ignore; ``None`` means none.
    :param with_newlines: passed on to ``_format_lines``.
    :param with_start: passed on to ``_format_lines``.
    """
    skipped = ignore or set()
    fmttree = _format_lines(treenode, skipped, with_newlines, with_start)
    target, in_tail = xmlnode, False
    for fmtchild in fmttree.children:
        in_tail, target = _format_html_fmt(target, fmtchild, in_tail)
535
536
def _format_html(xmlnode, treenode, ignore_formatting=None,
                 with_newlines=False, with_start=True):
    """Recursively render ``treenode`` and its children into ``xmlnode``.

    Headings, paragraphs, notes, lists and list elements create a new HTML
    element that becomes the parent for the node's children. ``text``,
    ``table`` and ``figure`` nodes are delegated to their formatters and
    ``page`` nodes only set an id and print progress. Nodes with any other
    name produce no element of their own; their children are rendered
    directly into ``xmlnode``.

    The former ``"bits"`` branch was removed: it called ``_format_html_bits``,
    which is neither defined nor imported in this module, so reaching it
    raised ``NameError``. ``_normalize_registers`` converts bit nodes into
    ``table`` nodes, which are rendered by ``_format_html_table``.

    :param xmlnode: lxml element to render into.
    :param treenode: node of the normalized document tree.
    :param ignore_formatting: set of formatting keys to ignore, or ``None``.
    :param with_newlines: passed on to the text formatter.
    :param with_start: passed on to the text formatter; forced to false
        below list elements.
    """
    if ignore_formatting is None:
        ignore_formatting = set()
    current = xmlnode
    if treenode.name.startswith("head"):
        # The fifth character of the name is the heading level
        current = etree.Element(f"h{treenode.name[4]}")
        if treenode.marker:
            current.set("id", f"section{treenode.marker}")
        xmlnode.append(current)
        # Headings are styled by their tag, not by inline formatting
        ignore_formatting = ignore_formatting | {"bold", "italic", "underline"}

    elif treenode.name in {"para"}:
        current = etree.Element("p")
        xmlnode.append(current)

    elif treenode.name in {"note"}:
        current = etree.Element("div")
        current.set("class", "nt")
        xmlnode.append(current)

    elif treenode.name == "text":
        _format_html_text(xmlnode, treenode, ignore_formatting, with_newlines, with_start)

    elif treenode.name == "page":
        # Only the first page seen tags the enclosing element
        if not current.get("id"):
            current.set("id", f"page{treenode.number}")
        print(f"{treenode.number}.", end="", flush=True)
        return

    elif treenode.name == "table":
        _format_html_table(xmlnode, treenode)
        return

    elif treenode.name == "figure":
        _format_html_figure(xmlnode, treenode)
        return

    elif treenode.name.startswith("list"):
        # The fifth character of the name selects unordered vs. ordered
        if treenode.name[4] in {"b", "s"}:
            current = etree.Element("ul")
        else:
            current = etree.Element("ol")
        xmlnode.append(current)

    elif treenode.name == "element":
        current = etree.Element("li")
        if xmlnode.tag == "ol":
            current.set("value", str(treenode.value))
        xmlnode.append(current)
        # Lines below a list element are read from their `start` offset
        with_start = False

    for child in treenode.children:
        _format_html(current, child, ignore_formatting, with_newlines, with_start)
596
597
def format_document(document):
    """Render the normalized ``document`` tree as an HTML element tree.

    The page links the stylesheet ``../style.css`` in its head and holds the
    rendered document in its body.

    :param document: normalized document tree.
    :return: an ``lxml.etree.ElementTree`` rooted at ``<html>``.
    """
    root = etree.Element("html")

    head = etree.SubElement(root, "head")
    stylesheet = etree.SubElement(head, "link")
    stylesheet.set("rel", "stylesheet")
    stylesheet.set("href", "../style.css")

    body = etree.SubElement(root, "body")
    _format_html(body, document, with_newlines=True)

    return etree.ElementTree(root)
616
617
def write_html(html, path, pretty=True):
    """Write the element tree ``html`` to ``path`` with an HTML5 doctype.

    :param html: tree object providing ``write()``, as returned by
        ``format_document``.
    :param path: destination file, opened in binary mode.
    :param pretty: forwarded as ``pretty_print``.
    """
    with open(path, "wb") as outfile:
        html.write(outfile, pretty_print=pretty, doctype="<!DOCTYPE html>")
LOGGER = <Logger modm_data.pdf2html.stmicro.ast (WARNING)>
def merge_area(document, area, debug=False):
    """Attach the children of ``area`` to the growing ``document`` tree.

    A new ``document`` root is created when ``document`` is ``None``. The
    children of the (normalized) area that come before the area's first
    heading are hung below a host found by walking up from ``document._end``;
    the first heading is attached to the document itself and every child
    after it is attached to that heading.

    :param document: the document root built so far, or ``None``.
    :param area: area node whose children are moved into the document.
    :param debug: when true, print the intermediate trees and decisions.
    :return: the document root, with ``_end`` updated.
    """
    if document is None:
        document = anytree.Node("document", xpos=0, _page=area.page, _doc=area.page.pdf, _end=None)
        document._end = document
    if not area.children:
        return document
    if debug:
        print()

    def _is_anchor(node):
        # Only headings, lists and notes may become the new end node
        return any(node.name.startswith(prefix) for prefix in {"head", "list", "note"})

    def _find_end(node):
        # Last heading/list/note in reverse pre-order, else simply the last node
        fallback = next(ReversePreOrderIter(node), node)
        return next((c for c in ReversePreOrderIter(node) if _is_anchor(c)), fallback)

    def _find_ancestor(accepts):
        # Walk from the current end node towards the root until one matches
        end = document._end
        if accepts(end):
            return end
        for candidate in end.iter_path_reverse():
            if accepts(candidate):
                return candidate
        return document.root

    def _is_heading(node):
        return node.name.startswith("head")

    area = _normalize_area(area)
    if debug:
        print(RenderTree(area))
    # Snapshot of the children: re-parenting below must not change this sequence
    children = area.children
    # Everything in front of the first heading continues the previous area
    connect_index = len(children)
    for position, node in enumerate(children):
        if _is_heading(node):
            connect_index = position
            break
    x_em = area.page._spacing["x_em"]

    if debug:
        print("area=", area, "connect_index=", connect_index)
    for child in children[:connect_index]:
        if child.name.startswith("list"):
            # Host is left of the list item, but by no more than 4 em, or a heading
            def _hosts_list(c):
                return (-4 * x_em < (c.xpos - child.xpos) < -x_em) or _is_heading(c)
            host = _find_ancestor(_hosts_list)
        elif (child.name == "para" and document._end.name == "note" and
              child.children[0].obj.contains_font("Italic", "Oblique")):
            # Italic paragraph right after a note is attached to that note
            host = document._end
        else:
            # Anything else goes below the closest heading
            host = _find_ancestor(_is_heading)

        child.parent = host
        document._end = _find_end(document)
        if debug:
            print("child=", child)
            print("host=", host)
            print("end=", document._end)
            print()

    # The first heading becomes a top-level node hosting the rest of the area
    if connect_index < len(children):
        anchor = children[connect_index]
        anchor.parent = document
        for child in children[connect_index + 1:]:
            child.parent = anchor

    document._end = _find_end(document)

    if debug:
        print()
        print()

    return document
def normalize_document(document):
    """Run all normalization passes over ``document`` in their fixed order.

    The name of each pass (without its leading underscore) is printed before
    the pass runs.

    :param document: the merged document tree.
    :return: the tree returned by the last pass.
    """
    def _run_pass(func, indata, debug=0):
        # debug == -1 dumps the tree before the pass, debug == 1 after it
        print(func.__name__[1:])
        if debug == -1:
            print(RenderTree(indata))
            print()
        outdata = func(indata)
        if debug == 1:
            print(RenderTree(outdata))
            print()
        return outdata

    passes = (
        _normalize_lines,
        _normalize_captions,
        _normalize_lists,
        _normalize_paragraphs,
        _normalize_headings,
        _normalize_registers,
        _normalize_tables,
        # _normalize_chapters is intentionally left out of the pipeline
    )
    for step in passes:
        document = _run_pass(step, document)
    return document
def format_document(document):
    """Render the normalized ``document`` tree as an HTML element tree.

    The page links the stylesheet ``../style.css`` in its head and holds the
    rendered document in its body.

    :param document: normalized document tree.
    :return: an ``lxml.etree.ElementTree`` rooted at ``<html>``.
    """
    root = etree.Element("html")

    head = etree.SubElement(root, "head")
    stylesheet = etree.SubElement(head, "link")
    stylesheet.set("rel", "stylesheet")
    stylesheet.set("href", "../style.css")

    body = etree.SubElement(root, "body")
    _format_html(body, document, with_newlines=True)

    return etree.ElementTree(root)
def write_html(html, path, pretty=True):
    """Write the element tree ``html`` to ``path`` with an HTML5 doctype.

    :param html: tree object providing ``write()``, as returned by
        ``format_document``.
    :param path: destination file, opened in binary mode.
    :param pretty: forwarded as ``pretty_print``.
    """
    with open(path, "wb") as outfile:
        html.write(outfile, pretty_print=pretty, doctype="<!DOCTYPE html>")