modm_data.pdf2html.stmicro.convert

 1# Copyright 2022, Niklas Hauser
 2# SPDX-License-Identifier: MPL-2.0
 3
 4from anytree import RenderTree
 5
 6from .ast import merge_area, normalize_document
 7from .ast import format_document, write_html
 8from ..render import render_page_pdf
 9from ...utils import pkg_apply_patch, pkg_file_exists
10import pypdfium2 as pp
11import subprocess
12
13
def convert(doc, page_range, output_path, format_chapters=False, pretty=True,
            render_html=True, render_pdf=False, render_all=False,
            show_ast=False, show_tree=False, show_tags=False) -> bool:
    """Convert the selected pages of ``doc`` to HTML and/or debug output.

    Args:
        doc: Document object providing ``pages(page_range)``.
        page_range: Passed unchanged to ``doc.pages()``.
        output_path: Destination of the HTML output. Used as a directory when
            ``format_chapters`` is set, otherwise as the output file. Its
            ``stem`` names the debug PDF, so a path object is expected there.
        format_chapters: Write one ``chapter_<name>.html`` per child node
            named ``"chapter"`` instead of a single HTML file.
        pretty: Forwarded to ``write_html``.
        render_html: Build the merged document and write HTML.
        render_pdf: Render every processed page into ``debug_<stem>.pdf`` in
            the current working directory.
        render_all: Also process pages whose header contains ``"Contents"``,
            ``"List of "`` or ``"Index"``; these are skipped by default.
        show_ast: Print the tree of every content area of each page.
        show_tree: Print the tree of the normalized document.
        show_tags: Print the description of every structure of each page.

    Returns:
        Always ``True``, including when no page produced any content.
    """
    # Header fragments identifying pages that are skipped by default.
    skipped_headers = ("Contents", "List of ", "Index")
    build_document = show_tree or render_html

    document = None
    debug_doc = None
    debug_index = 0
    for page in doc.pages(page_range):
        if not render_all and any(marker in page.top for marker in skipped_headers):
            continue
        print(f"\n\n=== {page.top} #{page.number} ===\n")

        if show_tags:
            for struct in page.structures:
                print(struct.descr())

        if build_document or show_ast:
            areas = page.content_ast
            if show_ast:
                print()
                for area in areas:
                    print(RenderTree(area))
            if build_document:
                for area in areas:
                    document = merge_area(document, area)

        if render_pdf:
            debug_doc = render_page_pdf(doc, page, debug_doc, debug_index)
            debug_index += 1

    if render_pdf:
        if debug_doc is None:
            # No page was rendered (empty range or all pages skipped), so
            # there is nothing that could be wrapped into a PDF document.
            print("No pages rendered, skipping debug PDF!")
        else:
            with open(f"debug_{output_path.stem}.pdf", 'wb') as file_handle:
                pp.PdfDocument(debug_doc).save(file_handle)

    if build_document:
        if document is None:
            print("No pages parsed, empty document!")
            return True

        document = normalize_document(document)
        if show_tree:
            print(RenderTree(document))

        if render_html:
            if format_chapters:
                for chapter in document.children:
                    if chapter.name == "chapter":
                        print(f"\nFormatting HTML for '{chapter.title}'")
                        html = format_document(chapter)
                        output_file = f"{output_path}/chapter_{chapter._filename}.html"
                        print(f"\nWriting HTML '{output_file}'")
                        write_html(html, output_file, pretty=pretty)
            else:
                print("\nFormatting HTML")
                html = format_document(document)
                print(f"\nWriting HTML '{str(output_path)}'")
                write_html(html, str(output_path), pretty=pretty)

    return True
73
74
def patch(doc, output_path, patch_file=None) -> bool:
    """Apply a patch file to the converted output at ``output_path``.

    Args:
        doc: Document object; its ``name`` selects the packaged patch file.
        output_path: Location the patch is applied to.
        patch_file: Explicit patch file to apply. When ``None``, a patch
            shipped in this package's ``data`` subpackage is looked up.

    Returns:
        ``True`` when no packaged patch exists for the document, otherwise
        the result of the patch helper that was called.
    """
    if patch_file is not None:
        # The module-level imports only bring in pkg_apply_patch and
        # pkg_file_exists, so a bare apply_patch() call raises NameError.
        # NOTE(review): assumes apply_patch is exported by the same utils
        # package as pkg_apply_patch — confirm.
        from ...utils import apply_patch
        return apply_patch(patch_file, output_path)

    from . import data
    # First try the patch file for the specific version
    patch_file = f"{doc.name}.patch"
    if not pkg_file_exists(data, patch_file):
        # Then try the patch file shared between versions
        patch_file = f"{doc.name.split('-')[0]}.patch"
        if not pkg_file_exists(data, patch_file):
            # Nothing to patch for this document.
            return True
    return pkg_apply_patch(data, patch_file, output_path)
def convert(doc, page_range, output_path, format_chapters=False, pretty=True,
            render_html=True, render_pdf=False, render_all=False,
            show_ast=False, show_tree=False, show_tags=False) -> bool:
    """Convert pages of ``doc`` into HTML files and optional debug output.

    Pages come from ``doc.pages(page_range)``. Unless ``render_all`` is set,
    pages whose header (``page.top``) contains ``"Contents"``, ``"List of "``
    or ``"Index"`` are skipped.

    Args:
        doc: Document object providing ``pages(page_range)``.
        page_range: Passed unchanged to ``doc.pages()``.
        output_path: HTML output file, or output directory when
            ``format_chapters`` is set. Its ``stem`` names the debug PDF.
        format_chapters: Write one HTML file per child node named
            ``"chapter"`` instead of one file for the whole document.
        pretty: Forwarded to ``write_html``.
        render_html: Merge page areas into a document and write HTML.
        render_pdf: Write the processed pages to ``debug_<stem>.pdf``.
        render_all: Do not skip contents, list and index pages.
        show_ast: Print the tree of each content area per page.
        show_tree: Print the tree of the normalized document.
        show_tags: Print ``descr()`` of each page structure.

    Returns:
        Always ``True``.
    """
    # Built once instead of on every loop iteration.
    skip_markers = ("Contents", "List of ", "Index")
    needs_document = show_tree or render_html
    needs_areas = needs_document or show_ast

    document = None
    debug_doc = None
    debug_index = 0
    for page in doc.pages(page_range):
        if not render_all and any(marker in page.top for marker in skip_markers):
            continue
        print(f"\n\n=== {page.top} #{page.number} ===\n")

        if show_tags:
            for struct in page.structures:
                print(struct.descr())

        if needs_areas:
            areas = page.content_ast
            if show_ast:
                print()
                for area in areas:
                    print(RenderTree(area))
            if needs_document:
                for area in areas:
                    document = merge_area(document, area)

        if render_pdf:
            debug_doc = render_page_pdf(doc, page, debug_doc, debug_index)
            debug_index += 1

    if render_pdf:
        if debug_doc is None:
            # Every page was skipped or the range was empty: there is no
            # rendered document to save, so do not construct a PdfDocument.
            print("No pages rendered, skipping debug PDF!")
        else:
            with open(f"debug_{output_path.stem}.pdf", 'wb') as file_handle:
                pp.PdfDocument(debug_doc).save(file_handle)

    if not needs_document:
        return True

    if document is None:
        print("No pages parsed, empty document!")
        return True

    document = normalize_document(document)
    if show_tree:
        print(RenderTree(document))

    if render_html:
        if format_chapters:
            for chapter in document.children:
                if chapter.name == "chapter":
                    print(f"\nFormatting HTML for '{chapter.title}'")
                    html = format_document(chapter)
                    output_file = f"{output_path}/chapter_{chapter._filename}.html"
                    print(f"\nWriting HTML '{output_file}'")
                    write_html(html, output_file, pretty=pretty)
        else:
            print("\nFormatting HTML")
            html = format_document(document)
            print(f"\nWriting HTML '{str(output_path)}'")
            write_html(html, str(output_path), pretty=pretty)

    return True
def patch(doc, output_path, patch_file=None) -> bool:
    """Patch the output at ``output_path`` with an explicit or packaged patch.

    Args:
        doc: Document object; ``doc.name`` determines the packaged patch name.
        output_path: Location the patch is applied to.
        patch_file: Patch file to apply. If ``None``, the ``data`` subpackage
            is searched, first for ``<doc.name>.patch`` and then for the patch
            named after the part of ``doc.name`` before the first ``-``.

    Returns:
        ``True`` if neither packaged patch exists; otherwise whatever the
        patch helper returns.
    """
    if patch_file is None:
        from . import data
        # First try the patch file for the specific version
        patch_file = f"{doc.name}.patch"
        if not pkg_file_exists(data, patch_file):
            # Then try the patch file shared between versions
            patch_file = f"{doc.name.split('-')[0]}.patch"
            if not pkg_file_exists(data, patch_file):
                # No patch is shipped for this document.
                return True
        return pkg_apply_patch(data, patch_file, output_path)

    # apply_patch is not imported at module level, so calling it directly
    # raises NameError; import it here to keep this edit self-contained.
    # NOTE(review): assumes the utils package that provides pkg_apply_patch
    # also exports apply_patch — confirm.
    from ...utils import apply_patch
    return apply_patch(patch_file, output_path)