modm_data.pdf2html.stmicro.convert
# Copyright 2022, Niklas Hauser
# SPDX-License-Identifier: MPL-2.0

from anytree import RenderTree

from .ast import merge_area, normalize_document
from .ast import format_document, write_html
from ..render import render_page_pdf
# NOTE(review): `apply_patch` is called by `patch()` below but was missing from
# this import; it is assumed to live next to `pkg_apply_patch` -- confirm.
from ...utils import pkg_apply_patch, pkg_file_exists, apply_patch
import pypdfium2 as pp
import subprocess


def convert(doc, page_range, output_path, format_chapters=False, pretty=True,
            render_html=True, render_pdf=False, render_all=False,
            show_ast=False, show_tree=False, show_tags=False) -> bool:
    """
    Convert the selected pages of a document into HTML and/or debug output.

    Every page yielded by `doc.pages(page_range)` is visited. Unless
    `render_all` is set, pages whose `page.top` contains "Contents",
    "List of " or "Index" are skipped.

    :param doc: document object providing `pages(page_range)`.
    :param page_range: passed through unchanged to `doc.pages()`.
    :param output_path: HTML output location. With `render_pdf` its `.stem`
        is used to name the debug PDF; with `format_chapters` it is used as
        the directory prefix of the per-chapter files.
    :param format_chapters: write one `chapter_<name>.html` file below
        `output_path` for every child of the document named "chapter",
        instead of one single HTML file.
    :param pretty: forwarded to `write_html()`.
    :param render_html: format the merged document and write it as HTML.
    :param render_pdf: render every visited page via `render_page_pdf()` and
        save the result as `debug_<stem>.pdf` in the current directory.
    :param render_all: do not skip the contents/list/index pages.
    :param show_ast: print the tree of every content area of every page.
    :param show_tree: print the tree of the normalized, merged document.
    :param show_tags: print `descr()` of every structure of every page.
    :return: always `True`.
    """
    document = None
    debug_doc = None
    debug_index = 0
    for page in doc.pages(page_range):
        if not render_all and any(c in page.top for c in {"Contents", "List of ", "Index"}):
            continue
        print(f"\n\n=== {page.top} #{page.number} ===\n")

        if show_tags:
            for struct in page.structures:
                print(struct.descr())

        if show_tree or render_html or show_ast:
            areas = page.content_ast
            if show_ast:
                print()
                for area in areas:
                    print(RenderTree(area))
            if show_tree or render_html:
                # Accumulate all areas of all pages into one document tree.
                for area in areas:
                    document = merge_area(document, area)

        if render_pdf:
            debug_doc = render_page_pdf(doc, page, debug_doc, debug_index)
            debug_index += 1

    if render_pdf:
        if debug_doc is None:
            # No page made it through the loop, so there is nothing to save.
            print("No pages rendered, skipping debug PDF!")
        else:
            with open(f"debug_{output_path.stem}.pdf", 'wb') as file_handle:
                pp.PdfDocument(debug_doc).save(file_handle)

    if show_tree or render_html:
        if document is None:
            print("No pages parsed, empty document!")
            return True

        document = normalize_document(document)
        if show_tree:
            print(RenderTree(document))

        if render_html:
            if format_chapters:
                for chapter in document.children:
                    if chapter.name == "chapter":
                        print(f"\nFormatting HTML for '{chapter.title}'")
                        html = format_document(chapter)
                        output_file = f"{output_path}/chapter_{chapter._filename}.html"
                        print(f"\nWriting HTML '{output_file}'")
                        write_html(html, output_file, pretty=pretty)
            else:
                print("\nFormatting HTML")
                html = format_document(document)
                print(f"\nWriting HTML '{str(output_path)}'")
                write_html(html, str(output_path), pretty=pretty)

    return True


def patch(doc, output_path, patch_file=None) -> bool:
    """
    Apply a patch file to the converted output.

    Without an explicit `patch_file`, the packaged `data` module is searched
    first for `<doc.name>.patch` and then for a patch named after the part of
    `doc.name` before the first "-".

    :param doc: document object; only its `name` attribute is read.
    :param output_path: forwarded to the patch helper.
    :param patch_file: explicit patch file; bypasses the packaged lookup.
    :return: `True` if no packaged patch exists, otherwise the result of
        `pkg_apply_patch()` or `apply_patch()`.
    """
    if patch_file is None:
        from . import data
        # First try the patch file for the specific version
        patch_file = f"{doc.name}.patch"
        if not pkg_file_exists(data, patch_file):
            # Then try the patch file shared between versions
            patch_file = f"{doc.name.split('-')[0]}.patch"
            if not pkg_file_exists(data, patch_file):
                return True
        return pkg_apply_patch(data, patch_file, output_path)
    return apply_patch(patch_file, output_path)
def convert(doc, page_range, output_path, format_chapters=False, pretty=True, render_html=True, render_pdf=False, render_all=False, show_ast=False, show_tree=False, show_tags=False) -> bool:
def convert(doc, page_range, output_path, format_chapters=False, pretty=True,
            render_html=True, render_pdf=False, render_all=False,
            show_ast=False, show_tree=False, show_tags=False) -> bool:
    """
    Convert the selected pages of a document into HTML and/or debug output.

    Every page yielded by `doc.pages(page_range)` is visited. Unless
    `render_all` is set, pages whose `page.top` contains "Contents",
    "List of " or "Index" are skipped.

    :param doc: document object providing `pages(page_range)`.
    :param page_range: passed through unchanged to `doc.pages()`.
    :param output_path: HTML output location. With `render_pdf` its `.stem`
        is used to name the debug PDF; with `format_chapters` it is used as
        the directory prefix of the per-chapter files.
    :param format_chapters: write one `chapter_<name>.html` file below
        `output_path` for every child of the document named "chapter",
        instead of one single HTML file.
    :param pretty: forwarded to `write_html()`.
    :param render_html: format the merged document and write it as HTML.
    :param render_pdf: render every visited page via `render_page_pdf()` and
        save the result as `debug_<stem>.pdf` in the current directory.
    :param render_all: do not skip the contents/list/index pages.
    :param show_ast: print the tree of every content area of every page.
    :param show_tree: print the tree of the normalized, merged document.
    :param show_tags: print `descr()` of every structure of every page.
    :return: always `True`.
    """
    document = None
    debug_doc = None
    debug_index = 0
    for page in doc.pages(page_range):
        if not render_all and any(c in page.top for c in {"Contents", "List of ", "Index"}):
            continue
        print(f"\n\n=== {page.top} #{page.number} ===\n")

        if show_tags:
            for struct in page.structures:
                print(struct.descr())

        if show_tree or render_html or show_ast:
            areas = page.content_ast
            if show_ast:
                print()
                for area in areas:
                    print(RenderTree(area))
            if show_tree or render_html:
                # Accumulate all areas of all pages into one document tree.
                for area in areas:
                    document = merge_area(document, area)

        if render_pdf:
            debug_doc = render_page_pdf(doc, page, debug_doc, debug_index)
            debug_index += 1

    if render_pdf:
        if debug_doc is None:
            # No page made it through the loop, so there is nothing to save.
            print("No pages rendered, skipping debug PDF!")
        else:
            with open(f"debug_{output_path.stem}.pdf", 'wb') as file_handle:
                pp.PdfDocument(debug_doc).save(file_handle)

    if show_tree or render_html:
        if document is None:
            print("No pages parsed, empty document!")
            return True

        document = normalize_document(document)
        if show_tree:
            print(RenderTree(document))

        if render_html:
            if format_chapters:
                for chapter in document.children:
                    if chapter.name == "chapter":
                        print(f"\nFormatting HTML for '{chapter.title}'")
                        html = format_document(chapter)
                        output_file = f"{output_path}/chapter_{chapter._filename}.html"
                        print(f"\nWriting HTML '{output_file}'")
                        write_html(html, output_file, pretty=pretty)
            else:
                print("\nFormatting HTML")
                html = format_document(document)
                print(f"\nWriting HTML '{str(output_path)}'")
                write_html(html, str(output_path), pretty=pretty)

    return True
def patch(doc, output_path, patch_file=None) -> bool:
def patch(doc, output_path, patch_file=None) -> bool:
    """
    Apply a patch file to the converted output.

    Without an explicit `patch_file`, the packaged `data` module is searched
    first for `<doc.name>.patch` and then for a patch named after the part of
    `doc.name` before the first "-".

    :param doc: document object; only its `name` attribute is read.
    :param output_path: forwarded to the patch helper.
    :param patch_file: explicit patch file; bypasses the packaged lookup.
    :return: `True` if no packaged patch exists, otherwise the result of
        `pkg_apply_patch()` or `apply_patch()`.
    """
    if patch_file is not None:
        # `apply_patch` is not among the names this module imports from
        # `...utils`, so the call used to raise a NameError.
        # NOTE(review): assumes `...utils` exports `apply_patch` next to
        # `pkg_apply_patch` -- confirm.
        from ...utils import apply_patch
        return apply_patch(patch_file, output_path)

    from . import data
    # First try the patch file for the specific version
    patch_file = f"{doc.name}.patch"
    if not pkg_file_exists(data, patch_file):
        # Then try the patch file shared between versions
        patch_file = f"{doc.name.split('-')[0]}.patch"
        if not pkg_file_exists(data, patch_file):
            return True
    return pkg_apply_patch(data, patch_file, output_path)