modm_data.pdf2html.render

 1# Copyright 2022, Niklas Hauser
 2# SPDX-License-Identifier: MPL-2.0
 3
 4import math
 5import pypdfium2 as pp
 6from ..utils import VLine, HLine
 7from ..pdf.render import render_page_pdf as pdf_render_page_pdf
 8from ..pdf.render import _vline, _hline, _line, _rect
 9
10
11
def render_page_pdf(doc, page, new_doc=None, index=0):
    """
    Render one page and draw the pdf2html debug overlay on top of it.

    The page is first rendered by the base renderer ``pdf_render_page_pdf``
    (``..pdf.render.render_page_pdf``). Page ``index`` of the document it
    returns is then loaded and the following shapes are added to it:

    - every entry of ``page._areas`` (a single rectangle or a list of them),
    - the ``cbbox`` and ``bbox`` of every object in ``page.content_graphics``
      (each only when it is not ``None``),
    - for every table in ``page.content_tables``: its bounding box, all lines
      of its ``_xgrid`` and ``_ygrid``, the bounding box of every cluster of
      every line in every cell, and each cell border whose width is truthy.

    :param doc: PDF document
    :param page: PDF page
    :param new_doc: Empty PDF document to copy debug renders to
    :param index: Index of the page inside ``new_doc`` that is drawn onto
    :return: The document returned by the base renderer, with the overlay added
    :raises AssertionError: If pdfium reports that generating the page content
        failed
    """
    new_doc = pdf_render_page_pdf(doc, page, new_doc, index)
    new_page = pp.raw.FPDF_LoadPage(new_doc, index)
    # Always release the page handle, even if one of the drawing calls raises.
    try:
        rotation = page.rotation

        # Disabled debug aid, kept for reference:
        # for name, distance in page._spacing.items():
        #     if name.startswith("x_"):
        #         _vline(new_page, rotation, distance, 0, page.height, width=0.5, stroke=0xFFA500)
        #     else:
        #         _hline(new_page, rotation, distance, 0, page.width, width=0.5, stroke=0xFFA500)

        # Page areas: a value is either one rectangle or a list of rectangles.
        for area in page._areas.values():
            rects = area if isinstance(area, list) else [area]
            for rect in rects:
                _rect(new_page, rotation, rect, width=0.5, stroke=0xFFA500)

        # Graphics: both bounding boxes are optional and use different strokes.
        for obj in page.content_graphics:
            if obj.cbbox is not None:
                _rect(new_page, rotation, obj.cbbox, width=2, stroke=0x9ACD32)
            if obj.bbox is not None:
                _rect(new_page, rotation, obj.bbox, width=2, stroke=0x00ff00)

        for table in page.content_tables:
            _rect(new_page, rotation, table.bbox, width=1.5, stroke=0x0000ff)

            # Grid lines, x grid first and then y grid, same style for both.
            for grid in (table._xgrid, table._ygrid):
                for lines in grid.values():
                    for line in lines:
                        _line(new_page, rotation, line, width=0.75, stroke=0x0000ff)

            for cell in table.cells:
                for line in cell.lines:
                    for cluster in line.clusters():
                        _rect(new_page, rotation, cluster.bbox, width=0.33, stroke=0x808080)

                # Each border is drawn with its own width as line width and a
                # distinct stroke per side; a falsy width means no border.
                bbox = cell.bbox
                border = cell.b
                if border.l:
                    _vline(new_page, rotation, bbox.left, bbox.bottom, bbox.top,
                           width=border.l, stroke=0xff0000)
                if border.r:
                    _vline(new_page, rotation, bbox.right, bbox.bottom, bbox.top,
                           width=border.r, stroke=0x0000ff)
                if border.b:
                    _hline(new_page, rotation, bbox.bottom, bbox.left, bbox.right,
                           width=border.b, stroke=0x00ff00)
                if border.t:
                    _hline(new_page, rotation, bbox.top, bbox.left, bbox.right,
                           width=border.t, stroke=0x808080)

        # Deliberately not an `assert` statement: with `python -O` asserts are
        # removed, which would also remove this call and silently drop the
        # whole overlay. AssertionError is kept as the failure type.
        if not pp.raw.FPDFPage_GenerateContent(new_page):
            raise AssertionError("FPDFPage_GenerateContent failed")
    finally:
        pp.raw.FPDF_ClosePage(new_page)
    return new_doc
def render_page_pdf(doc, page, new_doc=None, index=0):
    """
    Render one page and draw the pdf2html debug overlay on top of it.

    The page is first rendered by the base renderer ``pdf_render_page_pdf``
    (``..pdf.render.render_page_pdf``). Page ``index`` of the document it
    returns is then loaded and the following shapes are added to it:

    - every entry of ``page._areas`` (a single rectangle or a list of them),
    - the ``cbbox`` and ``bbox`` of every object in ``page.content_graphics``
      (each only when it is not ``None``),
    - for every table in ``page.content_tables``: its bounding box, all lines
      of its ``_xgrid`` and ``_ygrid``, the bounding box of every cluster of
      every line in every cell, and each cell border whose width is truthy.

    :param doc: PDF document
    :param page: PDF page
    :param new_doc: Empty PDF document to copy debug renders to
    :param index: Index of the page inside ``new_doc`` that is drawn onto
    :return: The document returned by the base renderer, with the overlay added
    :raises AssertionError: If pdfium reports that generating the page content
        failed
    """
    new_doc = pdf_render_page_pdf(doc, page, new_doc, index)
    new_page = pp.raw.FPDF_LoadPage(new_doc, index)
    # Always release the page handle, even if one of the drawing calls raises.
    try:
        rotation = page.rotation

        # Disabled debug aid, kept for reference:
        # for name, distance in page._spacing.items():
        #     if name.startswith("x_"):
        #         _vline(new_page, rotation, distance, 0, page.height, width=0.5, stroke=0xFFA500)
        #     else:
        #         _hline(new_page, rotation, distance, 0, page.width, width=0.5, stroke=0xFFA500)

        # Page areas: a value is either one rectangle or a list of rectangles.
        for area in page._areas.values():
            rects = area if isinstance(area, list) else [area]
            for rect in rects:
                _rect(new_page, rotation, rect, width=0.5, stroke=0xFFA500)

        # Graphics: both bounding boxes are optional and use different strokes.
        for obj in page.content_graphics:
            if obj.cbbox is not None:
                _rect(new_page, rotation, obj.cbbox, width=2, stroke=0x9ACD32)
            if obj.bbox is not None:
                _rect(new_page, rotation, obj.bbox, width=2, stroke=0x00ff00)

        for table in page.content_tables:
            _rect(new_page, rotation, table.bbox, width=1.5, stroke=0x0000ff)

            # Grid lines, x grid first and then y grid, same style for both.
            for grid in (table._xgrid, table._ygrid):
                for lines in grid.values():
                    for line in lines:
                        _line(new_page, rotation, line, width=0.75, stroke=0x0000ff)

            for cell in table.cells:
                for line in cell.lines:
                    for cluster in line.clusters():
                        _rect(new_page, rotation, cluster.bbox, width=0.33, stroke=0x808080)

                # Each border is drawn with its own width as line width and a
                # distinct stroke per side; a falsy width means no border.
                bbox = cell.bbox
                border = cell.b
                if border.l:
                    _vline(new_page, rotation, bbox.left, bbox.bottom, bbox.top,
                           width=border.l, stroke=0xff0000)
                if border.r:
                    _vline(new_page, rotation, bbox.right, bbox.bottom, bbox.top,
                           width=border.r, stroke=0x0000ff)
                if border.b:
                    _hline(new_page, rotation, bbox.bottom, bbox.left, bbox.right,
                           width=border.b, stroke=0x00ff00)
                if border.t:
                    _hline(new_page, rotation, bbox.top, bbox.left, bbox.right,
                           width=border.t, stroke=0x808080)

        # Deliberately not an `assert` statement: with `python -O` asserts are
        # removed, which would also remove this call and silently drop the
        # whole overlay. AssertionError is kept as the failure type.
        if not pp.raw.FPDFPage_GenerateContent(new_page):
            raise AssertionError("FPDFPage_GenerateContent failed")
    finally:
        pp.raw.FPDF_ClosePage(new_page)
    return new_doc

Render one page and overlay its areas, graphics bounding boxes, tables and cell borders for debugging.

Parameters
  • doc: PDF document
  • page: PDF page
  • new_doc: Empty PDF document to copy debug renders to