modm_data.pdf2html.cell
# Copyright 2022, Niklas Hauser
# SPDX-License-Identifier: MPL-2.0

from functools import cached_property
from anytree import Node
from dataclasses import dataclass
from ..utils import Rectangle, Point
from .line import CharLine


@dataclass
class Borders:
    """The four borders of a `Cell`"""

    left: bool
    bottom: bool
    right: bool
    top: bool


class Cell:
    """A table cell covering one or more index positions of a table.

    `positions` is kept as a sorted list of `(y, x)` index pairs. All derived
    values are cached properties, which the mutating helpers (`_merge`,
    `_move`, `_expand`) reset through `_invalidate`.
    """

    def __init__(self, table, position: Point, bbox: Rectangle, borders: Borders, is_simple: bool = False):
        """
        :param table: The table owning this cell; its `_page` is queried for the cell content.
        :param position: The initial index position, stored as the only entry of `positions`.
        :param bbox: The bounding box of the cell.
        :param borders: The borders of the cell.
        :param is_simple: Stored as `_is_simple`; not read by this class.
        """
        self._table = table
        self._bboxes = [bbox]
        self._is_simple = is_simple
        self.borders: Borders = borders
        """Borders of the cell"""
        self.positions: list[Point] = [position]
        """Index positions of the cell"""
        self.is_header: bool = False
        """Is this cell a header?"""

    def _merge(self, other):
        """Absorb the index positions and the bounding box of `other`."""
        self.positions.extend(other.positions)
        self.positions.sort()
        self._bboxes.append(other.bbox)
        self._invalidate()

    def _move(self, x, y):
        """Shift every index position by `x` columns and `y` rows."""
        self.positions = [(py + y, px + x) for (py, px) in self.positions]
        self.positions.sort()
        self._invalidate()

    def _expand(self, dx, dy):
        """Add the `(dx + 1)` by `(dy + 1)` block of positions that starts at the last position."""
        ymax, xmax = self.positions[-1]
        block = [(yi, xi) for yi in range(ymax, ymax + dy + 1) for xi in range(xmax, xmax + dx + 1)]
        # The first generated index is (ymax, xmax) itself, which is already
        # stored: skip it so that `positions` does not end up with a duplicate.
        self.positions.extend(block[1:])
        self.positions.sort()
        self._invalidate()

    def _invalidate(self):
        """Drop every cached property value so it is recomputed on next access."""
        # Walk the whole MRO: looking only at the instance's own class would
        # miss the cached properties a subclass inherits from `Cell`.
        for klass in type(self).__mro__:
            for key, value in vars(klass).items():
                if isinstance(value, cached_property):
                    self.__dict__.pop(key, None)

    @cached_property
    def x(self) -> int:
        """The horizontal position of the cell."""
        return self.positions[0][1]

    @cached_property
    def y(self) -> int:
        """The vertical position of the cell."""
        return self.positions[0][0]

    @cached_property
    def xspan(self) -> int:
        """The horizontal span of the cell."""
        return self.positions[-1][1] - self.positions[0][1] + 1

    @cached_property
    def yspan(self) -> int:
        """The vertical span of the cell."""
        return self.positions[-1][0] - self.positions[0][0] + 1

    @cached_property
    def rotation(self) -> int:
        """The rotation of the cell text, taken from the first line (0 if the cell is empty)."""
        if not self.lines:
            return 0
        return self.lines[0].rotation

    @cached_property
    def bbox(self) -> Rectangle:
        """The tight bounding box of this cell."""
        return Rectangle(
            min(bbox.left for bbox in self._bboxes),
            min(bbox.bottom for bbox in self._bboxes),
            max(bbox.right for bbox in self._bboxes),
            max(bbox.top for bbox in self._bboxes),
        )

    @cached_property
    def lines(self) -> list[CharLine]:
        """The character lines in this cell."""
        return self._table._page.charlines_in_area(self.bbox)

    @cached_property
    def content(self) -> str:
        """The concatenated text content of the table cell."""
        return "".join(c.char for line in self.lines for c in line.chars)

    @cached_property
    def is_left_aligned(self) -> bool:
        """Is the text in the cell left aligned?"""
        x_em = self._table._page._spacing["x_em"]
        for line in self.lines:
            # Left aligned: the gap on the left (plus one em of slack) is
            # smaller than the gap on the right.
            if (line.bbox.left - self.bbox.left + x_em) < (self.bbox.right - line.bbox.right):
                return True
        return False

    @cached_property
    def ast(self) -> Node:
        """The abstract syntax tree of the cell without graphics."""
        ast = self._table._page.ast_in_area(
            self.bbox, with_graphics=False, ignore_xpos=not self.is_left_aligned, with_bits=False, with_notes=False
        )
        ast.name = "cell"
        return ast

    def __repr__(self) -> str:
        """Return a compact debug string with the (x,y) positions and the border markers."""
        positions = ",".join(f"({p[1]},{p[0]})" for p in self.positions)
        borders = ""
        if self.borders.left:
            borders += "["
        if self.borders.bottom:
            borders += "_"
        if self.borders.top:
            borders += "^"
        if self.borders.right:
            borders += "]"
        start = "CellH" if self.is_header else "Cell"
        return start + f"[{positions}] {borders}"
@dataclass
class
Borders:
@dataclass
class Borders:
    """The four borders of a `Cell`"""

    # Flag for the left border.
    left: bool
    # Flag for the bottom border.
    bottom: bool
    # Flag for the right border.
    right: bool
    # Flag for the top border.
    top: bool
The four borders of a Cell
class
Cell:
class Cell:
    """A table cell covering one or more index positions of a table.

    `positions` is kept as a sorted list of `(y, x)` index pairs. All derived
    values are cached properties, which the mutating helpers (`_merge`,
    `_move`, `_expand`) reset through `_invalidate`.
    """

    def __init__(self, table, position: Point, bbox: Rectangle, borders: Borders, is_simple: bool = False):
        """
        :param table: The table owning this cell; its `_page` is queried for the cell content.
        :param position: The initial index position, stored as the only entry of `positions`.
        :param bbox: The bounding box of the cell.
        :param borders: The borders of the cell.
        :param is_simple: Stored as `_is_simple`; not read by this class.
        """
        self._table = table
        self._bboxes = [bbox]
        self._is_simple = is_simple
        self.borders: Borders = borders
        """Borders of the cell"""
        self.positions: list[Point] = [position]
        """Index positions of the cell"""
        self.is_header: bool = False
        """Is this cell a header?"""

    def _merge(self, other):
        """Absorb the index positions and the bounding box of `other`."""
        self.positions.extend(other.positions)
        self.positions.sort()
        self._bboxes.append(other.bbox)
        self._invalidate()

    def _move(self, x, y):
        """Shift every index position by `x` columns and `y` rows."""
        self.positions = [(py + y, px + x) for (py, px) in self.positions]
        self.positions.sort()
        self._invalidate()

    def _expand(self, dx, dy):
        """Add the `(dx + 1)` by `(dy + 1)` block of positions that starts at the last position."""
        ymax, xmax = self.positions[-1]
        block = [(yi, xi) for yi in range(ymax, ymax + dy + 1) for xi in range(xmax, xmax + dx + 1)]
        # The first generated index is (ymax, xmax) itself, which is already
        # stored: skip it so that `positions` does not end up with a duplicate.
        self.positions.extend(block[1:])
        self.positions.sort()
        self._invalidate()

    def _invalidate(self):
        """Drop every cached property value so it is recomputed on next access."""
        # Walk the whole MRO: looking only at the instance's own class would
        # miss the cached properties a subclass inherits from `Cell`.
        for klass in type(self).__mro__:
            for key, value in vars(klass).items():
                if isinstance(value, cached_property):
                    self.__dict__.pop(key, None)

    @cached_property
    def x(self) -> int:
        """The horizontal position of the cell."""
        return self.positions[0][1]

    @cached_property
    def y(self) -> int:
        """The vertical position of the cell."""
        return self.positions[0][0]

    @cached_property
    def xspan(self) -> int:
        """The horizontal span of the cell."""
        return self.positions[-1][1] - self.positions[0][1] + 1

    @cached_property
    def yspan(self) -> int:
        """The vertical span of the cell."""
        return self.positions[-1][0] - self.positions[0][0] + 1

    @cached_property
    def rotation(self) -> int:
        """The rotation of the cell text, taken from the first line (0 if the cell is empty)."""
        if not self.lines:
            return 0
        return self.lines[0].rotation

    @cached_property
    def bbox(self) -> Rectangle:
        """The tight bounding box of this cell."""
        return Rectangle(
            min(bbox.left for bbox in self._bboxes),
            min(bbox.bottom for bbox in self._bboxes),
            max(bbox.right for bbox in self._bboxes),
            max(bbox.top for bbox in self._bboxes),
        )

    @cached_property
    def lines(self) -> list[CharLine]:
        """The character lines in this cell."""
        return self._table._page.charlines_in_area(self.bbox)

    @cached_property
    def content(self) -> str:
        """The concatenated text content of the table cell."""
        return "".join(c.char for line in self.lines for c in line.chars)

    @cached_property
    def is_left_aligned(self) -> bool:
        """Is the text in the cell left aligned?"""
        x_em = self._table._page._spacing["x_em"]
        for line in self.lines:
            # Left aligned: the gap on the left (plus one em of slack) is
            # smaller than the gap on the right.
            if (line.bbox.left - self.bbox.left + x_em) < (self.bbox.right - line.bbox.right):
                return True
        return False

    @cached_property
    def ast(self) -> Node:
        """The abstract syntax tree of the cell without graphics."""
        ast = self._table._page.ast_in_area(
            self.bbox, with_graphics=False, ignore_xpos=not self.is_left_aligned, with_bits=False, with_notes=False
        )
        ast.name = "cell"
        return ast

    def __repr__(self) -> str:
        """Return a compact debug string with the (x,y) positions and the border markers."""
        positions = ",".join(f"({p[1]},{p[0]})" for p in self.positions)
        borders = ""
        if self.borders.left:
            borders += "["
        if self.borders.bottom:
            borders += "_"
        if self.borders.top:
            borders += "^"
        if self.borders.right:
            borders += "]"
        start = "CellH" if self.is_header else "Cell"
        return start + f"[{positions}] {borders}"
Cell( table, position: modm_data.utils.Point, bbox: modm_data.utils.Rectangle, borders: Borders, is_simple: bool = False)
def __init__(self, table, position: Point, bbox: Rectangle, borders: Borders, is_simple: bool = False):
    """Create a cell that initially covers the single index `position` of `table`."""
    self.borders: Borders = borders
    """Borders of the cell"""
    self.positions: list[Point] = [position]
    """Index positions of the cell"""
    self.is_header: bool = False
    """Is this cell a header?"""
    # Private state: owning table, the bounding boxes collected so far, and
    # the `is_simple` flag, which is only stored here.
    self._is_simple = is_simple
    self._bboxes = [bbox]
    self._table = table
x: int
@cached_property
def x(self) -> int:
    """Column index of the cell, read from its first position."""
    first = self.positions[0]
    return first[1]
The horizontal position of the cell.
y: int
@cached_property
def y(self) -> int:
    """Row index of the cell, read from its first position."""
    first = self.positions[0]
    return first[0]
The vertical position of the cell.
xspan: int
@cached_property
def xspan(self) -> int:
    """Number of columns between the first and the last position, inclusive."""
    first, last = self.positions[0], self.positions[-1]
    return last[1] - first[1] + 1
The horizontal span of the cell.
yspan: int
@cached_property
def yspan(self) -> int:
    """Number of rows between the first and the last position, inclusive."""
    first, last = self.positions[0], self.positions[-1]
    return last[0] - first[0] + 1
The vertical span of the cell.
rotation: int
@cached_property
def rotation(self) -> int:
    """Rotation of the first character line, or 0 when the cell has no lines."""
    return self.lines[0].rotation if self.lines else 0
The rotation of the cell text.
@cached_property
def bbox(self) -> Rectangle:
    """Smallest rectangle enclosing every bounding box collected for this cell."""
    boxes = self._bboxes
    left = min(box.left for box in boxes)
    bottom = min(box.bottom for box in boxes)
    right = max(box.right for box in boxes)
    top = max(box.top for box in boxes)
    return Rectangle(left, bottom, right, top)
The tight bounding box of this cell.
lines: list[modm_data.pdf2html.line.CharLine]
@cached_property
def lines(self) -> list[CharLine]:
    """Character lines that the owning page reports inside the cell's bounding box."""
    page = self._table._page
    return page.charlines_in_area(self.bbox)
The character lines in this cell.
content
@cached_property
def content(self):
    """All characters of all lines in the cell, joined without any separator."""
    pieces = []
    for text_line in self.lines:
        pieces.extend(glyph.char for glyph in text_line.chars)
    return "".join(pieces)
The concatenated text content of the table cell.
is_left_aligned: bool
@cached_property
def is_left_aligned(self) -> bool:
    """Report whether at least one line sits closer to the left edge than to the right.

    A line counts as left aligned when its left gap plus one `x_em` of the
    page spacing is smaller than its right gap.
    """
    slack = self._table._page._spacing["x_em"]
    return any(
        (text_line.bbox.left - self.bbox.left + slack) < (self.bbox.right - text_line.bbox.right)
        for text_line in self.lines
    )
Is the text in the cell left aligned?
ast: anytree.node.node.Node
@cached_property
def ast(self) -> Node:
    """Syntax tree the page builds for the cell area, renamed to "cell".

    Graphics, bits and notes are switched off; x positions are ignored
    unless the cell is left aligned.
    """
    page = self._table._page
    area = self.bbox
    options = {
        "with_graphics": False,
        "ignore_xpos": not self.is_left_aligned,
        "with_bits": False,
        "with_notes": False,
    }
    tree = page.ast_in_area(area, **options)
    tree.name = "cell"
    return tree
The abstract syntax tree of the cell without graphics.