modm_data.pdf2html.cell

  1# Copyright 2022, Niklas Hauser
  2# SPDX-License-Identifier: MPL-2.0
  3
  4from functools import cached_property
  5from anytree import Node
  6from dataclasses import dataclass
  7from ..utils import Rectangle, Point
  8from .line import CharLine
  9
 10
 11@dataclass
 12class Borders:
 13    """The four borders of a `Cell`"""
 14
 15    left: bool
 16    bottom: bool
 17    right: bool
 18    top: bool
 19
 20
 21class Cell:
 22    def __init__(self, table, position: Point, bbox: Rectangle, borders: Borders, is_simple: bool = False):
 23        self._table = table
 24        self._bboxes = [bbox]
 25        self._is_simple = is_simple
 26        self.borders: Borders = borders
 27        """Borders of the cell"""
 28        self.positions: list[Point] = [position]
 29        """Index positions of the cell"""
 30        self.is_header: bool = False
 31        """Is this cell a header?"""
 32
 33    def _merge(self, other):
 34        self.positions.extend(other.positions)
 35        self.positions.sort()
 36        self._bboxes.append(other.bbox)
 37        self._invalidate()
 38
 39    def _move(self, x, y):
 40        self.positions = [(py + y, px + x) for (py, px) in self.positions]
 41        self.positions.sort()
 42        self._invalidate()
 43
 44    def _expand(self, dx, dy):
 45        ymax, xmax = self.positions[-1]
 46        for yi in range(ymax, ymax + dy + 1):
 47            for xi in range(xmax, xmax + dx + 1):
 48                self.positions.append((yi, xi))
 49        self.positions.sort()
 50        self._invalidate()
 51
 52    def _invalidate(self):
 53        for key, value in self.__class__.__dict__.items():
 54            if isinstance(value, cached_property):
 55                self.__dict__.pop(key, None)
 56
 57    @cached_property
 58    def x(self) -> int:
 59        """The horizontal position of the cell."""
 60        return self.positions[0][1]
 61
 62    @cached_property
 63    def y(self) -> int:
 64        """The vertical position of the cell."""
 65        return self.positions[0][0]
 66
 67    @cached_property
 68    def xspan(self) -> int:
 69        """The horizontal span of the cell."""
 70        return self.positions[-1][1] - self.positions[0][1] + 1
 71
 72    @cached_property
 73    def yspan(self) -> int:
 74        """The vertical span of the cell."""
 75        return self.positions[-1][0] - self.positions[0][0] + 1
 76
 77    @cached_property
 78    def rotation(self) -> int:
 79        """The rotation of the cell text."""
 80        if not self.lines:
 81            return 0
 82        return self.lines[0].rotation
 83
 84    @cached_property
 85    def bbox(self) -> Rectangle:
 86        """The tight bounding box of this cell."""
 87        return Rectangle(
 88            min(bbox.left for bbox in self._bboxes),
 89            min(bbox.bottom for bbox in self._bboxes),
 90            max(bbox.right for bbox in self._bboxes),
 91            max(bbox.top for bbox in self._bboxes),
 92        )
 93
 94    @cached_property
 95    def lines(self) -> list[CharLine]:
 96        """The character lines in this cell."""
 97        return self._table._page.charlines_in_area(self.bbox)
 98
 99    @cached_property
100    def content(self):
101        """The concatenated text content of the table cell."""
102        return "".join(c.char for line in self.lines for c in line.chars)
103
104    @cached_property
105    def is_left_aligned(self) -> bool:
106        """Is the text in the cell left aligned?"""
107        x_em = self._table._page._spacing["x_em"]
108        for line in self.lines:
109            if (line.bbox.left - self.bbox.left + x_em) < (self.bbox.right - line.bbox.right):
110                return True
111        return False
112
113    @cached_property
114    def ast(self) -> Node:
115        """The abstract syntax tree of the cell without graphics."""
116        ast = self._table._page.ast_in_area(
117            self.bbox, with_graphics=False, ignore_xpos=not self.is_left_aligned, with_bits=False, with_notes=False
118        )
119        ast.name = "cell"
120        return ast
121
122    def __repr__(self) -> str:
123        positions = ",".join(f"({p[1]},{p[0]})" for p in self.positions)
124        borders = ""
125        if self.borders.left:
126            borders += "["
127        if self.borders.bottom:
128            borders += "_"
129        if self.borders.top:
130            borders += "^"
131        if self.borders.right:
132            borders += "]"
133        start = "CellH" if self.is_header else "Cell"
134        return start + f"[{positions}] {borders}"
@dataclass
class Borders:
@dataclass
class Borders:
    """Flags for the four borders of a `Cell`, one boolean per side."""

    # Flag for the left border.
    left: bool
    # Flag for the bottom border.
    bottom: bool
    # Flag for the right border.
    right: bool
    # Flag for the top border.
    top: bool

The four borders of a Cell

Borders(left: bool, bottom: bool, right: bool, top: bool)
left: bool
bottom: bool
right: bool
top: bool
class Cell:
 22class Cell:
 23    def __init__(self, table, position: Point, bbox: Rectangle, borders: Borders, is_simple: bool = False):
 24        self._table = table
 25        self._bboxes = [bbox]
 26        self._is_simple = is_simple
 27        self.borders: Borders = borders
 28        """Borders of the cell"""
 29        self.positions: list[Point] = [position]
 30        """Index positions of the cell"""
 31        self.is_header: bool = False
 32        """Is this cell a header?"""
 33
 34    def _merge(self, other):
 35        self.positions.extend(other.positions)
 36        self.positions.sort()
 37        self._bboxes.append(other.bbox)
 38        self._invalidate()
 39
 40    def _move(self, x, y):
 41        self.positions = [(py + y, px + x) for (py, px) in self.positions]
 42        self.positions.sort()
 43        self._invalidate()
 44
 45    def _expand(self, dx, dy):
 46        ymax, xmax = self.positions[-1]
 47        for yi in range(ymax, ymax + dy + 1):
 48            for xi in range(xmax, xmax + dx + 1):
 49                self.positions.append((yi, xi))
 50        self.positions.sort()
 51        self._invalidate()
 52
 53    def _invalidate(self):
 54        for key, value in self.__class__.__dict__.items():
 55            if isinstance(value, cached_property):
 56                self.__dict__.pop(key, None)
 57
 58    @cached_property
 59    def x(self) -> int:
 60        """The horizontal position of the cell."""
 61        return self.positions[0][1]
 62
 63    @cached_property
 64    def y(self) -> int:
 65        """The vertical position of the cell."""
 66        return self.positions[0][0]
 67
 68    @cached_property
 69    def xspan(self) -> int:
 70        """The horizontal span of the cell."""
 71        return self.positions[-1][1] - self.positions[0][1] + 1
 72
 73    @cached_property
 74    def yspan(self) -> int:
 75        """The vertical span of the cell."""
 76        return self.positions[-1][0] - self.positions[0][0] + 1
 77
 78    @cached_property
 79    def rotation(self) -> int:
 80        """The rotation of the cell text."""
 81        if not self.lines:
 82            return 0
 83        return self.lines[0].rotation
 84
 85    @cached_property
 86    def bbox(self) -> Rectangle:
 87        """The tight bounding box of this cell."""
 88        return Rectangle(
 89            min(bbox.left for bbox in self._bboxes),
 90            min(bbox.bottom for bbox in self._bboxes),
 91            max(bbox.right for bbox in self._bboxes),
 92            max(bbox.top for bbox in self._bboxes),
 93        )
 94
 95    @cached_property
 96    def lines(self) -> list[CharLine]:
 97        """The character lines in this cell."""
 98        return self._table._page.charlines_in_area(self.bbox)
 99
100    @cached_property
101    def content(self):
102        """The concatenated text content of the table cell."""
103        return "".join(c.char for line in self.lines for c in line.chars)
104
105    @cached_property
106    def is_left_aligned(self) -> bool:
107        """Is the text in the cell left aligned?"""
108        x_em = self._table._page._spacing["x_em"]
109        for line in self.lines:
110            if (line.bbox.left - self.bbox.left + x_em) < (self.bbox.right - line.bbox.right):
111                return True
112        return False
113
114    @cached_property
115    def ast(self) -> Node:
116        """The abstract syntax tree of the cell without graphics."""
117        ast = self._table._page.ast_in_area(
118            self.bbox, with_graphics=False, ignore_xpos=not self.is_left_aligned, with_bits=False, with_notes=False
119        )
120        ast.name = "cell"
121        return ast
122
123    def __repr__(self) -> str:
124        positions = ",".join(f"({p[1]},{p[0]})" for p in self.positions)
125        borders = ""
126        if self.borders.left:
127            borders += "["
128        if self.borders.bottom:
129            borders += "_"
130        if self.borders.top:
131            borders += "^"
132        if self.borders.right:
133            borders += "]"
134        start = "CellH" if self.is_header else "Cell"
135        return start + f"[{positions}] {borders}"
Cell( table, position: modm_data.utils.Point, bbox: modm_data.utils.Rectangle, borders: Borders, is_simple: bool = False)
23    def __init__(self, table, position: Point, bbox: Rectangle, borders: Borders, is_simple: bool = False):
24        self._table = table
25        self._bboxes = [bbox]
26        self._is_simple = is_simple
27        self.borders: Borders = borders
28        """Borders of the cell"""
29        self.positions: list[Point] = [position]
30        """Index positions of the cell"""
31        self.is_header: bool = False
32        """Is this cell a header?"""
borders: Borders

Borders of the cell

positions: list[modm_data.utils.Point]

Index positions of the cell

is_header: bool

Is this cell a header?

x: int
58    @cached_property
59    def x(self) -> int:
60        """The horizontal position of the cell."""
61        return self.positions[0][1]

The horizontal position of the cell.

y: int
63    @cached_property
64    def y(self) -> int:
65        """The vertical position of the cell."""
66        return self.positions[0][0]

The vertical position of the cell.

xspan: int
68    @cached_property
69    def xspan(self) -> int:
70        """The horizontal span of the cell."""
71        return self.positions[-1][1] - self.positions[0][1] + 1

The horizontal span of the cell.

yspan: int
73    @cached_property
74    def yspan(self) -> int:
75        """The vertical span of the cell."""
76        return self.positions[-1][0] - self.positions[0][0] + 1

The vertical span of the cell.

rotation: int
78    @cached_property
79    def rotation(self) -> int:
80        """The rotation of the cell text."""
81        if not self.lines:
82            return 0
83        return self.lines[0].rotation

The rotation of the cell text.

bbox: modm_data.utils.Rectangle
85    @cached_property
86    def bbox(self) -> Rectangle:
87        """The tight bounding box of this cell."""
88        return Rectangle(
89            min(bbox.left for bbox in self._bboxes),
90            min(bbox.bottom for bbox in self._bboxes),
91            max(bbox.right for bbox in self._bboxes),
92            max(bbox.top for bbox in self._bboxes),
93        )

The tight bounding box of this cell.

lines: list[modm_data.pdf2html.line.CharLine]
95    @cached_property
96    def lines(self) -> list[CharLine]:
97        """The character lines in this cell."""
98        return self._table._page.charlines_in_area(self.bbox)

The character lines in this cell.

content
100    @cached_property
101    def content(self):
102        """The concatenated text content of the table cell."""
103        return "".join(c.char for line in self.lines for c in line.chars)

The concatenated text content of the table cell.

is_left_aligned: bool
105    @cached_property
106    def is_left_aligned(self) -> bool:
107        """Is the text in the cell left aligned?"""
108        x_em = self._table._page._spacing["x_em"]
109        for line in self.lines:
110            if (line.bbox.left - self.bbox.left + x_em) < (self.bbox.right - line.bbox.right):
111                return True
112        return False

Is the text in the cell left aligned?

ast: anytree.node.node.Node
114    @cached_property
115    def ast(self) -> Node:
116        """The abstract syntax tree of the cell without graphics."""
117        ast = self._table._page.ast_in_area(
118            self.bbox, with_graphics=False, ignore_xpos=not self.is_left_aligned, with_bits=False, with_notes=False
119        )
120        ast.name = "cell"
121        return ast

The abstract syntax tree of the cell without graphics.