
  1# Copyright 2022, Niklas Hauser
  2# SPDX-License-Identifier: MPL-2.0
  4import math
  5from enum import Enum
  6from functools import cached_property
  8import pypdfium2 as pp
# PDF User Coordinate System is mathematical: x *right*, y *upwards*
 12class Point:
 13    def __init__(self, *xy, type: Enum = None):
 14        if isinstance(xy[0], tuple):
 15            self.x = xy[0][0]
 16            self.y = xy[0][1]
 17        else:
 18            self.x = xy[0]
 19            self.y = xy[1]
 20        self.type = type
 22    def isclose(self, other, rtol: float = 1e-09, atol: float = 0.0) -> bool:
 23        return (math.isclose(self.x, other.x, rel_tol=rtol, abs_tol=atol) and
 24                math.isclose(self.y, other.y, rel_tol=rtol, abs_tol=atol))
 26    def distance_squared(self, other) -> float:
 27        return math.pow(self.x - other.x, 2) + math.pow(self.y - other.y, 2)
 29    def distance(self, other) -> float:
 30        return math.sqrt(self.distance_squared(other))
 32    def __neg__(self):
 33        return Point(-self.x, -self.y)
 35    def __hash__(self):
 36        return hash(f"{self.x} {self.y}")
 38    def __repr__(self) -> str:
 39        x = f"{self.x:.1f}" if isinstance(self.x, float) else self.x
 40        y = f"{self.y:.1f}" if isinstance(self.y, float) else self.y
 41        out = [x, y] if self.type is None else [x, y,]
 42        return f"({','.join(out)})"
 45class Line:
 46    class Direction(Enum):
 47        ANGLE = 0
 48        VERTICAL = 1
 49        HORIZONTAL = 2
 51    def __init__(self, *r, width: float = None, type: Enum = None):
 52        if isinstance(r[0], Rectangle):
 53            self.p0 = r[0].p0
 54            self.p1 = r[0].p1
 55        elif isinstance(r[0], Point):
 56            self.p0 = r[0]
 57            self.p1 = r[1]
 58        elif isinstance(r[0], tuple):
 59            self.p0 = Point(r[0][0], r[0][1])
 60            self.p1 = Point(r[1][0], r[1][1])
 61        else:
 62            self.p0 = Point(r[0], r[1])
 63            self.p1 = Point(r[2], r[3])
 65        self.width = 0.1 if width is None else width
 66        self.type = type
 68    @cached_property
 69    def bbox(self):
 70        return Rectangle(min(self.p0.x, self.p1.x),
 71                         min(self.p0.y, self.p1.y),
 72                         max(self.p0.x, self.p1.x),
 73                         max(self.p0.y, self.p1.y))
 75    def isclose(self, other, rtol: float = 1e-09, atol: float = 0.0) -> bool:
 76        return (self.p0.isclose(other.p0, rtol, atol) and
 77                self.p1.isclose(other.p1, rtol, atol))
 79    def contains(self, point, atol: float = 0.0) -> bool:
 80        # if the point lies on the line (A-C---B), the distance A-C + C-B = A-B
 81        ac = self.p0.distance_squared(point)
 82        cb = self.p1.distance_squared(point)
 83        ab = self.p0.distance_squared(self.p1)
 84        return (ac + cb + math.pow(atol, 2)) <= ab
 86    @property
 87    def direction(self):
 88        if math.isclose(self.p0.x, self.p1.x):
 89            return Line.Direction.VERTICAL
 90        if math.isclose(self.p0.y, self.p1.y):
 91            return Line.Direction.HORIZONTAL
 92        return Line.Direction.ANGLE
 94    def specialize(self):
 95        if self.direction == Line.Direction.VERTICAL:
 96            return VLine(self.p0.x, self.p0.y, self.p1.y, self.width)
 97        if self.direction == Line.Direction.HORIZONTAL:
 98            return HLine(self.p0.y, self.p0.x, self.p1.x, self.width)
 99        return Line(self.p0, self.p1, width=self.width)
101    def __repr__(self) -> str:
102        data = [repr(self.p0), repr(self.p1)]
103        if self.width: data += [f"{self.width:.1f}"]
104        if self.type is not None: data += []
105        return f"<{','.join(data)}>"
class VLine(Line):
    """Vertical line at a single x position, stored with ``p0`` below ``p1``."""

    def __init__(self, x: float, y0: float, y1: float, width: float = None):
        """Create the line from its x position and its two y ends (any order)."""
        # Order the ends bottom-to-top before handing them to Line.
        lower, upper = (y1, y0) if y0 > y1 else (y0, y1)
        super().__init__(Point(x, lower), Point(x, upper), width=width)
        self.length = upper - lower

    @property
    def direction(self):
        """Always ``Line.Direction.VERTICAL``; no coordinate comparison needed."""
        return Line.Direction.VERTICAL

    def __repr__(self) -> str:
        """Return ``<Xx:y0,y1[|width]>``; floats are shown with one decimal."""
        def fmt(value):
            return f"{value:.1f}" if isinstance(value, float) else value

        text = f"<X{fmt(self.p0.x)}:{fmt(self.p0.y)},{fmt(self.p1.y)}"
        if self.width:
            text += f"|{self.width:.1f}"
        return f"{text}>"
class HLine(Line):
    """Horizontal line at a single y position, stored with ``p0`` left of ``p1``."""

    def __init__(self, y: float, x0: float, x1: float, width: float = None):
        """Create the line from its y position and its two x ends (any order)."""
        # Order the ends left-to-right before handing them to Line.
        left, right = (x1, x0) if x0 > x1 else (x0, x1)
        super().__init__(Point(left, y), Point(right, y), width=width)
        self.length = right - left

    @property
    def direction(self):
        """Always ``Line.Direction.HORIZONTAL``; no coordinate comparison needed."""
        return Line.Direction.HORIZONTAL

    def __repr__(self) -> str:
        """Return ``<Yy:x0,x1[|width]>``; floats are shown with one decimal."""
        def fmt(value):
            return f"{value:.1f}" if isinstance(value, float) else value

        text = f"<Y{fmt(self.p0.y)}:{fmt(self.p0.x)},{fmt(self.p1.x)}"
        if self.width:
            text += f"|{self.width:.1f}"
        return f"{text}>"
class Rectangle:
    """Axis-aligned rectangle stored as bottom-left ``p0`` and top-right ``p1``.

    The edge attributes (``left``, ``bottom``, ``right``, ``top``), ``x``,
    ``y``, ``width`` and ``height`` are computed once in ``__init__``.
    """

    def __init__(self, *r):
        """Create a rectangle from one of several argument forms.

        :param r: a pypdfium2 ``FS_RECTF`` (its ``left``/``bottom``/``right``/
            ``top`` fields are read), two ``Point`` objects, two ``(x, y)``
            tuples, or four scalars ``x0, y0, x1, y1``. The two corners may
            be given in any order.
        """
        # P0 is left, bottom
        # P1 is right, top
        if isinstance(r[0], pp.raw.FS_RECTF):
            p0 = Point(r[0].left, r[0].bottom)
            p1 = Point(r[0].right, r[0].top)
        elif isinstance(r[0], Point):
            p0 = r[0]
            p1 = r[1]
        elif isinstance(r[0], tuple):
            p0 = Point(r[0][0], r[0][1])
            p1 = Point(r[1][0], r[1][1])
        else:
            p0 = Point(r[0], r[1])
            p1 = Point(r[2], r[3])

        # Ensure the correct ordering of point values. Fresh corner points
        # are built instead of swapping coordinates in place, so Point
        # objects owned by the caller are never modified.
        swap_x = p0.x > p1.x
        swap_y = p0.y > p1.y
        if swap_x or swap_y:
            x0, x1 = (p1.x, p0.x) if swap_x else (p0.x, p1.x)
            y0, y1 = (p1.y, p0.y) if swap_y else (p0.y, p1.y)
            p0, p1 = Point(x0, y0, type=p0.type), Point(x1, y1, type=p1.type)
        self.p0 = p0
        self.p1 = p1

        self.x = self.p0.x
        self.y = self.p0.y
        self.left = self.p0.x
        self.bottom = self.p0.y

        self.right = self.p1.x
        self.top = self.p1.y

        self.width = self.p1.x - self.p0.x
        self.height = self.p1.y - self.p0.y

    def contains(self, other) -> bool:
        """Return True if ``other`` lies inside this rectangle (edges included).

        :param other: a ``Point``, or any object exposing ``left``,
            ``bottom``, ``right`` and ``top`` (such as another Rectangle).
        """
        if isinstance(other, Point):
            return (self.bottom <= other.y <= self.top and
                    self.left <= other.x <= self.right)
        # Comparing y-axis first may be faster for "content areas filtering"
        # when doing subparsing of page content (like in tables)
        return (self.bottom <= other.bottom and other.top <= self.top and
                self.left <= other.left and other.right <= self.right)

    def overlaps(self, other) -> bool:
        """Return True if ``other.p0`` or ``other.p1`` lies inside this rectangle.

        NOTE(review): only those two points are tested. An ``other`` that
        intersects this rectangle without either point being inside (for
        example one that fully encloses it) is reported as not overlapping.
        Confirm whether callers rely on that before widening the check.
        """
        return self.contains(other.p0) or self.contains(other.p1)

    def isclose(self, other, rtol: float = 1e-09, atol: float = 0.0) -> bool:
        """Return True if both corners are close to the corners of ``other``."""
        return (self.p0.isclose(other.p0, rtol, atol) and
                self.p1.isclose(other.p1, rtol, atol))

    @cached_property
    def midpoint(self) -> Point:
        """Center point of the rectangle (computed once, then cached)."""
        return Point((self.p1.x + self.p0.x) / 2, (self.p1.y + self.p0.y) / 2)

    @cached_property
    def points(self) -> list[Point]:
        """The four corners: bottom-left, bottom-right, top-right, top-left."""
        return [self.p0, Point(self.right, self.bottom),
                self.p1, Point(self.left, self.top)]

    def offset(self, offset):
        """Return a new rectangle grown by ``offset`` on all four sides."""
        return Rectangle(self.p0.x - offset, self.p0.y - offset,
                         self.p1.x + offset, self.p1.y + offset)

    def offset_x(self, offset):
        """Return a new rectangle grown by ``offset`` on the left and right."""
        return Rectangle(self.p0.x - offset, self.p0.y,
                         self.p1.x + offset, self.p1.y)

    def offset_y(self, offset):
        """Return a new rectangle grown by ``offset`` on the bottom and top."""
        return Rectangle(self.p0.x, self.p0.y - offset,
                         self.p1.x, self.p1.y + offset)

    def translated(self, point):
        """Return a new rectangle shifted by ``point.x`` and ``point.y``."""
        return Rectangle(self.p0.x + point.x, self.p0.y + point.y,
                         self.p1.x + point.x, self.p1.y + point.y)

    def rotated(self, rotation):
        """Return a new rectangle from both corners rotated about the origin.

        :param rotation: angle in degrees.

        NOTE(review): only ``p0`` and ``p1`` are rotated, so for angles that
        are not multiples of 90 degrees the result spans the rotated
        diagonal rather than the whole rotated rectangle.
        """
        cos = math.cos(math.radians(rotation))
        sin = math.sin(math.radians(rotation))
        return Rectangle(self.p0.x * cos - self.p0.y * sin,
                         self.p0.x * sin + self.p0.y * cos,
                         self.p1.x * cos - self.p1.y * sin,
                         self.p1.x * sin + self.p1.y * cos)

    def joined(self, other):
        """Return the smallest rectangle covering both this one and ``other``."""
        return Rectangle(min(self.p0.x, other.p0.x),
                         min(self.p0.y, other.p0.y),
                         max(self.p1.x, other.p1.x),
                         max(self.p1.y, other.p1.y))

    def round(self, accuracy=0):
        """Return a new rectangle with all coordinates rounded to ``accuracy`` digits."""
        return Rectangle(round(self.p0.x, accuracy), round(self.p0.y, accuracy),
                         round(self.p1.x, accuracy), round(self.p1.y, accuracy))

    def __hash__(self):
        # Sum of the two corner hashes.
        return hash(self.p0) + hash(self.p1)

    def __repr__(self) -> str:
        """Return ``[p0,p1]`` using the repr of both corners."""
        return f"[{repr(self.p0)},{repr(self.p1)}]"
class Region:
    """A 1D range from ``v0`` to ``v1`` holding a list of objects and subregions."""

    def __init__(self, v0, v1, obj=None):
        """Store the bounds in ascending order and start the object list.

        :param v0: one bound of the range.
        :param v1: the other bound of the range.
        :param obj: optional first entry of ``self.objs``.
        """
        low, high = (v1, v0) if v0 > v1 else (v0, v1)
        self.v0 = low
        self.v1 = high
        self.objs = [obj] if obj is not None else []
        self.subregions = []

    def overlaps(self, o0, o1, atol=0) -> bool:
        """Return True if the range ``o0..o1`` (any order) overlaps this region.

        The region is widened by ``atol`` on both ends first. Ranges that
        only touch the widened region do not count as overlapping.
        """
        lo, hi = (o1, o0) if o0 > o1 else (o0, o1)
        # Disjoint when the other range starts at/after our top, or ends
        # at/before our bottom.
        return not ((self.v1 + atol) <= lo or hi <= (self.v0 - atol))

    def contains(self, v, atol=0) -> bool:
        """Return True if ``v`` lies within the region widened by ``atol`` (inclusive)."""
        lower = self.v0 - atol
        return lower <= v and v <= self.v1 + atol

    @property
    def delta(self) -> float:
        """Extent of the region, ``v1 - v0``."""
        return self.v1 - self.v0

    def __repr__(self):
        """Return ``<v0->v1[|object count][|subregions]>`` with integer bounds."""
        pieces = [f"<{int(self.v0)}->{int(self.v1)}"]
        if self.objs:
            pieces.append(f"|{len(self.objs)}")
        if self.subregions:
            pieces.append(f"|{repr(self.subregions)}")
        pieces.append(">")
        return "".join(pieces)
