modm_data.utils.math
# Copyright 2022, Niklas Hauser
# SPDX-License-Identifier: MPL-2.0

import math
from enum import Enum
from functools import cached_property

import pypdfium2 as pp

# PDF User Coordinate System is mathematical: x *right*, y *upwards*


class Point:
    """A 2D point with an optional enum tag.

    Construct it either from two coordinates, ``Point(x, y)``, or from a
    single ``(x, y)`` tuple, ``Point((x, y))``.

    Note: ``__hash__`` is value-based but no ``__eq__`` is defined, so two
    points only compare equal when they are the same object.
    """

    def __init__(self, *xy, type: Enum = None):
        """
        :param xy: two coordinates ``x, y`` or one ``(x, y)`` tuple.
        :param type: optional enum value stored unchanged as ``self.type``.
        """
        if isinstance(xy[0], tuple):
            self.x = xy[0][0]
            self.y = xy[0][1]
        else:
            self.x = xy[0]
            self.y = xy[1]
        self.type = type

    @staticmethod
    def _fmt(value) -> str:
        """Format a coordinate: floats with one decimal, anything else via str()."""
        return f"{value:.1f}" if isinstance(value, float) else str(value)

    def isclose(self, other, rtol: float = 1e-09, atol: float = 0.0) -> bool:
        """Return True if both coordinates are close to those of ``other``.

        ``rtol`` and ``atol`` are forwarded to ``math.isclose`` as ``rel_tol``
        and ``abs_tol``.
        """
        return (math.isclose(self.x, other.x, rel_tol=rtol, abs_tol=atol) and
                math.isclose(self.y, other.y, rel_tol=rtol, abs_tol=atol))

    def distance_squared(self, other) -> float:
        """Return the squared Euclidean distance to ``other``."""
        return math.pow(self.x - other.x, 2) + math.pow(self.y - other.y, 2)

    def distance(self, other) -> float:
        """Return the Euclidean distance to ``other``."""
        return math.sqrt(self.distance_squared(other))

    def __neg__(self):
        """Return a new point with both coordinates negated (``type`` is not copied)."""
        return Point(-self.x, -self.y)

    def __hash__(self):
        return hash(f"{self.x} {self.y}")

    def __repr__(self) -> str:
        # Every part must be a string: str.join() raises TypeError on ints.
        out = [self._fmt(self.x), self._fmt(self.y)]
        if self.type is not None:
            out.append(self.type.name)
        return f"({','.join(out)})"


class Line:
    """A line segment from ``p0`` to ``p1`` with a stroke width and optional tag.

    Accepted positional forms: a ``Rectangle`` (uses its ``p0``/``p1``),
    two ``Point`` objects, two ``(x, y)`` tuples, or four numbers
    ``x0, y0, x1, y1``.
    """

    class Direction(Enum):
        ANGLE = 0
        VERTICAL = 1
        HORIZONTAL = 2

    def __init__(self, *r, width: float = None, type: Enum = None):
        """
        :param r: the end points in one of the forms listed on the class.
        :param width: stroke width; defaults to 0.1 when ``None``.
        :param type: optional enum value stored unchanged as ``self.type``.
        """
        if isinstance(r[0], Rectangle):
            self.p0 = r[0].p0
            self.p1 = r[0].p1
        elif isinstance(r[0], Point):
            self.p0 = r[0]
            self.p1 = r[1]
        elif isinstance(r[0], tuple):
            self.p0 = Point(r[0][0], r[0][1])
            self.p1 = Point(r[1][0], r[1][1])
        else:
            self.p0 = Point(r[0], r[1])
            self.p1 = Point(r[2], r[3])

        self.width = 0.1 if width is None else width
        self.type = type

    @cached_property
    def bbox(self):
        """The axis-aligned ``Rectangle`` spanned by the two end points (cached)."""
        return Rectangle(min(self.p0.x, self.p1.x),
                         min(self.p0.y, self.p1.y),
                         max(self.p0.x, self.p1.x),
                         max(self.p0.y, self.p1.y))

    def isclose(self, other, rtol: float = 1e-09, atol: float = 0.0) -> bool:
        """Return True if both end points are close to those of ``other``."""
        return (self.p0.isclose(other.p0, rtol, atol) and
                self.p1.isclose(other.p1, rtol, atol))

    def contains(self, point, atol: float = 0.0) -> bool:
        """Return True if ``point`` lies on the segment between ``p0`` and ``p1``.

        If the point C lies on the segment (A-C---B), the distances satisfy
        A-C + C-B = A-B. The comparison must use real distances: with squared
        distances the inequality holds for every point inside the circle that
        has A-B as its diameter.

        :param point: object with ``x`` and ``y`` attributes.
        :param atol: how much longer the path A-C-B may be than A-B while
            still counting as "on the line"; larger values accept more points.
        """
        ac = math.hypot(self.p0.x - point.x, self.p0.y - point.y)
        cb = math.hypot(self.p1.x - point.x, self.p1.y - point.y)
        ab = math.hypot(self.p0.x - self.p1.x, self.p0.y - self.p1.y)
        detour = ac + cb
        # isclose() absorbs floating point rounding for points exactly on the line
        return detour <= ab + atol or math.isclose(detour, ab)

    @property
    def direction(self):
        """Classify the line as VERTICAL, HORIZONTAL or ANGLE via ``math.isclose``."""
        if math.isclose(self.p0.x, self.p1.x):
            return Line.Direction.VERTICAL
        if math.isclose(self.p0.y, self.p1.y):
            return Line.Direction.HORIZONTAL
        return Line.Direction.ANGLE

    def specialize(self):
        """Return a ``VLine``/``HLine`` for axis-parallel lines, else a new ``Line``.

        NOTE(review): ``self.type`` is not carried over to the returned line.
        """
        if self.direction == Line.Direction.VERTICAL:
            return VLine(self.p0.x, self.p0.y, self.p1.y, self.width)
        if self.direction == Line.Direction.HORIZONTAL:
            return HLine(self.p0.y, self.p0.x, self.p1.x, self.width)
        return Line(self.p0, self.p1, width=self.width)

    def __repr__(self) -> str:
        data = [repr(self.p0), repr(self.p1)]
        if self.width:
            data += [f"{self.width:.1f}"]
        if self.type is not None:
            data += [self.type.name]
        return f"<{','.join(data)}>"


class VLine(Line):
    """A vertical line at ``x`` whose end points are ordered so ``p0`` is the lower one."""

    def __init__(self, x: float, y0: float, y1: float, width: float = None):
        if y0 > y1:
            y0, y1 = y1, y0
        super().__init__(Point(x, y0), Point(x, y1), width=width)
        self.length = y1 - y0

    @property
    def direction(self):
        return Line.Direction.VERTICAL

    def __repr__(self) -> str:
        x = f"{self.p0.x:.1f}" if isinstance(self.p0.x, float) else self.p0.x
        y0 = f"{self.p0.y:.1f}" if isinstance(self.p0.y, float) else self.p0.y
        y1 = f"{self.p1.y:.1f}" if isinstance(self.p1.y, float) else self.p1.y
        out = f"<X{x}:{y0},{y1}"
        if self.width:
            out += f"|{self.width:.1f}"
        return out + ">"


class HLine(Line):
    """A horizontal line at ``y`` whose end points are ordered so ``p0`` is the left one."""

    def __init__(self, y: float, x0: float, x1: float, width: float = None):
        if x0 > x1:
            x0, x1 = x1, x0
        super().__init__(Point(x0, y), Point(x1, y), width=width)
        self.length = x1 - x0

    @property
    def direction(self):
        return Line.Direction.HORIZONTAL

    def __repr__(self) -> str:
        y = f"{self.p0.y:.1f}" if isinstance(self.p0.y, float) else self.p0.y
        x0 = f"{self.p0.x:.1f}" if isinstance(self.p0.x, float) else self.p0.x
        x1 = f"{self.p1.x:.1f}" if isinstance(self.p1.x, float) else self.p1.x
        out = f"<Y{y}:{x0},{x1}"
        if self.width:
            out += f"|{self.width:.1f}"
        return out + ">"


class Rectangle:
    """An axis-aligned rectangle with ``p0`` = (left, bottom) and ``p1`` = (right, top).

    Accepted positional forms: a ``pypdfium2`` ``FS_RECTF``, two ``Point``
    objects, two ``(x, y)`` tuples, or four numbers ``x0, y0, x1, y1``.
    """

    def __init__(self, *r):
        # P0 is left, bottom
        # P1 is right, top
        if isinstance(r[0], pp.raw.FS_RECTF):
            self.p0 = Point(r[0].left, r[0].bottom)
            self.p1 = Point(r[0].right, r[0].top)
        elif isinstance(r[0], Point):
            # Copy the points: the reordering below assigns to p0/p1 in place
            # and must not modify objects owned by the caller.
            self.p0 = Point(r[0].x, r[0].y, type=r[0].type)
            self.p1 = Point(r[1].x, r[1].y, type=r[1].type)
        elif isinstance(r[0], tuple):
            self.p0 = Point(r[0][0], r[0][1])
            self.p1 = Point(r[1][0], r[1][1])
        else:
            self.p0 = Point(r[0], r[1])
            self.p1 = Point(r[2], r[3])

        # Ensure the correct ordering of point values
        if self.p0.x > self.p1.x:
            self.p0.x, self.p1.x = self.p1.x, self.p0.x
        if self.p0.y > self.p1.y:
            self.p0.y, self.p1.y = self.p1.y, self.p0.y

        self.x = self.p0.x
        self.y = self.p0.y
        self.left = self.p0.x
        self.bottom = self.p0.y

        self.right = self.p1.x
        self.top = self.p1.y

        self.width = self.p1.x - self.p0.x
        self.height = self.p1.y - self.p0.y

    def contains(self, other) -> bool:
        """Return True if ``other`` lies inside this rectangle (edges inclusive).

        ``other`` is either a ``Point`` or an object with ``left``, ``bottom``,
        ``right`` and ``top`` attributes.
        """
        if isinstance(other, Point):
            return (self.bottom <= other.y <= self.top and
                    self.left <= other.x <= self.right)
        # Comparing y-axis first may be faster for "content areas filtering"
        # when doing subparsing of page content (like in tables)
        return (self.bottom <= other.bottom and other.top <= self.top and
                self.left <= other.left and other.right <= self.right)

    def overlaps(self, other) -> bool:
        """Return True if this rectangle and ``other`` share at least one point.

        Touching edges count as overlapping. Testing only whether a corner of
        ``other`` lies inside ``self`` would miss rectangles that cross each
        other or that enclose ``self``, so the x and y extents are compared.
        """
        return (self.bottom <= other.top and other.bottom <= self.top and
                self.left <= other.right and other.left <= self.right)

    def isclose(self, other, rtol: float = 1e-09, atol: float = 0.0) -> bool:
        """Return True if both corner points are close to those of ``other``."""
        return (self.p0.isclose(other.p0, rtol, atol) and
                self.p1.isclose(other.p1, rtol, atol))

    @cached_property
    def midpoint(self) -> Point:
        """The center point of the rectangle (cached)."""
        return Point((self.p1.x + self.p0.x) / 2, (self.p1.y + self.p0.y) / 2)

    @cached_property
    def points(self) -> list[Point]:
        """The four corners: p0, (right, bottom), p1, (left, top) (cached)."""
        return [self.p0, Point(self.right, self.bottom),
                self.p1, Point(self.left, self.top)]

    def offset(self, offset):
        """Return a new rectangle grown by ``offset`` on all four sides."""
        return Rectangle(self.p0.x - offset, self.p0.y - offset,
                         self.p1.x + offset, self.p1.y + offset)

    def offset_x(self, offset):
        """Return a new rectangle grown by ``offset`` on the left and right."""
        return Rectangle(self.p0.x - offset, self.p0.y,
                         self.p1.x + offset, self.p1.y)

    def offset_y(self, offset):
        """Return a new rectangle grown by ``offset`` on the bottom and top."""
        return Rectangle(self.p0.x, self.p0.y - offset,
                         self.p1.x, self.p1.y + offset)

    def translated(self, point):
        """Return a new rectangle moved by ``point.x`` and ``point.y``."""
        return Rectangle(self.p0.x + point.x, self.p0.y + point.y,
                         self.p1.x + point.x, self.p1.y + point.y)

    def rotated(self, rotation):
        """Return the rectangle spanned by p0 and p1 rotated about the origin.

        :param rotation: angle in degrees.

        NOTE(review): only the two stored corners are rotated, so for angles
        that are not multiples of 90 degrees the result is not the bounding
        box of the rotated rectangle.
        """
        cos = math.cos(math.radians(rotation))
        sin = math.sin(math.radians(rotation))
        return Rectangle(self.p0.x * cos - self.p0.y * sin,
                         self.p0.x * sin + self.p0.y * cos,
                         self.p1.x * cos - self.p1.y * sin,
                         self.p1.x * sin + self.p1.y * cos)

    def joined(self, other):
        """Return the smallest rectangle covering both ``self`` and ``other``."""
        return Rectangle(min(self.p0.x, other.p0.x),
                         min(self.p0.y, other.p0.y),
                         max(self.p1.x, other.p1.x),
                         max(self.p1.y, other.p1.y))

    def round(self, accuracy=0):
        """Return a new rectangle with all coordinates rounded to ``accuracy`` digits."""
        return Rectangle(round(self.p0.x, accuracy), round(self.p0.y, accuracy),
                         round(self.p1.x, accuracy), round(self.p1.y, accuracy))

    def __hash__(self):
        return hash(self.p0) + hash(self.p1)

    def __repr__(self) -> str:
        return f"[{repr(self.p0)},{repr(self.p1)}]"


class Region:
    """A 1D interval ``[v0, v1]`` that collects objects and sub-regions."""

    def __init__(self, v0, v1, obj=None):
        """
        :param v0: one end of the interval.
        :param v1: the other end; the two are swapped if ``v0 > v1``.
        :param obj: optional first entry of ``self.objs``.
        """
        if v0 > v1:
            v0, v1 = v1, v0
        self.v0 = v0
        self.v1 = v1
        self.objs = [] if obj is None else [obj]
        self.subregions = []

    def overlaps(self, o0, o1, atol=0) -> bool:
        """Return True if ``[o0, o1]`` overlaps this region widened by ``atol``."""
        if o0 > o1:
            o0, o1 = o1, o0
        # if reg top is lower than o0
        if (self.v1 + atol) <= o0:
            return False
        # if reg bottom is higher than o1
        if o1 <= (self.v0 - atol):
            return False
        return True

    def contains(self, v, atol=0) -> bool:
        """Return True if ``v`` lies within the region widened by ``atol``."""
        return self.v0 - atol <= v <= self.v1 + atol

    @property
    def delta(self) -> float:
        """The extent of the region, ``v1 - v0``."""
        return self.v1 - self.v0

    def __repr__(self):
        r = f"<{int(self.v0)}->{int(self.v1)}"
        if self.objs:
            r += f"|{len(self.objs)}"
        if self.subregions:
            r += f"|{repr(self.subregions)}"
        return r + ">"
class
Point:
class Point:
    """A 2D point with an optional enum tag.

    Construct it either from two coordinates, ``Point(x, y)``, or from a
    single ``(x, y)`` tuple, ``Point((x, y))``.

    Note: ``__hash__`` is value-based but no ``__eq__`` is defined, so two
    points only compare equal when they are the same object.
    """

    def __init__(self, *xy, type: Enum = None):
        """
        :param xy: two coordinates ``x, y`` or one ``(x, y)`` tuple.
        :param type: optional enum value stored unchanged as ``self.type``.
        """
        if isinstance(xy[0], tuple):
            self.x = xy[0][0]
            self.y = xy[0][1]
        else:
            self.x = xy[0]
            self.y = xy[1]
        self.type = type

    @staticmethod
    def _fmt(value) -> str:
        """Format a coordinate: floats with one decimal, anything else via str()."""
        return f"{value:.1f}" if isinstance(value, float) else str(value)

    def isclose(self, other, rtol: float = 1e-09, atol: float = 0.0) -> bool:
        """Return True if both coordinates are close to those of ``other``.

        ``rtol`` and ``atol`` are forwarded to ``math.isclose`` as ``rel_tol``
        and ``abs_tol``.
        """
        return (math.isclose(self.x, other.x, rel_tol=rtol, abs_tol=atol) and
                math.isclose(self.y, other.y, rel_tol=rtol, abs_tol=atol))

    def distance_squared(self, other) -> float:
        """Return the squared Euclidean distance to ``other``."""
        return math.pow(self.x - other.x, 2) + math.pow(self.y - other.y, 2)

    def distance(self, other) -> float:
        """Return the Euclidean distance to ``other``."""
        return math.sqrt(self.distance_squared(other))

    def __neg__(self):
        """Return a new point with both coordinates negated (``type`` is not copied)."""
        return Point(-self.x, -self.y)

    def __hash__(self):
        return hash(f"{self.x} {self.y}")

    def __repr__(self) -> str:
        # Every part must be a string: str.join() raises TypeError on ints.
        out = [self._fmt(self.x), self._fmt(self.y)]
        if self.type is not None:
            out.append(self.type.name)
        return f"({','.join(out)})"
class
Line:
class Line:
    """A line segment from ``p0`` to ``p1`` with a stroke width and optional tag.

    Accepted positional forms: a ``Rectangle`` (uses its ``p0``/``p1``),
    two ``Point`` objects, two ``(x, y)`` tuples, or four numbers
    ``x0, y0, x1, y1``.
    """

    class Direction(Enum):
        ANGLE = 0
        VERTICAL = 1
        HORIZONTAL = 2

    def __init__(self, *r, width: float = None, type: Enum = None):
        """
        :param r: the end points in one of the forms listed on the class.
        :param width: stroke width; defaults to 0.1 when ``None``.
        :param type: optional enum value stored unchanged as ``self.type``.
        """
        if isinstance(r[0], Rectangle):
            self.p0 = r[0].p0
            self.p1 = r[0].p1
        elif isinstance(r[0], Point):
            self.p0 = r[0]
            self.p1 = r[1]
        elif isinstance(r[0], tuple):
            self.p0 = Point(r[0][0], r[0][1])
            self.p1 = Point(r[1][0], r[1][1])
        else:
            self.p0 = Point(r[0], r[1])
            self.p1 = Point(r[2], r[3])

        self.width = 0.1 if width is None else width
        self.type = type

    @cached_property
    def bbox(self):
        """The axis-aligned ``Rectangle`` spanned by the two end points (cached)."""
        return Rectangle(min(self.p0.x, self.p1.x),
                         min(self.p0.y, self.p1.y),
                         max(self.p0.x, self.p1.x),
                         max(self.p0.y, self.p1.y))

    def isclose(self, other, rtol: float = 1e-09, atol: float = 0.0) -> bool:
        """Return True if both end points are close to those of ``other``."""
        return (self.p0.isclose(other.p0, rtol, atol) and
                self.p1.isclose(other.p1, rtol, atol))

    def contains(self, point, atol: float = 0.0) -> bool:
        """Return True if ``point`` lies on the segment between ``p0`` and ``p1``.

        If the point C lies on the segment (A-C---B), the distances satisfy
        A-C + C-B = A-B. The comparison must use real distances: with squared
        distances the inequality holds for every point inside the circle that
        has A-B as its diameter.

        :param point: object with ``x`` and ``y`` attributes.
        :param atol: how much longer the path A-C-B may be than A-B while
            still counting as "on the line"; larger values accept more points.
        """
        ac = math.hypot(self.p0.x - point.x, self.p0.y - point.y)
        cb = math.hypot(self.p1.x - point.x, self.p1.y - point.y)
        ab = math.hypot(self.p0.x - self.p1.x, self.p0.y - self.p1.y)
        detour = ac + cb
        # isclose() absorbs floating point rounding for points exactly on the line
        return detour <= ab + atol or math.isclose(detour, ab)

    @property
    def direction(self):
        """Classify the line as VERTICAL, HORIZONTAL or ANGLE via ``math.isclose``."""
        if math.isclose(self.p0.x, self.p1.x):
            return Line.Direction.VERTICAL
        if math.isclose(self.p0.y, self.p1.y):
            return Line.Direction.HORIZONTAL
        return Line.Direction.ANGLE

    def specialize(self):
        """Return a ``VLine``/``HLine`` for axis-parallel lines, else a new ``Line``.

        NOTE(review): ``self.type`` is not carried over to the returned line.
        """
        if self.direction == Line.Direction.VERTICAL:
            return VLine(self.p0.x, self.p0.y, self.p1.y, self.width)
        if self.direction == Line.Direction.HORIZONTAL:
            return HLine(self.p0.y, self.p0.x, self.p1.x, self.width)
        return Line(self.p0, self.p1, width=self.width)

    def __repr__(self) -> str:
        data = [repr(self.p0), repr(self.p1)]
        if self.width:
            data += [f"{self.width:.1f}"]
        if self.type is not None:
            data += [self.type.name]
        return f"<{','.join(data)}>"
Line(*r, width: float = None, type: enum.Enum = None)
52 def __init__(self, *r, width: float = None, type: Enum = None): 53 if isinstance(r[0], Rectangle): 54 self.p0 = r[0].p0 55 self.p1 = r[0].p1 56 elif isinstance(r[0], Point): 57 self.p0 = r[0] 58 self.p1 = r[1] 59 elif isinstance(r[0], tuple): 60 self.p0 = Point(r[0][0], r[0][1]) 61 self.p1 = Point(r[1][0], r[1][1]) 62 else: 63 self.p0 = Point(r[0], r[1]) 64 self.p1 = Point(r[2], r[3]) 65 66 self.width = 0.1 if width is None else width 67 self.type = type
class
Line.Direction(enum.Enum):
Create a collection of name/value pairs.
Example enumeration:
>>> class Color(Enum):
... RED = 1
... BLUE = 2
... GREEN = 3
Access them by:
attribute access:
>>> Color.RED
<Color.RED: 1>
value lookup:
>>> Color(1)
<Color.RED: 1>
name lookup:
>>> Color['RED']
<Color.RED: 1>
Enumerations can be iterated over, and know how many members they have:
>>> len(Color)
3
>>> list(Color)
[<Color.RED: 1>, <Color.BLUE: 2>, <Color.GREEN: 3>]
Methods can be added to enumerations, and members can have their own attributes -- see the documentation for details.
Inherited Members
- enum.Enum
- name
- value
class VLine(Line):
    """A vertical line at ``x``; ``p0`` is always the end with the smaller y."""

    def __init__(self, x: float, y0: float, y1: float, width: float = None):
        """
        :param x: shared x coordinate of both end points.
        :param y0: one y coordinate.
        :param y1: the other y coordinate; order does not matter.
        :param width: stroke width, forwarded to ``Line``.
        """
        low, high = (y1, y0) if y0 > y1 else (y0, y1)
        super().__init__(Point(x, low), Point(x, high), width=width)
        self.length = high - low

    @property
    def direction(self):
        """Always ``Line.Direction.VERTICAL``."""
        return Line.Direction.VERTICAL

    def __repr__(self) -> str:
        def fmt(value):
            # Floats are shown with one decimal, everything else as-is
            return f"{value:.1f}" if isinstance(value, float) else value

        text = f"<X{fmt(self.p0.x)}:{fmt(self.p0.y)},{fmt(self.p1.y)}"
        if self.width:
            text += f"|{self.width:.1f}"
        return text + ">"
class HLine(Line):
    """A horizontal line at ``y``; ``p0`` is always the end with the smaller x."""

    def __init__(self, y: float, x0: float, x1: float, width: float = None):
        """
        :param y: shared y coordinate of both end points.
        :param x0: one x coordinate.
        :param x1: the other x coordinate; order does not matter.
        :param width: stroke width, forwarded to ``Line``.
        """
        low, high = (x1, x0) if x0 > x1 else (x0, x1)
        super().__init__(Point(low, y), Point(high, y), width=width)
        self.length = high - low

    @property
    def direction(self):
        """Always ``Line.Direction.HORIZONTAL``."""
        return Line.Direction.HORIZONTAL

    def __repr__(self) -> str:
        def fmt(value):
            # Floats are shown with one decimal, everything else as-is
            return f"{value:.1f}" if isinstance(value, float) else value

        text = f"<Y{fmt(self.p0.y)}:{fmt(self.p0.x)},{fmt(self.p1.x)}"
        if self.width:
            text += f"|{self.width:.1f}"
        return text + ">"
class
Rectangle:
class Rectangle:
    """An axis-aligned rectangle with ``p0`` = (left, bottom) and ``p1`` = (right, top).

    Accepted positional forms: a ``pypdfium2`` ``FS_RECTF``, two ``Point``
    objects, two ``(x, y)`` tuples, or four numbers ``x0, y0, x1, y1``.
    """

    def __init__(self, *r):
        # P0 is left, bottom
        # P1 is right, top
        if isinstance(r[0], pp.raw.FS_RECTF):
            self.p0 = Point(r[0].left, r[0].bottom)
            self.p1 = Point(r[0].right, r[0].top)
        elif isinstance(r[0], Point):
            # Copy the points: the reordering below assigns to p0/p1 in place
            # and must not modify objects owned by the caller.
            self.p0 = Point(r[0].x, r[0].y, type=r[0].type)
            self.p1 = Point(r[1].x, r[1].y, type=r[1].type)
        elif isinstance(r[0], tuple):
            self.p0 = Point(r[0][0], r[0][1])
            self.p1 = Point(r[1][0], r[1][1])
        else:
            self.p0 = Point(r[0], r[1])
            self.p1 = Point(r[2], r[3])

        # Ensure the correct ordering of point values
        if self.p0.x > self.p1.x:
            self.p0.x, self.p1.x = self.p1.x, self.p0.x
        if self.p0.y > self.p1.y:
            self.p0.y, self.p1.y = self.p1.y, self.p0.y

        self.x = self.p0.x
        self.y = self.p0.y
        self.left = self.p0.x
        self.bottom = self.p0.y

        self.right = self.p1.x
        self.top = self.p1.y

        self.width = self.p1.x - self.p0.x
        self.height = self.p1.y - self.p0.y

    def contains(self, other) -> bool:
        """Return True if ``other`` lies inside this rectangle (edges inclusive).

        ``other`` is either a ``Point`` or an object with ``left``, ``bottom``,
        ``right`` and ``top`` attributes.
        """
        if isinstance(other, Point):
            return (self.bottom <= other.y <= self.top and
                    self.left <= other.x <= self.right)
        # Comparing y-axis first may be faster for "content areas filtering"
        # when doing subparsing of page content (like in tables)
        return (self.bottom <= other.bottom and other.top <= self.top and
                self.left <= other.left and other.right <= self.right)

    def overlaps(self, other) -> bool:
        """Return True if this rectangle and ``other`` share at least one point.

        Touching edges count as overlapping. Testing only whether a corner of
        ``other`` lies inside ``self`` would miss rectangles that cross each
        other or that enclose ``self``, so the x and y extents are compared.
        """
        return (self.bottom <= other.top and other.bottom <= self.top and
                self.left <= other.right and other.left <= self.right)

    def isclose(self, other, rtol: float = 1e-09, atol: float = 0.0) -> bool:
        """Return True if both corner points are close to those of ``other``."""
        return (self.p0.isclose(other.p0, rtol, atol) and
                self.p1.isclose(other.p1, rtol, atol))

    @cached_property
    def midpoint(self) -> Point:
        """The center point of the rectangle (cached)."""
        return Point((self.p1.x + self.p0.x) / 2, (self.p1.y + self.p0.y) / 2)

    @cached_property
    def points(self) -> list[Point]:
        """The four corners: p0, (right, bottom), p1, (left, top) (cached)."""
        return [self.p0, Point(self.right, self.bottom),
                self.p1, Point(self.left, self.top)]

    def offset(self, offset):
        """Return a new rectangle grown by ``offset`` on all four sides."""
        return Rectangle(self.p0.x - offset, self.p0.y - offset,
                         self.p1.x + offset, self.p1.y + offset)

    def offset_x(self, offset):
        """Return a new rectangle grown by ``offset`` on the left and right."""
        return Rectangle(self.p0.x - offset, self.p0.y,
                         self.p1.x + offset, self.p1.y)

    def offset_y(self, offset):
        """Return a new rectangle grown by ``offset`` on the bottom and top."""
        return Rectangle(self.p0.x, self.p0.y - offset,
                         self.p1.x, self.p1.y + offset)

    def translated(self, point):
        """Return a new rectangle moved by ``point.x`` and ``point.y``."""
        return Rectangle(self.p0.x + point.x, self.p0.y + point.y,
                         self.p1.x + point.x, self.p1.y + point.y)

    def rotated(self, rotation):
        """Return the rectangle spanned by p0 and p1 rotated about the origin.

        :param rotation: angle in degrees.

        NOTE(review): only the two stored corners are rotated, so for angles
        that are not multiples of 90 degrees the result is not the bounding
        box of the rotated rectangle.
        """
        cos = math.cos(math.radians(rotation))
        sin = math.sin(math.radians(rotation))
        return Rectangle(self.p0.x * cos - self.p0.y * sin,
                         self.p0.x * sin + self.p0.y * cos,
                         self.p1.x * cos - self.p1.y * sin,
                         self.p1.x * sin + self.p1.y * cos)

    def joined(self, other):
        """Return the smallest rectangle covering both ``self`` and ``other``."""
        return Rectangle(min(self.p0.x, other.p0.x),
                         min(self.p0.y, other.p0.y),
                         max(self.p1.x, other.p1.x),
                         max(self.p1.y, other.p1.y))

    def round(self, accuracy=0):
        """Return a new rectangle with all coordinates rounded to ``accuracy`` digits."""
        return Rectangle(round(self.p0.x, accuracy), round(self.p0.y, accuracy),
                         round(self.p1.x, accuracy), round(self.p1.y, accuracy))

    def __hash__(self):
        return hash(self.p0) + hash(self.p1)

    def __repr__(self) -> str:
        return f"[{repr(self.p0)},{repr(self.p1)}]"
Rectangle(*r)
148 def __init__(self, *r): 149 # P0 is left, bottom 150 # P1 is right, top 151 if isinstance(r[0], pp.raw.FS_RECTF): 152 self.p0 = Point(r[0].left, r[0].bottom) 153 self.p1 = Point(r[0].right, r[0].top) 154 elif isinstance(r[0], Point): 155 self.p0 = r[0] 156 self.p1 = r[1] 157 elif isinstance(r[0], tuple): 158 self.p0 = Point(r[0][0], r[0][1]) 159 self.p1 = Point(r[1][0], r[1][1]) 160 else: 161 self.p0 = Point(r[0], r[1]) 162 self.p1 = Point(r[2], r[3]) 163 164 # Ensure the correct ordering of point values 165 if self.p0.x > self.p1.x: 166 self.p0.x, self.p1.x = self.p1.x, self.p0.x 167 if self.p0.y > self.p1.y: 168 self.p0.y, self.p1.y = self.p1.y, self.p0.y 169 170 # assert self.p0.x <= self.p1.x 171 # assert self.p0.y <= self.p1.y 172 173 self.x = self.p0.x 174 self.y = self.p0.y 175 self.left = self.p0.x 176 self.bottom = self.p0.y 177 178 self.right = self.p1.x 179 self.top = self.p1.y 180 181 self.width = self.p1.x - self.p0.x 182 self.height = self.p1.y - self.p0.y
def
contains(self, other) -> bool:
184 def contains(self, other) -> bool: 185 if isinstance(other, Point): 186 return (self.bottom <= other.y <= self.top and 187 self.left <= other.x <= self.right) 188 # Comparing y-axis first may be faster for "content areas filtering" 189 # when doing subparsing of page content (like in tables) 190 return (self.bottom <= other.bottom and other.top <= self.top and 191 self.left <= other.left and other.right <= self.right)
midpoint: Point
points: list[Point]
class
Region:
class Region:
    """A 1D interval ``[v0, v1]`` that collects objects and sub-regions."""

    def __init__(self, v0, v1, obj=None):
        """
        :param v0: one end of the interval.
        :param v1: the other end; the smaller value always becomes ``v0``.
        :param obj: optional first entry of ``self.objs``.
        """
        low, high = (v1, v0) if v0 > v1 else (v0, v1)
        self.v0 = low
        self.v1 = high
        self.objs = [obj] if obj is not None else []
        self.subregions = []

    def overlaps(self, o0, o1, atol=0) -> bool:
        """Return True if ``[o0, o1]`` overlaps this region widened by ``atol``."""
        low, high = (o1, o0) if o0 > o1 else (o0, o1)
        # No overlap if the region top is lower than the interval start,
        # or the region bottom is higher than the interval end.
        return not ((self.v1 + atol) <= low or high <= (self.v0 - atol))

    def contains(self, v, atol=0) -> bool:
        """Return True if ``v`` lies within the region widened by ``atol``."""
        return (self.v0 - atol) <= v and v <= (self.v1 + atol)

    @property
    def delta(self) -> float:
        """The extent of the region, ``v1 - v0``."""
        return self.v1 - self.v0

    def __repr__(self):
        parts = [f"<{int(self.v0)}->{int(self.v1)}"]
        if self.objs:
            parts.append(f"|{len(self.objs)}")
        if self.subregions:
            parts.append(f"|{repr(self.subregions)}")
        parts.append(">")
        return "".join(parts)