modm_data.pdf.link
Inter-PDF References and External Links
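PDF contains two types of links:
1. Internal references to other objects by identifier: ObjLink.
2. External links to URLs: WebLink.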
Both types can be extracted by accessing the modm_data.pdf.page.Page.objlinks
and modm_data.pdf.page.Page.weblinks
properties.
# Copyright 2022, Niklas Hauser
# SPDX-License-Identifier: MPL-2.0

"""
# Inter-PDF References and External Links

PDF contains two types of links:
1. Internal references to other objects by identifier: `ObjLink`.
2. External links to URLs: `WebLink`.

Both types can be extracted by accessing the `modm_data.pdf.page.Page.objlinks`
and `modm_data.pdf.page.Page.weblinks` properties.
"""

import copy
import ctypes
from functools import cached_property
import pypdfium2 as pp
from ..utils import Rectangle


class ObjLink:
    """A link to a PDF object giving the bounding box and destination page."""
    def __init__(self, page: "modm_data.pdf.Page", link: pp.raw.FPDF_LINK):
        """
        :param page: Page containing the link, used to compute bounding box.
        :param link: Raw link object.
        :raises AssertionError: If the annotation rectangle cannot be read.
        """
        self._page = page
        self._dest = pp.raw.FPDFLink_GetDest(page.pdf, link)

        bbox = pp.raw.FS_RECTF()
        # The PDFium call must not live inside an `assert`: with `python -O`
        # assert statements are removed, the call would be skipped and `bbox`
        # would never be populated.
        if not pp.raw.FPDFLink_GetAnnotRect(link, bbox):
            # AssertionError is kept so callers see the same exception type.
            raise AssertionError("FPDFLink_GetAnnotRect failed")
        bbox = Rectangle(bbox)
        if page.rotation:
            # Swap the axes and mirror against the page height.
            # NOTE(review): the same transform is used for every non-zero
            # rotation value; confirm that this is intended.
            bbox = Rectangle(bbox.p0.y, page.height - bbox.p1.x,
                             bbox.p1.y, page.height - bbox.p0.x)
        self.bbox: Rectangle = bbox
        """Bounding box of the link source"""

    @cached_property
    def page_index(self) -> int:
        """0-indexed page number of the link destination."""
        return pp.raw.FPDFDest_GetDestPageIndex(self._page.pdf, self._dest)

    def __repr__(self) -> str:
        return f"Obj({self.page_index})"


class WebLink:
    """A weblink object giving the bounding box and destination URL."""
    def __init__(self, page: "modm_data.pdf.Page", index: int):
        """
        :param page: Page containing the link, used to compute bounding box.
        :param index: 0-index of the weblink object.
        """
        self._page = page
        self._link = page._linkpage
        self._index = index

    @cached_property
    def bbox_count(self) -> int:
        """The number of bounding boxes associated with this weblink."""
        return pp.raw.FPDFLink_CountRects(self._link, self._index)

    @cached_property
    def bboxes(self) -> list[Rectangle]:
        """
        The bounding boxes associated with this weblink.

        :raises AssertionError: If PDFium cannot return one of the rectangles.
        """
        bboxes = []
        for ii in range(self.bbox_count):
            x0, y0 = ctypes.c_double(), ctypes.c_double()
            x1, y1 = ctypes.c_double(), ctypes.c_double()
            # PDFium writes left, top, right, bottom, hence the y1/y0 order.
            # The call is kept out of `assert` so it still runs under `-O`.
            found = pp.raw.FPDFLink_GetRect(self._link, self._index, ii,
                                            x0, y1, x1, y0)
            if not found:
                raise AssertionError("FPDFLink_GetRect failed")
            bboxes.append(Rectangle(x0.value, y0.value, x1.value, y1.value))
        if self._page.rotation:
            # Swap the axes and mirror against the page height.
            bboxes = [Rectangle(bbox.p0.y, self._page.height - bbox.p1.x,
                                bbox.p1.y, self._page.height - bbox.p0.x)
                      for bbox in bboxes]
        return bboxes

    @cached_property
    def range(self) -> tuple[int, int]:
        """
        Start and end index of the characters associated with this link.

        :raises AssertionError: If PDFium cannot return the text range.
        """
        cstart = ctypes.c_int()
        ccount = ctypes.c_int()
        # Kept out of `assert` so the call is not stripped under `python -O`.
        if not pp.raw.FPDFLink_GetTextRange(self._link, self._index, cstart, ccount):
            raise AssertionError("FPDFLink_GetTextRange failed")
        return (cstart.value, cstart.value + ccount.value)

    @cached_property
    def url(self) -> str:
        """The URL string of this link."""
        # With a NULL buffer PDFium reports the number of UTF-16 code units
        # required (terminator included), so URLs of any length are supported.
        needed = pp.raw.FPDFLink_GetURL(self._link, self._index, None, 0)
        if needed <= 0:
            return ""
        cbuffer = (ctypes.c_ushort * needed)()
        pp.raw.FPDFLink_GetURL(self._link, self._index, cbuffer, needed)
        # Drop the trailing NUL terminator.
        return bytes(cbuffer).decode("utf-16-le").strip("\x00")

    def __repr__(self) -> str:
        return f"Url({self.url})"
class
ObjLink:
class ObjLink:
    """A link to a PDF object giving the bounding box and destination page."""
    def __init__(self, page: "modm_data.pdf.Page", link: pp.raw.FPDF_LINK):
        """
        :param page: Page containing the link, used to compute bounding box.
        :param link: Raw link object.
        :raises AssertionError: If the annotation rectangle cannot be read.
        """
        self._page = page
        self._dest = pp.raw.FPDFLink_GetDest(page.pdf, link)

        bbox = pp.raw.FS_RECTF()
        # The PDFium call must not live inside an `assert`: with `python -O`
        # assert statements are removed, the call would be skipped and `bbox`
        # would never be populated.
        if not pp.raw.FPDFLink_GetAnnotRect(link, bbox):
            # AssertionError is kept so callers see the same exception type.
            raise AssertionError("FPDFLink_GetAnnotRect failed")
        bbox = Rectangle(bbox)
        if page.rotation:
            # Swap the axes and mirror against the page height.
            # NOTE(review): the same transform is used for every non-zero
            # rotation value; confirm that this is intended.
            bbox = Rectangle(bbox.p0.y, page.height - bbox.p1.x,
                             bbox.p1.y, page.height - bbox.p0.x)
        self.bbox: Rectangle = bbox
        """Bounding box of the link source"""

    @cached_property
    def page_index(self) -> int:
        """0-indexed page number of the link destination."""
        return pp.raw.FPDFDest_GetDestPageIndex(self._page.pdf, self._dest)

    def __repr__(self) -> str:
        return f"Obj({self.page_index})"
A link to a PDF object giving the bounding box and destination page.
ObjLink(page: modm_data.pdf.page.Page, link: pypdfium2_raw.bindings.LP_struct_fpdf_link_t__)
def __init__(self, page: "modm_data.pdf.Page", link: pp.raw.FPDF_LINK):
    """
    :param page: Page containing the link, used to compute bounding box.
    :param link: Raw link object.
    :raises AssertionError: If the annotation rectangle cannot be read.
    """
    self._page = page
    self._dest = pp.raw.FPDFLink_GetDest(page.pdf, link)

    bbox = pp.raw.FS_RECTF()
    # The PDFium call must not live inside an `assert`: with `python -O`
    # assert statements are removed, the call would be skipped and `bbox`
    # would never be populated.
    if not pp.raw.FPDFLink_GetAnnotRect(link, bbox):
        # AssertionError is kept so callers see the same exception type.
        raise AssertionError("FPDFLink_GetAnnotRect failed")
    bbox = Rectangle(bbox)
    if page.rotation:
        # Swap the axes and mirror against the page height.
        # NOTE(review): the same transform is used for every non-zero
        # rotation value; confirm that this is intended.
        bbox = Rectangle(bbox.p0.y, page.height - bbox.p1.x,
                         bbox.p1.y, page.height - bbox.p0.x)
    self.bbox: Rectangle = bbox
    """Bounding box of the link source"""
Parameters
- page: Page containing the link, used to compute bounding box.
- link: Raw link object.
class
WebLink:
class WebLink:
    """A weblink object giving the bounding box and destination URL."""
    def __init__(self, page: "modm_data.pdf.Page", index: int):
        """
        :param page: Page containing the link, used to compute bounding box.
        :param index: 0-index of the weblink object.
        """
        self._page = page
        self._link = page._linkpage
        self._index = index

    @cached_property
    def bbox_count(self) -> int:
        """The number of bounding boxes associated with this weblink."""
        return pp.raw.FPDFLink_CountRects(self._link, self._index)

    @cached_property
    def bboxes(self) -> list[Rectangle]:
        """
        The bounding boxes associated with this weblink.

        :raises AssertionError: If PDFium cannot return one of the rectangles.
        """
        bboxes = []
        for ii in range(self.bbox_count):
            x0, y0 = ctypes.c_double(), ctypes.c_double()
            x1, y1 = ctypes.c_double(), ctypes.c_double()
            # PDFium writes left, top, right, bottom, hence the y1/y0 order.
            # The call is kept out of `assert` so it still runs under `-O`.
            found = pp.raw.FPDFLink_GetRect(self._link, self._index, ii,
                                            x0, y1, x1, y0)
            if not found:
                raise AssertionError("FPDFLink_GetRect failed")
            bboxes.append(Rectangle(x0.value, y0.value, x1.value, y1.value))
        if self._page.rotation:
            # Swap the axes and mirror against the page height.
            bboxes = [Rectangle(bbox.p0.y, self._page.height - bbox.p1.x,
                                bbox.p1.y, self._page.height - bbox.p0.x)
                      for bbox in bboxes]
        return bboxes

    @cached_property
    def range(self) -> tuple[int, int]:
        """
        Start and end index of the characters associated with this link.

        :raises AssertionError: If PDFium cannot return the text range.
        """
        cstart = ctypes.c_int()
        ccount = ctypes.c_int()
        # Kept out of `assert` so the call is not stripped under `python -O`.
        if not pp.raw.FPDFLink_GetTextRange(self._link, self._index, cstart, ccount):
            raise AssertionError("FPDFLink_GetTextRange failed")
        return (cstart.value, cstart.value + ccount.value)

    @cached_property
    def url(self) -> str:
        """The URL string of this link."""
        # With a NULL buffer PDFium reports the number of UTF-16 code units
        # required (terminator included), so URLs of any length are supported.
        needed = pp.raw.FPDFLink_GetURL(self._link, self._index, None, 0)
        if needed <= 0:
            return ""
        cbuffer = (ctypes.c_ushort * needed)()
        pp.raw.FPDFLink_GetURL(self._link, self._index, cbuffer, needed)
        # Drop the trailing NUL terminator.
        return bytes(cbuffer).decode("utf-16-le").strip("\x00")

    def __repr__(self) -> str:
        return f"Url({self.url})"
A weblink object giving the bounding box and destination URL.
WebLink(page: modm_data.pdf.page.Page, index: int)
def __init__(self, page: "modm_data.pdf.Page", index: int):
    """
    Store the page, its `_linkpage` handle and the weblink index.

    :param page: Page containing the link, used to compute bounding box.
    :param index: 0-index of the weblink object.
    """
    self._index = index
    self._link = page._linkpage
    self._page = page
Parameters
- page: Page containing the link, used to compute bounding box.
- index: 0-index of the weblink object.
bbox_count: int
@cached_property
def bbox_count(self) -> int:
    """How many rectangles PDFium reports for this weblink."""
    link_page, link_index = self._link, self._index
    return pp.raw.FPDFLink_CountRects(link_page, link_index)
The number of bounding boxes associated with this weblink.
bboxes: list[modm_data.utils.math.Rectangle]
@cached_property
def bboxes(self) -> list[Rectangle]:
    """
    The bounding boxes associated with this weblink.

    :raises AssertionError: If PDFium cannot return one of the rectangles.
    """
    bboxes = []
    for ii in range(self.bbox_count):
        x0, y0 = ctypes.c_double(), ctypes.c_double()
        x1, y1 = ctypes.c_double(), ctypes.c_double()
        # PDFium writes left, top, right, bottom, hence the y1/y0 order.
        # The call is kept out of `assert`: with `python -O` assert statements
        # are removed and the coordinates would never be filled in.
        found = pp.raw.FPDFLink_GetRect(self._link, self._index, ii,
                                        x0, y1, x1, y0)
        if not found:
            # AssertionError is kept so callers see the same exception type.
            raise AssertionError("FPDFLink_GetRect failed")
        bboxes.append(Rectangle(x0.value, y0.value, x1.value, y1.value))
    if self._page.rotation:
        # Swap the axes and mirror against the page height.
        # NOTE(review): the same transform is used for every non-zero
        # rotation value; confirm that this is intended.
        bboxes = [Rectangle(bbox.p0.y, self._page.height - bbox.p1.x,
                            bbox.p1.y, self._page.height - bbox.p0.x)
                  for bbox in bboxes]
    return bboxes
The bounding boxes associated with this weblink.
range: tuple[int, int]
@cached_property
def range(self) -> tuple[int, int]:
    """
    Start and end index of the characters associated with this link.

    :raises AssertionError: If PDFium cannot return the text range.
    """
    cstart = ctypes.c_int()
    ccount = ctypes.c_int()
    # The call is kept out of `assert`: with `python -O` assert statements are
    # removed and `cstart`/`ccount` would silently stay at zero.
    if not pp.raw.FPDFLink_GetTextRange(self._link, self._index, cstart, ccount):
        # AssertionError is kept so callers see the same exception type.
        raise AssertionError("FPDFLink_GetTextRange failed")
    # PDFium reports start and count; convert to a (start, end) pair.
    return (cstart.value, cstart.value + ccount.value)
Start and end index of the characters associated with this link.
url: str
@cached_property
def url(self) -> str:
    """The URL string of this link."""
    # With a NULL buffer PDFium reports the number of UTF-16 code units
    # required (terminator included). Sizing the buffer from that value
    # removes the previous hard 1000-unit limit on the URL length.
    needed = pp.raw.FPDFLink_GetURL(self._link, self._index, None, 0)
    if needed <= 0:
        return ""
    cbuffer = (ctypes.c_ushort * needed)()
    pp.raw.FPDFLink_GetURL(self._link, self._index, cbuffer, needed)
    # Drop the trailing NUL terminator.
    return bytes(cbuffer).decode("utf-16-le").strip("\x00")
The URL string of this link.