From 4bd54bd32d525ec41e23960146fbb3b0418a4715 Mon Sep 17 00:00:00 2001 From: Stefan <96178532+stefan6419846@users.noreply.github.com> Date: Sun, 28 Jul 2024 17:16:57 +0200 Subject: [PATCH 01/12] DEV: Test against Python 3.13 (#2776) * DEV: Test against Python 3.13 * fix typo * add missing setup-python * fix another typo * update Pillow version * attempt to update coverage package * update number of expected coverage files --- .github/workflows/github-ci.yaml | 10 +++++----- requirements/ci-3.11.txt | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/github-ci.yaml b/.github/workflows/github-ci.yaml index 820ccdcaa..1eb3d9bd0 100644 --- a/.github/workflows/github-ci.yaml +++ b/.github/workflows/github-ci.yaml @@ -57,7 +57,7 @@ jobs: runs-on: ubuntu-20.04 strategy: matrix: - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13-dev"] use-crypto-lib: ["cryptography"] include: - python-version: "3.7" @@ -90,7 +90,7 @@ jobs: cache-dependency-path: '**/requirements/ci.txt' - name: Setup Python (3.11+) uses: actions/setup-python@v5 - if: matrix.python-version == '3.11' || matrix.python-version == '3.12' + if: matrix.python-version == '3.11' || matrix.python-version == '3.12' || matrix.python-version == '3.13-dev' with: python-version: ${{ matrix.python-version }} allow-prereleases: true @@ -106,7 +106,7 @@ jobs: - name: Install requirements (Python 3.11+) run: | pip install -r requirements/ci-3.11.txt - if: matrix.python-version == '3.11' || matrix.python-version == '3.12' + if: matrix.python-version == '3.11' || matrix.python-version == '3.12' || matrix.python-version == '3.13-dev' - name: Remove pycryptodome and cryptography run: | pip uninstall pycryptodome cryptography -y @@ -215,8 +215,8 @@ jobs: - name: Check Number of Downloaded Files run: | downloaded_files_count=$(find \.coverage* -type f | wc -l) - if [ $downloaded_files_count -eq 8 ]; then - echo "The expected number of files (8) were downloaded." + if [ $downloaded_files_count -eq 9 ]; then + echo "The expected number of files (9) were downloaded." else echo "ERROR: Expected 8 files, but found $downloaded_files_count files." exit 1 diff --git a/requirements/ci-3.11.txt b/requirements/ci-3.11.txt index f382fe2b9..210177118 100644 --- a/requirements/ci-3.11.txt +++ b/requirements/ci-3.11.txt @@ -6,7 +6,7 @@ # attrs==23.1.0 # via flake8-bugbear -coverage[toml]==7.3.0 +coverage[toml]==7.6.0 # via # -r requirements/ci.in # pytest-cov @@ -35,7 +35,7 @@ mypy-extensions==1.0.0 # via mypy packaging==23.1 # via pytest -pillow==10.0.1 +pillow==10.4.0 # via # -r requirements/ci.in # fpdf2 From d4df20d14cb6a2839c1ab141b51e70652fb3d1f1 Mon Sep 17 00:00:00 2001 From: j-t-1 <120829237+j-t-1@users.noreply.github.com> Date: Wed, 31 Jul 2024 10:46:08 +0100 Subject: [PATCH 02/12] STY: Remove boolean value comparison (#2779) PEP 8 recommendation. --- pypdf/annotations/_markup_annotations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pypdf/annotations/_markup_annotations.py b/pypdf/annotations/_markup_annotations.py index 4db8dfdbf..98a222483 100644 --- a/pypdf/annotations/_markup_annotations.py +++ b/pypdf/annotations/_markup_annotations.py @@ -104,9 +104,9 @@ def __init__( self[NameObject("/Rect")] = RectangleObject(rect) font_str = "font: " - if bold is True: + if bold: font_str = f"{font_str}bold " - if italic is True: + if italic: font_str = f"{font_str}italic " font_str = f"{font_str}{font} {font_size}" font_str = f"{font_str};text-align:left;color:#{font_color}" From 3ad9234c2ec08e7cd6a8b2ec962386eda394d76d Mon Sep 17 00:00:00 2001 From: "William G. Gagnon" Date: Fri, 2 Aug 2024 11:21:53 -0400 Subject: [PATCH 03/12] ROB: Handle images with empty data when processing an image from bytes (#2786) Closes #2783. --- CONTRIBUTORS.md | 1 + pypdf/_xobj_image_helpers.py | 9 ++++++--- pypdf/errors.py | 4 ++++ tests/test_xobject_image_helpers.py | 13 +++++++++++-- 4 files changed, 22 insertions(+), 5 deletions(-) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 84f0b6ee4..89fec3b14 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -19,6 +19,7 @@ history and [GitHub's 'Contributors' feature](https://github.com/py-pdf/pypdf/gr * [ediamondscience](https://github.com/ediamondscience) * [Ermeson, Felipe](https://github.com/FelipeErmeson) * [Freitag, François](https://github.com/francoisfreitag) +* [Gagnon, William G.](https://github.com/williamgagnon) * [Górny, Michał](https://github.com/mgorny) * [Grillo, Miguel](https://github.com/Ineffable22) * [Gutteridge, David H.](https://github.com/dhgutteridge) diff --git a/pypdf/_xobj_image_helpers.py b/pypdf/_xobj_image_helpers.py index 45b0c145b..5ae8894fa 100644 --- a/pypdf/_xobj_image_helpers.py +++ b/pypdf/_xobj_image_helpers.py @@ -6,7 +6,7 @@ from ._utils import check_if_whitespace_only, logger_warning from .constants import ColorSpaces -from .errors import PdfReadError +from .errors import EmptyImageDataError, PdfReadError from .generic import ( ArrayObject, DecodedStreamObject, @@ -148,9 +148,12 @@ def _extended_image_frombytes( img = Image.frombytes(mode, size, data) except ValueError as exc: nb_pix = size[0] * size[1] - if len(data) % nb_pix != 0: + data_length = len(data) + if data_length == 0: + raise EmptyImageDataError("Data is 0 bytes, cannot process an image from empty data.") from exc + if data_length % nb_pix != 0: raise exc - k = nb_pix * len(mode) / len(data) + k = nb_pix * len(mode) / data_length data = b"".join([bytes((x,) * int(k)) for x in data]) img = Image.frombytes(mode, size, data) return img diff --git a/pypdf/errors.py b/pypdf/errors.py index c962dec66..ad197ffc1 100644 --- a/pypdf/errors.py +++ b/pypdf/errors.py @@ -59,4 +59,8 @@ class EmptyFileError(PdfReadError): """Raised when a PDF file is empty or has no content.""" +class EmptyImageDataError(PyPdfError): + """Raised when trying to process an image that has no data.""" + + STREAM_TRUNCATED_PREMATURELY = "Stream has ended unexpectedly" diff --git a/tests/test_xobject_image_helpers.py b/tests/test_xobject_image_helpers.py index 63ecebd9b..39b7131fc 100644 --- a/tests/test_xobject_image_helpers.py +++ b/tests/test_xobject_image_helpers.py @@ -4,8 +4,8 @@ import pytest from pypdf import PdfReader -from pypdf._xobj_image_helpers import _handle_flate -from pypdf.errors import PdfReadError +from pypdf._xobj_image_helpers import _extended_image_frombytes, _handle_flate +from pypdf.errors import EmptyImageDataError, PdfReadError from pypdf.generic import ArrayObject, DecodedStreamObject, NameObject, NumberObject from . import get_data_from_url @@ -113,3 +113,12 @@ def test_handle_flate__image_mode_1(): colors=2, obj_as_text="dummy", ) + + +def test_extended_image_frombytes_zero_data(): + mode = "RGB" + size = (1, 1) + data = b"" + + with pytest.raises(EmptyImageDataError, match="Data is 0 bytes, cannot process an image from empty data."): + _extended_image_frombytes(mode, size, data) From 582557e09a7e658fdcb19f26eb069d87875489f0 Mon Sep 17 00:00:00 2001 From: Diogo Teles Sant'Anna Date: Fri, 2 Aug 2024 15:49:29 -0300 Subject: [PATCH 04/12] SEC: Fix GitHub workflow vulnerable to script injection (#2787) Signed-off-by: Diogo Teles Sant'Anna --- .github/workflows/release.yaml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 9f782ec08..b1a4fb27f 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -12,6 +12,9 @@ on: permissions: contents: write +env: + HEAD_COMMIT_MESSAGE: ${{ github.event.head_commit.message }} + jobs: build_and_publish: name: Publish a new version @@ -24,7 +27,7 @@ jobs: - name: Extract version from commit message id: extract_version run: | - VERSION=$(echo "${{ github.event.head_commit.message }}" | grep -oP '(?<=REL: )\d+\.\d+\.\d+') + VERSION=$(echo "$HEAD_COMMIT_MESSAGE" | grep -oP '(?<=REL: )\d+\.\d+\.\d+') echo "version=$VERSION" >> $GITHUB_OUTPUT - name: Extract tag message from commit message @@ -32,7 +35,7 @@ jobs: run: | VERSION="${{ steps.extract_version.outputs.version }}" delimiter="$(openssl rand -hex 8)" - MESSAGE=$(echo "${{ github.event.head_commit.message }}" | sed "0,/REL: $VERSION/s///" ) + MESSAGE=$(echo "$HEAD_COMMIT_MESSAGE" | sed "0,/REL: $VERSION/s///" ) echo "message<<${delimiter}" >> $GITHUB_OUTPUT echo "$MESSAGE" >> $GITHUB_OUTPUT echo "${delimiter}" >> $GITHUB_OUTPUT From 38f3925502c2971ad587fb616500b6f8b6333d03 Mon Sep 17 00:00:00 2001 From: j-t-1 <120829237+j-t-1@users.noreply.github.com> Date: Mon, 5 Aug 2024 09:10:47 +0100 Subject: [PATCH 05/12] MAINT: Remove unused paeth_predictor (#2773) --- pypdf/_utils.py | 14 -------------- tests/test_utils.py | 18 ------------------ 2 files changed, 32 deletions(-) diff --git a/pypdf/_utils.py b/pypdf/_utils.py index 38c0d67d7..6569707b6 100644 --- a/pypdf/_utils.py +++ b/pypdf/_utils.py @@ -390,20 +390,6 @@ def ord_(b: Union[int, str, bytes]) -> Union[int, bytes]: WHITESPACES_AS_REGEXP = b"[" + WHITESPACES_AS_BYTES + b"]" -def paeth_predictor(left: int, up: int, up_left: int) -> int: - p = left + up - up_left - dist_left = abs(p - left) - dist_up = abs(p - up) - dist_up_left = abs(p - up_left) - - if dist_left <= dist_up and dist_left <= dist_up_left: - return left - elif dist_up <= dist_up_left: - return up - else: - return up_left - - def deprecate(msg: str, stacklevel: int = 3) -> None: warnings.warn(msg, DeprecationWarning, stacklevel=stacklevel) diff --git a/tests/test_utils.py b/tests/test_utils.py index 81fcf9fb4..856bedd86 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -132,24 +132,6 @@ def test_deprecate_no_replacement(): assert warn[0].message.args[0] == error_msg -@pytest.mark.parametrize( - ("left", "up", "upleft", "expected"), - [ - (0, 0, 0, 0), - (1, 0, 0, 1), - (0, 1, 0, 1), - (0, 0, 1, 0), - (1, 2, 3, 1), - (2, 1, 3, 1), - (1, 3, 2, 2), - (3, 1, 2, 2), - (3, 2, 1, 3), - ], -) -def test_paeth_predictor(left, up, upleft, expected): - assert pypdf._utils.paeth_predictor(left, up, upleft) == expected - - @pytest.mark.parametrize( ("dat", "pos", "to_read", "expected", "expected_pos"), [ From 09f9b7ed52193bfd9e98bdd018ccaf7cbe821687 Mon Sep 17 00:00:00 2001 From: j-t-1 <120829237+j-t-1@users.noreply.github.com> Date: Mon, 5 Aug 2024 16:49:29 +0100 Subject: [PATCH 06/12] MAINT: Remove unused AnnotationFlag --- pypdf/annotations/_non_markup_annotations.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pypdf/annotations/_non_markup_annotations.py b/pypdf/annotations/_non_markup_annotations.py index dcdb3b0ff..6272cceee 100644 --- a/pypdf/annotations/_non_markup_annotations.py +++ b/pypdf/annotations/_non_markup_annotations.py @@ -1,6 +1,5 @@ from typing import TYPE_CHECKING, Any, Optional, Tuple, Union -from ..constants import AnnotationFlag from ..generic._base import ( BooleanObject, NameObject, @@ -12,8 +11,6 @@ from ..generic._rectangle import RectangleObject from ._base import AnnotationDictionary -DEFAULT_ANNOTATION_FLAG = AnnotationFlag(0) - class Link(AnnotationDictionary): def __init__( From b2d72043ab5221b58138c7d06c181b8cbc88ea8e Mon Sep 17 00:00:00 2001 From: owurman Date: Mon, 5 Aug 2024 12:14:18 -0700 Subject: [PATCH 07/12] BUG: Handle Sequence as an IndirectObject when extracting text with layout mode (#2788) * Handle Sequence as an IndirectObject The spec allows an int or float to be an IndirectObject as well, but this commit does not address that theoretical possibility. * Update pypdf/_text_extraction/_layout_mode/_font.py Co-authored-by: Stefan <96178532+stefan6419846@users.noreply.github.com> * Address PR comments -Rename w_1 to w_next_entry -Utilize ParseError instead of PdfReadError -Write a test (both positive and negative) * Handle unlikely case of IndirectObjects for float/int width elements Also adds a comment to clarify that we don't explicitly handle the IndexError exception. Rather, we let it be raised as an IndexError. * Yoda condition I removed * Last commit was a bad patch, confused by non-committed changes * Use test files from URL rather than resources * Update tests/test_text_extraction.py Co-authored-by: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> * Fix code style warnings in range() call --------- Co-authored-by: Stefan <96178532+stefan6419846@users.noreply.github.com> Co-authored-by: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> --- pypdf/_text_extraction/_layout_mode/_font.py | 26 +++++++++++++------- tests/test_text_extraction.py | 17 +++++++++++++ 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/pypdf/_text_extraction/_layout_mode/_font.py b/pypdf/_text_extraction/_layout_mode/_font.py index a912fddb2..40655b1b2 100644 --- a/pypdf/_text_extraction/_layout_mode/_font.py +++ b/pypdf/_text_extraction/_layout_mode/_font.py @@ -1,8 +1,9 @@ """Font constants and classes for "layout" mode text operations""" from dataclasses import dataclass, field -from typing import Any, Dict, Sequence, Union +from typing import Any, Dict, Sequence, Union, cast +from ...errors import ParseError from ...generic import IndirectObject from ._font_widths import STANDARD_WIDTHS @@ -58,6 +59,7 @@ def __post_init__(self) -> None: skip_count = 0 _w = d_font.get("/W", []) for idx, w_entry in enumerate(_w): + w_entry = w_entry.get_object() if skip_count: skip_count -= 1 continue @@ -66,13 +68,14 @@ def __post_init__(self) -> None: # warning and or use reader's "strict" to force an ex??? continue # check for format (1): `int [int int int int ...]` - if isinstance(_w[idx + 1], Sequence): - start_idx, width_list = _w[idx : idx + 2] + w_next_entry = _w[idx + 1].get_object() + if isinstance(w_next_entry, Sequence): + start_idx, width_list = w_entry, w_next_entry self.width_map.update( { ord_map[_cidx]: _width for _cidx, _width in zip( - range(start_idx, start_idx + len(width_list), 1), + range(cast(int, start_idx), cast(int, start_idx) + len(width_list), 1), width_list, ) if _cidx in ord_map @@ -80,18 +83,23 @@ def __post_init__(self) -> None: ) skip_count = 1 # check for format (2): `int int int` - if not isinstance(_w[idx + 1], Sequence) and not isinstance( - _w[idx + 2], Sequence - ): - start_idx, stop_idx, const_width = _w[idx : idx + 3] + elif isinstance(w_next_entry, (int, float)) and isinstance(_w[idx + 2].get_object(), (int, float)): + start_idx, stop_idx, const_width = w_entry, w_next_entry, _w[idx + 2].get_object() self.width_map.update( { ord_map[_cidx]: const_width - for _cidx in range(start_idx, stop_idx + 1, 1) + for _cidx in range(cast(int, start_idx), cast(int, stop_idx + 1), 1) if _cidx in ord_map } ) skip_count = 2 + else: + # Note: this doesn't handle the case of out of bounds (reaching the end of the width definitions + # while expecting more elements). This raises an IndexError which is sufficient. + raise ParseError( + f"Invalid font width definition. Next elements: {w_entry}, {w_next_entry}, {_w[idx + 2]}" + ) # pragma: no cover + if not self.width_map and "/BaseFont" in self.font_dictionary: for key in STANDARD_WIDTHS: if self.font_dictionary["/BaseFont"].startswith(f"/{key}"): diff --git a/tests/test_text_extraction.py b/tests/test_text_extraction.py index 1ffa68a3e..dcd4e6cae 100644 --- a/tests/test_text_extraction.py +++ b/tests/test_text_extraction.py @@ -10,6 +10,7 @@ from pypdf import PdfReader, mult from pypdf._text_extraction import set_custom_rtl +from pypdf.errors import ParseError from . import get_data_from_url @@ -156,3 +157,19 @@ def test_layout_mode_type0_font_widths(): encoding="utf-8" ) assert expected == reader.pages[0].extract_text(extraction_mode="layout") + + +@pytest.mark.enable_socket() +def test_layout_mode_indirect_sequence_font_widths(): + # Cover the situation where the sequence for font widths is an IndirectObject + # ref https://github.com/py-pdf/pypdf/pull/2788 + url = "https://github.com/user-attachments/files/16491621/2788_example.pdf" + name ="2788_example.pdf" + reader = PdfReader(BytesIO(get_data_from_url(url, name=name))) + assert reader.pages[0].extract_text(extraction_mode="layout") == "" + url = "https://github.com/user-attachments/files/16491619/2788_example_malformed.pdf" + name = "2788_example_malformed.pdf" + reader = PdfReader(BytesIO(get_data_from_url(url, name=name))) + with pytest.raises(ParseError) as exc: + reader.pages[0].extract_text(extraction_mode="layout") + assert str(exc.value).startswith("Invalid font width definition") From 5abd590740a2718fc69b8477c656ce5515a0ab33 Mon Sep 17 00:00:00 2001 From: j-t-1 <120829237+j-t-1@users.noreply.github.com> Date: Wed, 7 Aug 2024 12:14:17 +0100 Subject: [PATCH 08/12] STY: Refactor b_ (#2772) --- pypdf/_utils.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/pypdf/_utils.py b/pypdf/_utils.py index 6569707b6..5fecb38e7 100644 --- a/pypdf/_utils.py +++ b/pypdf/_utils.py @@ -347,14 +347,11 @@ def b_(s: Union[str, bytes]) -> bytes: return bc[s] try: r = s.encode("latin-1") - if len(s) < 2: - bc[s] = r - return r - except Exception: + except UnicodeEncodeError: r = s.encode("utf-8") - if len(s) < 2: - bc[s] = r - return r + if len(s) < 2: + bc[s] = r + return r def str_(b: Any) -> str: From f0453b06b58463b9c25421189f0675cbc53e285e Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Fri, 9 Aug 2024 10:15:21 +0200 Subject: [PATCH 09/12] MAINT: remove support to Python 3.7 --- .github/workflows/github-ci.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/github-ci.yaml b/.github/workflows/github-ci.yaml index 1eb3d9bd0..809d161b9 100644 --- a/.github/workflows/github-ci.yaml +++ b/.github/workflows/github-ci.yaml @@ -57,12 +57,12 @@ jobs: runs-on: ubuntu-20.04 strategy: matrix: - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13-dev"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13-dev"] use-crypto-lib: ["cryptography"] include: - - python-version: "3.7" + - python-version: "3.8" use-crypto-lib: "pycryptodome" - - python-version: "3.7" + - python-version: "3.8" use-crypto-lib: "none" steps: - name: Update APT packages @@ -83,7 +83,7 @@ jobs: key: cache-downloaded-files - name: Setup Python uses: actions/setup-python@v5 - if: matrix.python-version == '3.7' || matrix.python-version == '3.8' || matrix.python-version == '3.9' || matrix.python-version == '3.10' + if: matrix.python-version == '3.8' || matrix.python-version == '3.9' || matrix.python-version == '3.10' with: python-version: ${{ matrix.python-version }} cache: 'pip' @@ -102,7 +102,7 @@ jobs: - name: Install requirements (Python 3) run: | pip install -r requirements/ci.txt - if: matrix.python-version == '3.7' || matrix.python-version == '3.8' || matrix.python-version == '3.9' || matrix.python-version == '3.10' + if: matrix.python-version == '3.8' || matrix.python-version == '3.9' || matrix.python-version == '3.10' - name: Install requirements (Python 3.11+) run: | pip install -r requirements/ci-3.11.txt From cfbaee90543ef6633793b1669c9b6dd780324f74 Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Fri, 9 Aug 2024 10:28:57 +0200 Subject: [PATCH 10/12] fix coverage workflow --- .github/workflows/github-ci.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/github-ci.yaml b/.github/workflows/github-ci.yaml index 809d161b9..d5d9bb4d4 100644 --- a/.github/workflows/github-ci.yaml +++ b/.github/workflows/github-ci.yaml @@ -215,8 +215,8 @@ jobs: - name: Check Number of Downloaded Files run: | downloaded_files_count=$(find \.coverage* -type f | wc -l) - if [ $downloaded_files_count -eq 9 ]; then - echo "The expected number of files (9) were downloaded." + if [ $downloaded_files_count -eq 8 ]; then + echo "The expected number of files (8) were downloaded." else echo "ERROR: Expected 8 files, but found $downloaded_files_count files." exit 1 From 6f91492f846d8dd1885225abf5da47213137bac1 Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Fri, 9 Aug 2024 10:42:56 +0200 Subject: [PATCH 11/12] fix coverage --- pypdf/_page.py | 5 +---- pypdf/_protocols.py | 6 +----- pypdf/_text_extraction/_layout_mode/_fixed_width_page.py | 5 +---- pypdf/_xobj_image_helpers.py | 7 +------ pypdf/types.py | 6 +----- 5 files changed, 5 insertions(+), 24 deletions(-) diff --git a/pypdf/_page.py b/pypdf/_page.py index 63038d9d0..0ef09c9bd 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -85,10 +85,7 @@ StreamObject, ) -if sys.version_info >= (3, 8): - from typing import Literal -else: - from typing_extensions import Literal +from typing import Literal MERGE_CROP_BOX = "cropbox" # pypdf<=3.4.0 used 'trimbox' diff --git a/pypdf/_protocols.py b/pypdf/_protocols.py index 9f413660b..39e5c1319 100644 --- a/pypdf/_protocols.py +++ b/pypdf/_protocols.py @@ -4,11 +4,7 @@ from pathlib import Path from typing import IO, Any, Dict, List, Optional, Tuple, Union -try: - # Python 3.8+: https://peps.python.org/pep-0586 - from typing import Protocol -except ImportError: - from typing_extensions import Protocol # type: ignore[assignment] +from typing import Protocol from ._utils import StrByteType, StreamType diff --git a/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py b/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py index 1be500959..2d7f42ed3 100644 --- a/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py +++ b/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py @@ -12,10 +12,7 @@ from ._text_state_manager import TextStateManager from ._text_state_params import TextStateParams -if sys.version_info >= (3, 8): - from typing import Literal, TypedDict -else: - from typing_extensions import Literal, TypedDict +from typing import Literal, TypedDict class BTGroup(TypedDict): diff --git a/pypdf/_xobj_image_helpers.py b/pypdf/_xobj_image_helpers.py index 5ae8894fa..0ba873241 100644 --- a/pypdf/_xobj_image_helpers.py +++ b/pypdf/_xobj_image_helpers.py @@ -15,12 +15,7 @@ NullObject, ) -if sys.version_info[:2] >= (3, 8): - from typing import Literal -else: - # PEP 586 introduced typing.Literal with Python 3.8 - # For older Python versions, the backport typing_extensions is necessary: - from typing_extensions import Literal +from typing import Literal if sys.version_info[:2] >= (3, 10): from typing import TypeAlias diff --git a/pypdf/types.py b/pypdf/types.py index b8fbab92c..583b6ba5a 100644 --- a/pypdf/types.py +++ b/pypdf/types.py @@ -3,11 +3,7 @@ import sys from typing import List, Union -if sys.version_info[:2] >= (3, 8): - # Python 3.8+: https://peps.python.org/pep-0586 - from typing import Literal -else: - from typing_extensions import Literal +from typing import Literal if sys.version_info[:2] >= (3, 10): # Python 3.10+: https://www.python.org/dev/peps/pep-0484 From f8194af2d5afdf30ab99fe7727f351ab4951c1c8 Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Fri, 9 Aug 2024 11:21:18 +0200 Subject: [PATCH 12/12] fix style --- pypdf/_page.py | 5 +---- pypdf/_protocols.py | 4 +--- pypdf/_text_extraction/_layout_mode/_fixed_width_page.py | 5 +---- pypdf/_xobj_image_helpers.py | 8 ++++---- pypdf/types.py | 4 +--- 5 files changed, 8 insertions(+), 18 deletions(-) diff --git a/pypdf/_page.py b/pypdf/_page.py index 0ef09c9bd..ee1dc7f60 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -28,7 +28,6 @@ # POSSIBILITY OF SUCH DAMAGE. import math -import sys from decimal import Decimal from pathlib import Path from typing import ( @@ -38,6 +37,7 @@ Iterable, Iterator, List, + Literal, Optional, Sequence, Set, @@ -85,9 +85,6 @@ StreamObject, ) -from typing import Literal - - MERGE_CROP_BOX = "cropbox" # pypdf<=3.4.0 used 'trimbox' diff --git a/pypdf/_protocols.py b/pypdf/_protocols.py index 39e5c1319..b5fa14879 100644 --- a/pypdf/_protocols.py +++ b/pypdf/_protocols.py @@ -2,9 +2,7 @@ from abc import abstractmethod from pathlib import Path -from typing import IO, Any, Dict, List, Optional, Tuple, Union - -from typing import Protocol +from typing import IO, Any, Dict, List, Optional, Protocol, Tuple, Union from ._utils import StrByteType, StreamType diff --git a/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py b/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py index 2d7f42ed3..e7af1b234 100644 --- a/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py +++ b/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py @@ -1,10 +1,9 @@ """Extract PDF text preserving the layout of the source PDF""" -import sys from itertools import groupby from math import ceil from pathlib import Path -from typing import Any, Dict, Iterator, List, Optional, Tuple +from typing import Any, Dict, Iterator, List, Literal, Optional, Tuple, TypedDict from ..._utils import logger_warning from .. import LAYOUT_NEW_BT_GROUP_SPACE_WIDTHS @@ -12,8 +11,6 @@ from ._text_state_manager import TextStateManager from ._text_state_params import TextStateParams -from typing import Literal, TypedDict - class BTGroup(TypedDict): """ diff --git a/pypdf/_xobj_image_helpers.py b/pypdf/_xobj_image_helpers.py index 0ba873241..7a3f40d95 100644 --- a/pypdf/_xobj_image_helpers.py +++ b/pypdf/_xobj_image_helpers.py @@ -2,7 +2,7 @@ import sys from io import BytesIO -from typing import Any, List, Tuple, Union, cast +from typing import Any, List, Literal, Tuple, Union, cast from ._utils import check_if_whitespace_only, logger_warning from .constants import ColorSpaces @@ -15,8 +15,6 @@ NullObject, ) -from typing import Literal - if sys.version_info[:2] >= (3, 10): from typing import TypeAlias else: @@ -145,7 +143,9 @@ def _extended_image_frombytes( nb_pix = size[0] * size[1] data_length = len(data) if data_length == 0: - raise EmptyImageDataError("Data is 0 bytes, cannot process an image from empty data.") from exc + raise EmptyImageDataError( + "Data is 0 bytes, cannot process an image from empty data." + ) from exc if data_length % nb_pix != 0: raise exc k = nb_pix * len(mode) / data_length diff --git a/pypdf/types.py b/pypdf/types.py index 583b6ba5a..e383dc7b1 100644 --- a/pypdf/types.py +++ b/pypdf/types.py @@ -1,9 +1,7 @@ """Helpers for working with PDF types.""" import sys -from typing import List, Union - -from typing import Literal +from typing import List, Literal, Union if sys.version_info[:2] >= (3, 10): # Python 3.10+: https://www.python.org/dev/peps/pep-0484