ValueError: not enough values to unpack (expected 3, got 2) is raised when calling insert_pdf
Description of the bug
I want to split a PDF file into multiple PDF files of 5 pages each. When insert_pdf is called, a ValueError is raised.
example.pdf
How to reproduce the bug
from io import BytesIO
from pathlib import Path
import fitz
def _clean_up(file_path):
    """Return a cleaned, in-memory copy of the document at ``file_path``.

    The file is opened, serialized to bytes with ``garbage=4`` and
    ``deflate=True``, and re-opened from those bytes as a PDF.

    Args:
        file_path: Path of the document to open with ``fitz.open``.

    Returns:
        A new ``fitz.Document`` backed by the serialized bytes. The caller
        owns it and is responsible for closing it.
    """
    # The context manager releases the on-disk document as soon as its bytes
    # have been produced; previously that handle was never closed.
    with fitz.open(file_path) as doc:
        # ``tobytes`` is the documented name of the serializer that the
        # older ``write`` alias referred to (see the PyMuPDF Document docs).
        cleaned = doc.tobytes(garbage=4, deflate=True)
    # The bytes are passed directly as the stream; no intermediate BytesIO
    # buffer is required.
    return fitz.Document('pdf', cleaned)
def split_pdf_by_page(file_path, pages_per_split, result_dir, num_splits=None, paging_prefix=""):
    """Split a PDF into consecutive files of ``pages_per_split`` pages each.

    Output files are named ``{paging_prefix}_{i}.pdf`` inside ``result_dir``,
    where ``i`` is the zero-based index of the split.

    Args:
        file_path: Path of the source PDF.
        pages_per_split: Number of pages per output file. A falsy value
            (``0`` or ``None``) means "do not split": the source file is
            copied unchanged to ``{paging_prefix}_0.pdf``.
        result_dir: ``pathlib.Path`` of the directory receiving the output
            files (it is combined with the file name using ``/``).
        num_splits: Optional cap on the number of output files. The split
            with index ``num_splits - 1`` receives all remaining pages.
            Defaults to the number of ``pages_per_split``-sized chunks.
        paging_prefix: Prefix used for the output file names.

    Returns:
        A list with the paths of the files written. In the no-split case
        the single entry is whatever ``shutil.copy`` returns; otherwise the
        entries are the ``result_dir / name`` path objects.

    Note:
        With PyMuPDF 1.24.9, ``insert_pdf`` can raise
        ``ValueError: not enough values to unpack (expected 3, got 2)``
        while resolving named links of the source pages. Passing
        ``links=False`` to ``insert_pdf`` avoids this at the cost of
        dropping hyperlinks; the problem is reported fixed in 1.24.10.
    """
    # Not split
    if not pages_per_split:
        # Imported here because the file's import block does not provide
        # ``shutil``; without this the branch failed with NameError.
        import shutil

        return [shutil.copy(file_path, result_dir / f"{paging_prefix}_0.pdf")]

    # Clean pdf
    source_doc = _clean_up(file_path)
    try:
        # Offset from the first to the last page of a full-sized split
        page_range = pages_per_split - 1
        # Starting page index of every split
        split_range = range(0, source_doc.page_count, pages_per_split)
        num_splits = num_splits or len(split_range)

        # Paths of the split files, in creation order
        split_paths = []
        for i, start in enumerate(split_range):
            # The final split uses -1 so that insert_pdf copies through the
            # last page of the source document.
            to_page = start + page_range if i < num_splits - 1 else -1
            path = result_dir / f"{paging_prefix}_{i}.pdf"

            # Close each output document once saved, even if inserting or
            # saving fails, instead of leaving it open.
            with fitz.open() as output_doc:
                output_doc.insert_pdf(source_doc, from_page=start, to_page=to_page)
                output_doc.save(path, garbage=2)
            split_paths.append(path)

            # If this was the last split file, exit the loop
            if to_page == -1:
                break
        return split_paths
    finally:
        # The cleaned in-memory source is no longer needed once all splits
        # are written (or an error occurred).
        source_doc.close()
if __name__ == "__main__":
    # Reproduction driver: split ./example.pdf into 5-page files in ./result.
    file_path = Path("./example.pdf")
    pages_per_file = 5
    result_dir = Path("./result")
    # exist_ok lets the script be re-run; a bare mkdir() raises
    # FileExistsError when ./result is left over from a previous run.
    result_dir.mkdir(exist_ok=True)
    split_pdf_by_page(file_path, pages_per_file, result_dir, paging_prefix="src")
PyMuPDF version
1.24.9
Operating system
Linux
Python version
3.8
This error does not happen in method .insert_pdf itself, but occurs when resolving names for deciphering LINK_NAMED hyperlinks.
You can avoid this by excluding the links of the source pages altogether when copying pages: .insert_pdf(source, links=False).
> Will excluding links affect the result PDF's content?
Will excluding links affect the result PDF's content?
No, pages will look the same, but any hyperlinks will no longer respond when clicking on the respective area.
Fixed in 1.24.10.