This file is damaged but is being repaired - Adobe acrobat
Description of the bug
I use a script that populates the widget (form) fields of a PDF from an Excel database.
However, it started to change the default font. When I open the output with Adobe Acrobat, the fields appear empty, but when I open it with Google Chrome or Firefox, the fields appear filled. If I save it and try to open it with Adobe Acrobat again, it tells me:
"This file is damaged but is being repaired"
I do not understand why this happens; this error did not occur before.
How to reproduce the bug
I use the following code (sorry if it is not perfect, I am not an expert in Python):
import fitz # PyMuPDF
import openpyxl
from datetime import datetime
# pdf part
def modify_multiple_fields(pdf_path, field_values):
    """
    Fill multiple form fields (widgets) of a PDF and save a copy under OUTPUT/.

    Every widget whose field name is a key of ``field_values`` receives the
    mapped value. The two check boxes ``c1_10[0]`` (MALE) and ``c1_10[1]``
    (FEMALE) are set according to the spreadsheet cell addressed by the
    global ``female_male``.

    NOTE: besides its arguments, this function reads the module-level globals
    ``sheet``, ``female_male``, ``first_name``, ``middle_name`` and
    ``last_name`` (the worksheet and the cell coordinates of the current
    row). They must be assigned before the function is called.

    Args:
        pdf_path (str): Path to the PDF file.
        field_values (dict): A dictionary where keys are field names and
            values are new values.

    Example:
        field_values = {
            "Field1": "New Value 1",
            "Field2": "New Value 2",
            # Add more fields as needed
        }
        modify_multiple_fields("your_pdf_file.pdf", field_values)
    """
    # Check-box field name -> spreadsheet value that ticks it.
    checkbox_for_sex = {
        "topmostSubform[0].Page1[0].c1_10[0]": 'MALE',
        "topmostSubform[0].Page1[0].c1_10[1]": 'FEMALE',
    }

    doc = fitz.open(pdf_path)
    try:
        # Loop-invariant: the same cell decides both check boxes.
        sex = sheet[female_male].value

        for page in doc:
            for widget in page.widgets():
                name = widget.field_name
                if name in field_values:
                    widget.field_value = field_values[name]
                    # widget.text_font = 'HeLv'
                    # widget.text_fontsize = 9
                    widget.update()
                elif name in checkbox_for_sex and checkbox_for_sex[name] == sex:
                    # NOTE(review): PyMuPDF documents True/False or the
                    # widget's on-state name as check-box values; confirm
                    # that "Yes" is accepted for this form.
                    widget.field_value = "Yes"
                    widget.update()

        # doc.delete_pages(1,4) # delete pages

        first = sheet[first_name].value
        middle = sheet[middle_name].value
        last = sheet[last_name].value
        # Leave the middle name out of the file name when the cell is empty.
        if middle is None:
            doc.save(f'OUTPUT/W-7 {first} {last}.pdf')
        else:
            doc.save(f'OUTPUT/W-7 {first} {middle} {last}.pdf')
    finally:
        # Release the document even if updating or saving raised.
        doc.close()
# Open the spreadsheet; data_only=True is passed so cell values, not formulas, are read.
workbook_path = "Template.xlsx"
sheet_name = "Template"
workbook = openpyxl.load_workbook(workbook_path, data_only=True)
sheet = workbook[sheet_name]

# Sheet extents as reported by openpyxl.
last_row = sheet.max_row
last_col = sheet.max_column

# The length of column B is used as the number of the last row to process.
last_row_number = len(sheet['B'])

# Data starts on row 5; one PDF is produced per spreadsheet row.
for cell_number in range(5, last_row_number + 1):
    # Cell coordinates of the current row, columns B through O in order.
    # These stay module-level names because modify_multiple_fields reads
    # sheet, first_name, middle_name, last_name and female_male as globals.
    (
        other,
        first_name,
        middle_name,
        last_name,
        street_address,
        city_zip,
        date_birth,
        country,
        birth_city,
        female_male,
        country_citiz,
        issuedby,
        passport_number,
        exp_date,
    ) = (f'{column}{cell_number}' for column in 'BCDEFGHIJKLMNO')

    print(sheet[first_name].value)

    # W7 PAGE 1: PDF field name -> coordinate of the cell holding its value.
    cell_for_field = {
        "topmostSubform[0].Page1[0].f1_4[0]": other,             # OTHER
        "topmostSubform[0].Page1[0].f1_7[0]": first_name,        # FIRST NAME
        "topmostSubform[0].Page1[0].f1_8[0]": middle_name,       # MIDDLE NAME
        "topmostSubform[0].Page1[0].f1_9[0]": last_name,         # LAST NAME
        "topmostSubform[0].Page1[0].f1_15[0]": street_address,   # CUSTOMER STREET ADDRESS
        "topmostSubform[0].Page1[0].f1_16[0]": city_zip,         # CITY,STATE,COUNTRY ZIP
        "topmostSubform[0].Page1[0].f1_17[0]": date_birth,       # BIRTH MM DD YYYY
        "topmostSubform[0].Page1[0].f1_18[0]": country,          # COUNTRY BIRTH
        "topmostSubform[0].Page1[0].f1_19[0]": birth_city,       # BIRTH CITY
        "topmostSubform[0].Page1[0].f1_20[0]": country_citiz,    # COUNTRY CITIZ
        "topmostSubform[0].Page1[0].f1_24[0]": issuedby,         # ISSUED BY
        "topmostSubform[0].Page1[0].f1_25[0]": passport_number,  # PASSPORT NUMBER
        "topmostSubform[0].Page1[0].f1_26[0]": exp_date,         # PASSPORT EXP DATE MM DD YYYY
        # Add more fields as needed
    }
    field_values = {
        field: sheet[coordinate].value
        for field, coordinate in cell_for_field.items()
    }

    modify_multiple_fields("FORM/W7 FORM TAX.pdf", field_values)

workbook.close()
Please find attached an example output file. I use the W-7 form from the IRS website.
You can see that the file was modified
W-7 A B.pdf
Could you please let me know why it is not working anymore?
Thank you so much. I am really happy to use this tool; it is amazing.
PyMuPDF version
1.23.26
Operating system
Windows
Python version
3.12