This file is damaged but is being repaired - Adobe acrobat

Open anjuba1017 opened this issue 1 year ago • 0 comments

Description of the bug

I use a script that populates the widget fields of a PDF from an Excel spreadsheet.

However, it started to change the default font. When I open the output with Adobe Acrobat, the fields appear empty, but when I open it with Google Chrome or Firefox, the fields appear filled. If I save it and try to open it with Adobe Acrobat again, it tells me:

"This file is damaged but is being repaired"

I do not understand why this happens; this error did not occur before.

How to reproduce the bug

I use the following code (sorry if it is not perfect, I am not an expert in Python):

import fitz  # PyMuPDF
import openpyxl
from datetime import datetime


# pdf part

def modify_multiple_fields(pdf_path, field_values):
    """
    Fill form widgets of a PDF and save the result under ``OUTPUT/``.

    Every widget whose field name is a key of ``field_values`` receives the
    mapped value. The two ``c1_10`` checkboxes (MALE / FEMALE) are not driven
    by ``field_values``: each one is set to "Yes" when the spreadsheet cell
    addressed by the module-level ``female_male`` reference holds the
    matching text.

    Note:
        Besides its arguments, this function reads the module-level names
        ``sheet``, ``female_male``, ``first_name``, ``middle_name`` and
        ``last_name``. They must be assigned before the call; the output
        file name is built from the first/middle/last name cells.

    Args:
        pdf_path (str): Path to the PDF file.
        field_values (dict): A dictionary where keys are field names and values are new values.

    Example:
        field_values = {
            "Field1": "New Value 1",
            "Field2": "New Value 2",
            # Add more fields as needed
        }
        modify_multiple_fields("your_pdf_file.pdf", field_values)
    """
    # Checkbox field name -> spreadsheet text that switches that box on.
    gender_boxes = {
        "topmostSubform[0].Page1[0].c1_10[0]": 'MALE',
        "topmostSubform[0].Page1[0].c1_10[1]": 'FEMALE',
    }

    doc = fitz.open(pdf_path)
    try:
        for page in doc:
            for widget in page.widgets():
                name = widget.field_name
                if name in field_values:
                    widget.field_value = field_values[name]
                    # Font overrides intentionally left disabled:
                    # widget.text_font = 'HeLv'
                    # widget.text_fontsize = 9
                    widget.update()
                elif name in gender_boxes and sheet[female_male].value == gender_boxes[name]:
                    # NOTE(review): assumes "Yes" is a value these checkboxes
                    # accept as their ON state - confirm, e.g. against
                    # widget.on_state().
                    widget.field_value = "Yes"
                    widget.update()

        # The middle name is optional; leave it out of the file name when the
        # cell is empty so the name does not contain the text "None".
        first = sheet[first_name].value
        middle = sheet[middle_name].value
        last = sheet[last_name].value
        if middle is None:
            doc.save(f'OUTPUT/W-7 {first} {last}.pdf')
        else:
            doc.save(f'OUTPUT/W-7 {first} {middle} {last}.pdf')
    finally:
        # Release the document even when updating or saving fails.
        doc.close()



# Driver script: read one person per spreadsheet row and produce one filled
# W-7 PDF per row via modify_multiple_fields().

# Load the Excel workbook
workbook_path = "Template.xlsx"
sheet_name = "Template"
# data_only=True makes openpyxl return the cached result of formula cells
# instead of the formula text.
workbook = openpyxl.load_workbook(workbook_path, data_only=True)

try:
    sheet = workbook[sheet_name]

    # Sheet dimensions (currently unused; kept as module-level names).
    last_row = sheet.max_row
    last_col = sheet.max_column

    # Number of cells openpyxl returns for column B; used as the last row to
    # process.
    last_row_number = len(sheet['B'])

    # Data starts at row 5.
    for cell_number in range(5, last_row_number + 1):

        # Cell references (e.g. "C5") for the current row, one per column B..O.
        # These stay module-level names because modify_multiple_fields() reads
        # first_name, middle_name, last_name and female_male directly.
        (
            other,
            first_name,
            middle_name,
            last_name,
            street_address,
            city_zip,
            date_birth,
            country,
            birth_city,
            female_male,
            country_citiz,
            issuedby,
            passport_number,
            exp_date,
        ) = (f'{column}{cell_number}' for column in "BCDEFGHIJKLMNO")

        # Progress output: show whose form is being generated.
        print(sheet[first_name].value)

        field_values = {
            # W7 PAGE 1
            "topmostSubform[0].Page1[0].f1_4[0]": sheet[other].value,  # OTHER
            "topmostSubform[0].Page1[0].f1_7[0]": sheet[first_name].value,  # FIRST NAME
            "topmostSubform[0].Page1[0].f1_8[0]": sheet[middle_name].value,  # MIDDLE NAME
            "topmostSubform[0].Page1[0].f1_9[0]": sheet[last_name].value,  # LAST NAME
            "topmostSubform[0].Page1[0].f1_15[0]": sheet[street_address].value,  # CUSTOMER STREET ADDRESS
            "topmostSubform[0].Page1[0].f1_16[0]": sheet[city_zip].value,  # CITY,STATE,COUNTRY ZIP
            "topmostSubform[0].Page1[0].f1_17[0]": sheet[date_birth].value,  # BIRTH MM DD YYYY
            "topmostSubform[0].Page1[0].f1_18[0]": sheet[country].value,  # COUNTRY BIRTH
            "topmostSubform[0].Page1[0].f1_19[0]": sheet[birth_city].value,  # BIRTH CITY
            "topmostSubform[0].Page1[0].f1_20[0]": sheet[country_citiz].value,  # COUNTRY CITIZ
            "topmostSubform[0].Page1[0].f1_24[0]": sheet[issuedby].value,  # ISSUED BY
            "topmostSubform[0].Page1[0].f1_25[0]": sheet[passport_number].value,  # PASSPORT NUMBER
            "topmostSubform[0].Page1[0].f1_26[0]": sheet[exp_date].value,  # PASSPORT EXP DATE MM DD YYYY
            # Add more fields as needed
        }

        modify_multiple_fields("FORM/W7 FORM TAX.pdf", field_values)
finally:
    # Close the workbook even if processing one of the rows fails.
    workbook.close()

Please find attached an example output file. I use the W-7 form from the IRS website.

You can see that the file was modified: W-7 A B.pdf

Could you please let me know why it is not working anymore?

Thank you so much, I am really happy to use this tool, it is amazing

PyMuPDF version

1.23.26

Operating system

Windows

Python version

3.12

Mar 03 '24 22:03 anjuba1017