`infer_table_structure` in `partition_pdf` function causes CUDA RuntimeError

Open naity2 opened this issue 1 year ago • 0 comments

Calling partition_pdf with infer_table_structure=True throws a CUDA RuntimeError. I greatly appreciate any help in resolving this issue!

Code:

from unstructured.partition.pdf import partition_pdf

raw_pdf_elements = partition_pdf(
    filename="1_Presentation.pdf",
    extract_images_in_pdf=True,
    infer_table_structure=True,
)

Error:

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/documents/elements.py:539, in process_metadata.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    537 @functools.wraps(func)
    538 def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> list[Element]:
--> 539     elements = func(*args, **kwargs)
    540     sig = inspect.signature(func)
    541     params: dict[str, Any] = dict(**dict(zip(sig.parameters, args)), **kwargs)

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/file_utils/filetype.py:622, in add_filetype.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    620 @functools.wraps(func)
    621 def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> List[Element]:
--> 622     elements = func(*args, **kwargs)
    623     sig = inspect.signature(func)
    624     params: Dict[str, Any] = dict(**dict(zip(sig.parameters, args)), **kwargs)

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/file_utils/filetype.py:582, in add_metadata.<locals>.wrapper(*args, **kwargs)
    580 @functools.wraps(func)
    581 def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> List[Element]:
--> 582     elements = func(*args, **kwargs)
    583     sig = inspect.signature(func)
    584     params: Dict[str, Any] = dict(**dict(zip(sig.parameters, args)), **kwargs)

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/chunking/dispatch.py:83, in add_chunking_strategy.<locals>.wrapper(*args, **kwargs)
     80     return call_args
     82 # -- call the partitioning function to get the elements --
---> 83 elements = func(*args, **kwargs)
     85 # -- look for a chunking-strategy argument --
     86 call_args = get_call_args_applying_defaults()

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/partition/pdf.py:217, in partition_pdf(filename, file, include_page_breaks, strategy, infer_table_structure, ocr_languages, languages, include_metadata, metadata_filename, metadata_last_modified, chunking_strategy, links, hi_res_model_name, extract_images_in_pdf, extract_image_block_types, extract_image_block_output_dir, extract_image_block_to_payload, date_from_file_object, **kwargs)
    213 exactly_one(filename=filename, file=file)
    215 languages = check_language_args(languages or [], ocr_languages) or ["eng"]
--> 217 return partition_pdf_or_image(
    218     filename=filename,
    219     file=file,
    220     include_page_breaks=include_page_breaks,
    221     strategy=strategy,
    222     infer_table_structure=infer_table_structure,
    223     languages=languages,
    224     metadata_last_modified=metadata_last_modified,
    225     hi_res_model_name=hi_res_model_name,
    226     extract_images_in_pdf=extract_images_in_pdf,
    227     extract_image_block_types=extract_image_block_types,
    228     extract_image_block_output_dir=extract_image_block_output_dir,
    229     extract_image_block_to_payload=extract_image_block_to_payload,
    230     date_from_file_object=date_from_file_object,
    231     **kwargs,
    232 )

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/partition/pdf.py:305, in partition_pdf_or_image(filename, file, is_image, include_page_breaks, strategy, infer_table_structure, ocr_languages, languages, metadata_last_modified, hi_res_model_name, extract_images_in_pdf, extract_image_block_types, extract_image_block_output_dir, extract_image_block_to_payload, date_from_file_object, **kwargs)
    303     with warnings.catch_warnings():
    304         warnings.simplefilter("ignore")
--> 305         elements = _partition_pdf_or_image_local(
    306             filename=filename,
    307             file=spooled_to_bytes_io_if_needed(file),
    308             is_image=is_image,
    309             infer_table_structure=infer_table_structure,
    310             include_page_breaks=include_page_breaks,
    311             languages=languages,
    312             metadata_last_modified=metadata_last_modified or last_modification_date,
    313             hi_res_model_name=hi_res_model_name,
    314             pdf_text_extractable=pdf_text_extractable,
    315             extract_images_in_pdf=extract_images_in_pdf,
    316             extract_image_block_types=extract_image_block_types,
    317             extract_image_block_output_dir=extract_image_block_output_dir,
    318             extract_image_block_to_payload=extract_image_block_to_payload,
    319             **kwargs,
    320         )
    321         out_elements = _process_uncategorized_text_elements(elements)
    323 elif strategy == PartitionStrategy.FAST:

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/utils.py:220, in requires_dependencies.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    211 if len(missing_deps) > 0:
    212     raise ImportError(
    213         f"Following dependencies are missing: {', '.join(missing_deps)}. "
    214         + (
   (...)
    218         ),
    219     )
--> 220 return func(*args, **kwargs)

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/partition/pdf.py:464, in _partition_pdf_or_image_local(filename, file, is_image, infer_table_structure, include_page_breaks, languages, ocr_mode, model_name, hi_res_model_name, pdf_image_dpi, metadata_last_modified, pdf_text_extractable, extract_images_in_pdf, extract_image_block_types, extract_image_block_output_dir, extract_image_block_to_payload, analysis, analyzed_image_output_dir_path, **kwargs)
    458         # NOTE(christine): merged_document_layout = extracted_layout + inferred_layout
    459         merged_document_layout = merge_inferred_with_extracted_layout(
    460             inferred_document_layout=inferred_document_layout,
    461             extracted_layout=extracted_layout,
    462         )
--> 464         final_document_layout = process_file_with_ocr(
    465             filename,
    466             merged_document_layout,
    467             extracted_layout=extracted_layout,
    468             is_image=is_image,
    469             infer_table_structure=infer_table_structure,
    470             ocr_languages=ocr_languages,
    471             ocr_mode=ocr_mode,
    472             pdf_image_dpi=pdf_image_dpi,
    473         )
    474 else:
    475     inferred_document_layout = process_data_with_model(
    476         file,
    477         is_image=is_image,
    478         model_name=hi_res_model_name,
    479         pdf_image_dpi=pdf_image_dpi,
    480     )

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/utils.py:220, in requires_dependencies.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    211 if len(missing_deps) > 0:
    212     raise ImportError(
    213         f"Following dependencies are missing: {', '.join(missing_deps)}. "
    214         + (
   (...)
    218         ),
    219     )
--> 220 return func(*args, **kwargs)

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/partition/pdf_image/ocr.py:177, in process_file_with_ocr(filename, out_layout, extracted_layout, is_image, infer_table_structure, ocr_languages, ocr_mode, pdf_image_dpi)
    175 except Exception as e:
    176     if os.path.isdir(filename) or os.path.isfile(filename):
--> 177         raise e
    178     else:
    179         raise FileNotFoundError(f'File "{filename}" not found!') from e

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/partition/pdf_image/ocr.py:165, in process_file_with_ocr(filename, out_layout, extracted_layout, is_image, infer_table_structure, ocr_languages, ocr_mode, pdf_image_dpi)
    163     extracted_regions = extracted_layout[i] if i < len(extracted_layout) else None
    164     with PILImage.open(image_path) as image:
--> 165         merged_page_layout = supplement_page_layout_with_ocr(
    166             page_layout=out_layout.pages[i],
    167             image=image,
    168             infer_table_structure=infer_table_structure,
    169             ocr_languages=ocr_languages,
    170             ocr_mode=ocr_mode,
    171             extracted_regions=extracted_regions,
    172         )
    173         merged_page_layouts.append(merged_page_layout)
    174 return DocumentLayout.from_pages(merged_page_layouts)

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/utils.py:220, in requires_dependencies.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    211 if len(missing_deps) > 0:
    212     raise ImportError(
    213         f"Following dependencies are missing: {', '.join(missing_deps)}. "
    214         + (
   (...)
    218         ),
    219     )
--> 220 return func(*args, **kwargs)

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/partition/pdf_image/ocr.py:243, in supplement_page_layout_with_ocr(page_layout, image, infer_table_structure, ocr_languages, ocr_mode, extracted_regions)
    240     if tables.tables_agent is None:
    241         raise RuntimeError("Unable to load table extraction agent.")
--> 243     page_layout.elements[:] = supplement_element_with_table_extraction(
    244         elements=cast(List["LayoutElement"], page_layout.elements),
    245         image=image,
    246         tables_agent=tables.tables_agent,
    247         ocr_languages=ocr_languages,
    248         ocr_agent=ocr_agent,
    249         extracted_regions=extracted_regions,
    250     )
    252 return page_layout

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/partition/pdf_image/ocr.py:287, in supplement_element_with_table_extraction(elements, image, tables_agent, ocr_languages, ocr_agent, extracted_regions)
    272     cropped_image = image.crop(
    273         (
    274             padded_element.bbox.x1,
   (...)
    278         ),
    279     )
    280     table_tokens = get_table_tokens(
    281         table_element_image=cropped_image,
    282         ocr_languages=ocr_languages,
   (...)
    285         table_element=padded_element,
    286     )
--> 287     element.text_as_html = tables_agent.predict(cropped_image, ocr_tokens=table_tokens)
    288 return elements

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured_inference/models/tables.py:47, in UnstructuredTableTransformerModel.predict(self, x, ocr_tokens)
     31 """Predict table structure deferring to run_prediction with ocr tokens
     32 
     33 Note:
   (...)
     44 FIXME: refactor token data into a dataclass so we have clear expectations of the fields
     45 """
     46 super().predict(x)
---> 47 return self.run_prediction(x, ocr_tokens=ocr_tokens)

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured_inference/models/tables.py:96, in UnstructuredTableTransformerModel.run_prediction(self, x, pad_for_structure_detection, ocr_tokens, result_format)
     88 def run_prediction(
     89     self,
     90     x: Image,
   (...)
     93     result_format: Optional[str] = "html",
     94 ):
     95     """Predict table structure"""
---> 96     outputs_structure = self.get_structure(x, pad_for_structure_detection)
     97     if ocr_tokens is None:
     98         raise ValueError("Cannot predict table structure with no OCR tokens")

File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured_inference/models/tables.py:84, in UnstructuredTableTransformerModel.get_structure(self, x, pad_for_structure_detection)
     79 logger.info(f"padding image by {pad_for_structure_detection} for structure detection")
     80 encoding = self.feature_extractor(
     81     pad_image_with_background_color(x, pad_for_structure_detection),
     82     return_tensors="pt",
     83 ).to(self.device)
---> 84 outputs_structure = self.model(**encoding)
     85 outputs_structure["pad_for_structure_detection"] = pad_for_structure_detection
     86 return outputs_structure

File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
   1509     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1510 else:
-> 1511     return self._call_impl(*args, **kwargs)

File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
   1515 # If we don't have any hooks, we want to skip the rest of the logic in
   1516 # this function, and just call forward.
   1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1518         or _global_backward_pre_hooks or _global_backward_hooks
   1519         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520     return forward_call(*args, **kwargs)
   1522 try:
   1523     result = None

File /opt/conda/envs/ml/lib/python3.11/site-packages/transformers/models/table_transformer/modeling_table_transformer.py:1456, in TableTransformerForObjectDetection.forward(self, pixel_values, pixel_mask, decoder_attention_mask, encoder_outputs, inputs_embeds, decoder_inputs_embeds, labels, output_attentions, output_hidden_states, return_dict)
   1453 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
   1455 # First, sent images through TABLE_TRANSFORMER base model to obtain encoder + decoder outputs
-> 1456 outputs = self.model(
   1457     pixel_values,
   1458     pixel_mask=pixel_mask,
   1459     decoder_attention_mask=decoder_attention_mask,
   1460     encoder_outputs=encoder_outputs,
   1461     inputs_embeds=inputs_embeds,
   1462     decoder_inputs_embeds=decoder_inputs_embeds,
   1463     output_attentions=output_attentions,
   1464     output_hidden_states=output_hidden_states,
   1465     return_dict=return_dict,
   1466 )
   1468 sequence_output = outputs[0]
   1470 # class logits + predicted bounding boxes

File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
   1509     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1510 else:
-> 1511     return self._call_impl(*args, **kwargs)

File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
   1515 # If we don't have any hooks, we want to skip the rest of the logic in
   1516 # this function, and just call forward.
   1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1518         or _global_backward_pre_hooks or _global_backward_hooks
   1519         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520     return forward_call(*args, **kwargs)
   1522 try:
   1523     result = None

File /opt/conda/envs/ml/lib/python3.11/site-packages/transformers/models/table_transformer/modeling_table_transformer.py:1316, in TableTransformerModel.forward(self, pixel_values, pixel_mask, decoder_attention_mask, encoder_outputs, inputs_embeds, decoder_inputs_embeds, output_attentions, output_hidden_states, return_dict)
   1312 # Fourth, sent flattened_features + flattened_mask + object queries through encoder
   1313 # flattened_features is a Tensor of shape (batch_size, heigth*width, hidden_size)
   1314 # flattened_mask is a Tensor of shape (batch_size, heigth*width)
   1315 if encoder_outputs is None:
-> 1316     encoder_outputs = self.encoder(
   1317         inputs_embeds=flattened_features,
   1318         attention_mask=flattened_mask,
   1319         object_queries=object_queries,
   1320         output_attentions=output_attentions,
   1321         output_hidden_states=output_hidden_states,
   1322         return_dict=return_dict,
   1323     )
   1324 # If the user passed a tuple for encoder_outputs, we wrap it in a BaseModelOutput when return_dict=True
   1325 elif return_dict and not isinstance(encoder_outputs, BaseModelOutput):

File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
   1509     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1510 else:
-> 1511     return self._call_impl(*args, **kwargs)

File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
   1515 # If we don't have any hooks, we want to skip the rest of the logic in
   1516 # this function, and just call forward.
   1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1518         or _global_backward_pre_hooks or _global_backward_hooks
   1519         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520     return forward_call(*args, **kwargs)
   1522 try:
   1523     result = None

File /opt/conda/envs/ml/lib/python3.11/site-packages/transformers/models/table_transformer/modeling_table_transformer.py:977, in TableTransformerEncoder.forward(self, inputs_embeds, attention_mask, object_queries, output_attentions, output_hidden_states, return_dict)
    974     layer_outputs = (None, None)
    975 else:
    976     # we add object_queries as extra input to the encoder_layer
--> 977     layer_outputs = encoder_layer(
    978         hidden_states,
    979         attention_mask,
    980         object_queries=object_queries,
    981         output_attentions=output_attentions,
    982     )
    984     hidden_states = layer_outputs[0]
    986 if output_attentions:

File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
   1509     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1510 else:
-> 1511     return self._call_impl(*args, **kwargs)

File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
   1515 # If we don't have any hooks, we want to skip the rest of the logic in
   1516 # this function, and just call forward.
   1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1518         or _global_backward_pre_hooks or _global_backward_hooks
   1519         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520     return forward_call(*args, **kwargs)
   1522 try:
   1523     result = None

File /opt/conda/envs/ml/lib/python3.11/site-packages/transformers/models/table_transformer/modeling_table_transformer.py:643, in TableTransformerEncoderLayer.forward(self, hidden_states, attention_mask, object_queries, output_attentions)
    640 residual = hidden_states
    641 hidden_states = self.self_attn_layer_norm(hidden_states)
--> 643 hidden_states, attn_weights = self.self_attn(
    644     hidden_states=hidden_states,
    645     attention_mask=attention_mask,
    646     object_queries=object_queries,
    647     output_attentions=output_attentions,
    648 )
    650 hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
    651 hidden_states = residual + hidden_states

File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
   1509     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1510 else:
-> 1511     return self._call_impl(*args, **kwargs)

File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
   1515 # If we don't have any hooks, we want to skip the rest of the logic in
   1516 # this function, and just call forward.
   1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1518         or _global_backward_pre_hooks or _global_backward_hooks
   1519         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520     return forward_call(*args, **kwargs)
   1522 try:
   1523     result = None

File /opt/conda/envs/ml/lib/python3.11/site-packages/transformers/models/table_transformer/modeling_table_transformer.py:538, in TableTransformerAttention.forward(self, hidden_states, attention_mask, object_queries, key_value_states, spatial_position_embeddings, output_attentions, **kwargs)
    535     key_value_states = self.with_pos_embed(key_value_states, spatial_position_embeddings)
    537 # get query proj
--> 538 query_states = self.q_proj(hidden_states) * self.scaling
    539 # get key, value proj
    540 if is_cross_attention:
    541     # cross_attentions

File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
   1509     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1510 else:
-> 1511     return self._call_impl(*args, **kwargs)

File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
   1515 # If we don't have any hooks, we want to skip the rest of the logic in
   1516 # this function, and just call forward.
   1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1518         or _global_backward_pre_hooks or _global_backward_hooks
   1519         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520     return forward_call(*args, **kwargs)
   1522 try:
   1523     result = None

File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/linear.py:116, in Linear.forward(self, input)
    115 def forward(self, input: Tensor) -> Tensor:
--> 116     return F.linear(input, self.weight, self.bias)

RuntimeError: CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasCreate(handle)`

Apr 22 '24 23:04 naity2