unstructured
unstructured copied to clipboard
`infer_table_structure` in `partition_pdf` function causes CUDA RuntimeError
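Calling `partition_pdf` with `infer_table_structure=True` raises a CUDA `RuntimeError` (`CUBLAS_STATUS_NOT_INITIALIZED` when calling `cublasCreate(handle)`) during table-structure inference. I would greatly appreciate any help in resolving this issue!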
Code:
from unstructured.partition.pdf import partition_pdf
raw_pdf_elements = partition_pdf(
    filename="1_Presentation.pdf",
    extract_images_in_pdf=True,
    infer_table_structure=True,
)
Error:
File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/documents/elements.py:539, in process_metadata.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
537 @functools.wraps(func)
538 def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> list[Element]:
--> 539 elements = func(*args, **kwargs)
540 sig = inspect.signature(func)
541 params: dict[str, Any] = dict(**dict(zip(sig.parameters, args)), **kwargs)
File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/file_utils/filetype.py:622, in add_filetype.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
620 @functools.wraps(func)
621 def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> List[Element]:
--> 622 elements = func(*args, **kwargs)
623 sig = inspect.signature(func)
624 params: Dict[str, Any] = dict(**dict(zip(sig.parameters, args)), **kwargs)
File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/file_utils/filetype.py:582, in add_metadata.<locals>.wrapper(*args, **kwargs)
580 @functools.wraps(func)
581 def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> List[Element]:
--> 582 elements = func(*args, **kwargs)
583 sig = inspect.signature(func)
584 params: Dict[str, Any] = dict(**dict(zip(sig.parameters, args)), **kwargs)
File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/chunking/dispatch.py:83, in add_chunking_strategy.<locals>.wrapper(*args, **kwargs)
80 return call_args
82 # -- call the partitioning function to get the elements --
---> 83 elements = func(*args, **kwargs)
85 # -- look for a chunking-strategy argument --
86 call_args = get_call_args_applying_defaults()
File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/partition/pdf.py:217, in partition_pdf(filename, file, include_page_breaks, strategy, infer_table_structure, ocr_languages, languages, include_metadata, metadata_filename, metadata_last_modified, chunking_strategy, links, hi_res_model_name, extract_images_in_pdf, extract_image_block_types, extract_image_block_output_dir, extract_image_block_to_payload, date_from_file_object, **kwargs)
213 exactly_one(filename=filename, file=file)
215 languages = check_language_args(languages or [], ocr_languages) or ["eng"]
--> 217 return partition_pdf_or_image(
218 filename=filename,
219 file=file,
220 include_page_breaks=include_page_breaks,
221 strategy=strategy,
222 infer_table_structure=infer_table_structure,
223 languages=languages,
224 metadata_last_modified=metadata_last_modified,
225 hi_res_model_name=hi_res_model_name,
226 extract_images_in_pdf=extract_images_in_pdf,
227 extract_image_block_types=extract_image_block_types,
228 extract_image_block_output_dir=extract_image_block_output_dir,
229 extract_image_block_to_payload=extract_image_block_to_payload,
230 date_from_file_object=date_from_file_object,
231 **kwargs,
232 )
File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/partition/pdf.py:305, in partition_pdf_or_image(filename, file, is_image, include_page_breaks, strategy, infer_table_structure, ocr_languages, languages, metadata_last_modified, hi_res_model_name, extract_images_in_pdf, extract_image_block_types, extract_image_block_output_dir, extract_image_block_to_payload, date_from_file_object, **kwargs)
303 with warnings.catch_warnings():
304 warnings.simplefilter("ignore")
--> 305 elements = _partition_pdf_or_image_local(
306 filename=filename,
307 file=spooled_to_bytes_io_if_needed(file),
308 is_image=is_image,
309 infer_table_structure=infer_table_structure,
310 include_page_breaks=include_page_breaks,
311 languages=languages,
312 metadata_last_modified=metadata_last_modified or last_modification_date,
313 hi_res_model_name=hi_res_model_name,
314 pdf_text_extractable=pdf_text_extractable,
315 extract_images_in_pdf=extract_images_in_pdf,
316 extract_image_block_types=extract_image_block_types,
317 extract_image_block_output_dir=extract_image_block_output_dir,
318 extract_image_block_to_payload=extract_image_block_to_payload,
319 **kwargs,
320 )
321 out_elements = _process_uncategorized_text_elements(elements)
323 elif strategy == PartitionStrategy.FAST:
File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/utils.py:220, in requires_dependencies.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
211 if len(missing_deps) > 0:
212 raise ImportError(
213 f"Following dependencies are missing: {', '.join(missing_deps)}. "
214 + (
(...)
218 ),
219 )
--> 220 return func(*args, **kwargs)
File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/partition/pdf.py:464, in _partition_pdf_or_image_local(filename, file, is_image, infer_table_structure, include_page_breaks, languages, ocr_mode, model_name, hi_res_model_name, pdf_image_dpi, metadata_last_modified, pdf_text_extractable, extract_images_in_pdf, extract_image_block_types, extract_image_block_output_dir, extract_image_block_to_payload, analysis, analyzed_image_output_dir_path, **kwargs)
458 # NOTE(christine): merged_document_layout = extracted_layout + inferred_layout
459 merged_document_layout = merge_inferred_with_extracted_layout(
460 inferred_document_layout=inferred_document_layout,
461 extracted_layout=extracted_layout,
462 )
--> 464 final_document_layout = process_file_with_ocr(
465 filename,
466 merged_document_layout,
467 extracted_layout=extracted_layout,
468 is_image=is_image,
469 infer_table_structure=infer_table_structure,
470 ocr_languages=ocr_languages,
471 ocr_mode=ocr_mode,
472 pdf_image_dpi=pdf_image_dpi,
473 )
474 else:
475 inferred_document_layout = process_data_with_model(
476 file,
477 is_image=is_image,
478 model_name=hi_res_model_name,
479 pdf_image_dpi=pdf_image_dpi,
480 )
File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/utils.py:220, in requires_dependencies.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
211 if len(missing_deps) > 0:
212 raise ImportError(
213 f"Following dependencies are missing: {', '.join(missing_deps)}. "
214 + (
(...)
218 ),
219 )
--> 220 return func(*args, **kwargs)
File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/partition/pdf_image/ocr.py:177, in process_file_with_ocr(filename, out_layout, extracted_layout, is_image, infer_table_structure, ocr_languages, ocr_mode, pdf_image_dpi)
175 except Exception as e:
176 if os.path.isdir(filename) or os.path.isfile(filename):
--> 177 raise e
178 else:
179 raise FileNotFoundError(f'File "{filename}" not found!') from e
File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/partition/pdf_image/ocr.py:165, in process_file_with_ocr(filename, out_layout, extracted_layout, is_image, infer_table_structure, ocr_languages, ocr_mode, pdf_image_dpi)
163 extracted_regions = extracted_layout[i] if i < len(extracted_layout) else None
164 with PILImage.open(image_path) as image:
--> 165 merged_page_layout = supplement_page_layout_with_ocr(
166 page_layout=out_layout.pages[i],
167 image=image,
168 infer_table_structure=infer_table_structure,
169 ocr_languages=ocr_languages,
170 ocr_mode=ocr_mode,
171 extracted_regions=extracted_regions,
172 )
173 merged_page_layouts.append(merged_page_layout)
174 return DocumentLayout.from_pages(merged_page_layouts)
File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/utils.py:220, in requires_dependencies.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
211 if len(missing_deps) > 0:
212 raise ImportError(
213 f"Following dependencies are missing: {', '.join(missing_deps)}. "
214 + (
(...)
218 ),
219 )
--> 220 return func(*args, **kwargs)
File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/partition/pdf_image/ocr.py:243, in supplement_page_layout_with_ocr(page_layout, image, infer_table_structure, ocr_languages, ocr_mode, extracted_regions)
240 if tables.tables_agent is None:
241 raise RuntimeError("Unable to load table extraction agent.")
--> 243 page_layout.elements[:] = supplement_element_with_table_extraction(
244 elements=cast(List["LayoutElement"], page_layout.elements),
245 image=image,
246 tables_agent=tables.tables_agent,
247 ocr_languages=ocr_languages,
248 ocr_agent=ocr_agent,
249 extracted_regions=extracted_regions,
250 )
252 return page_layout
File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured/partition/pdf_image/ocr.py:287, in supplement_element_with_table_extraction(elements, image, tables_agent, ocr_languages, ocr_agent, extracted_regions)
272 cropped_image = image.crop(
273 (
274 padded_element.bbox.x1,
(...)
278 ),
279 )
280 table_tokens = get_table_tokens(
281 table_element_image=cropped_image,
282 ocr_languages=ocr_languages,
(...)
285 table_element=padded_element,
286 )
--> 287 element.text_as_html = tables_agent.predict(cropped_image, ocr_tokens=table_tokens)
288 return elements
File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured_inference/models/tables.py:47, in UnstructuredTableTransformerModel.predict(self, x, ocr_tokens)
31 """Predict table structure deferring to run_prediction with ocr tokens
32
33 Note:
(...)
44 FIXME: refactor token data into a dataclass so we have clear expectations of the fields
45 """
46 super().predict(x)
---> 47 return self.run_prediction(x, ocr_tokens=ocr_tokens)
File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured_inference/models/tables.py:96, in UnstructuredTableTransformerModel.run_prediction(self, x, pad_for_structure_detection, ocr_tokens, result_format)
88 def run_prediction(
89 self,
90 x: Image,
(...)
93 result_format: Optional[str] = "html",
94 ):
95 """Predict table structure"""
---> 96 outputs_structure = self.get_structure(x, pad_for_structure_detection)
97 if ocr_tokens is None:
98 raise ValueError("Cannot predict table structure with no OCR tokens")
File /opt/conda/envs/ml/lib/python3.11/site-packages/unstructured_inference/models/tables.py:84, in UnstructuredTableTransformerModel.get_structure(self, x, pad_for_structure_detection)
79 logger.info(f"padding image by {pad_for_structure_detection} for structure detection")
80 encoding = self.feature_extractor(
81 pad_image_with_background_color(x, pad_for_structure_detection),
82 return_tensors="pt",
83 ).to(self.device)
---> 84 outputs_structure = self.model(**encoding)
85 outputs_structure["pad_for_structure_detection"] = pad_for_structure_detection
86 return outputs_structure
File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1510 else:
-> 1511 return self._call_impl(*args, **kwargs)
File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
1515 # If we don't have any hooks, we want to skip the rest of the logic in
1516 # this function, and just call forward.
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1518 or _global_backward_pre_hooks or _global_backward_hooks
1519 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520 return forward_call(*args, **kwargs)
1522 try:
1523 result = None
File /opt/conda/envs/ml/lib/python3.11/site-packages/transformers/models/table_transformer/modeling_table_transformer.py:1456, in TableTransformerForObjectDetection.forward(self, pixel_values, pixel_mask, decoder_attention_mask, encoder_outputs, inputs_embeds, decoder_inputs_embeds, labels, output_attentions, output_hidden_states, return_dict)
1453 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
1455 # First, sent images through TABLE_TRANSFORMER base model to obtain encoder + decoder outputs
-> 1456 outputs = self.model(
1457 pixel_values,
1458 pixel_mask=pixel_mask,
1459 decoder_attention_mask=decoder_attention_mask,
1460 encoder_outputs=encoder_outputs,
1461 inputs_embeds=inputs_embeds,
1462 decoder_inputs_embeds=decoder_inputs_embeds,
1463 output_attentions=output_attentions,
1464 output_hidden_states=output_hidden_states,
1465 return_dict=return_dict,
1466 )
1468 sequence_output = outputs[0]
1470 # class logits + predicted bounding boxes
File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1510 else:
-> 1511 return self._call_impl(*args, **kwargs)
File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
1515 # If we don't have any hooks, we want to skip the rest of the logic in
1516 # this function, and just call forward.
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1518 or _global_backward_pre_hooks or _global_backward_hooks
1519 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520 return forward_call(*args, **kwargs)
1522 try:
1523 result = None
File /opt/conda/envs/ml/lib/python3.11/site-packages/transformers/models/table_transformer/modeling_table_transformer.py:1316, in TableTransformerModel.forward(self, pixel_values, pixel_mask, decoder_attention_mask, encoder_outputs, inputs_embeds, decoder_inputs_embeds, output_attentions, output_hidden_states, return_dict)
1312 # Fourth, sent flattened_features + flattened_mask + object queries through encoder
1313 # flattened_features is a Tensor of shape (batch_size, heigth*width, hidden_size)
1314 # flattened_mask is a Tensor of shape (batch_size, heigth*width)
1315 if encoder_outputs is None:
-> 1316 encoder_outputs = self.encoder(
1317 inputs_embeds=flattened_features,
1318 attention_mask=flattened_mask,
1319 object_queries=object_queries,
1320 output_attentions=output_attentions,
1321 output_hidden_states=output_hidden_states,
1322 return_dict=return_dict,
1323 )
1324 # If the user passed a tuple for encoder_outputs, we wrap it in a BaseModelOutput when return_dict=True
1325 elif return_dict and not isinstance(encoder_outputs, BaseModelOutput):
File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1510 else:
-> 1511 return self._call_impl(*args, **kwargs)
File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
1515 # If we don't have any hooks, we want to skip the rest of the logic in
1516 # this function, and just call forward.
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1518 or _global_backward_pre_hooks or _global_backward_hooks
1519 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520 return forward_call(*args, **kwargs)
1522 try:
1523 result = None
File /opt/conda/envs/ml/lib/python3.11/site-packages/transformers/models/table_transformer/modeling_table_transformer.py:977, in TableTransformerEncoder.forward(self, inputs_embeds, attention_mask, object_queries, output_attentions, output_hidden_states, return_dict)
974 layer_outputs = (None, None)
975 else:
976 # we add object_queries as extra input to the encoder_layer
--> 977 layer_outputs = encoder_layer(
978 hidden_states,
979 attention_mask,
980 object_queries=object_queries,
981 output_attentions=output_attentions,
982 )
984 hidden_states = layer_outputs[0]
986 if output_attentions:
File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1510 else:
-> 1511 return self._call_impl(*args, **kwargs)
File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
1515 # If we don't have any hooks, we want to skip the rest of the logic in
1516 # this function, and just call forward.
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1518 or _global_backward_pre_hooks or _global_backward_hooks
1519 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520 return forward_call(*args, **kwargs)
1522 try:
1523 result = None
File /opt/conda/envs/ml/lib/python3.11/site-packages/transformers/models/table_transformer/modeling_table_transformer.py:643, in TableTransformerEncoderLayer.forward(self, hidden_states, attention_mask, object_queries, output_attentions)
640 residual = hidden_states
641 hidden_states = self.self_attn_layer_norm(hidden_states)
--> 643 hidden_states, attn_weights = self.self_attn(
644 hidden_states=hidden_states,
645 attention_mask=attention_mask,
646 object_queries=object_queries,
647 output_attentions=output_attentions,
648 )
650 hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
651 hidden_states = residual + hidden_states
File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1510 else:
-> 1511 return self._call_impl(*args, **kwargs)
File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
1515 # If we don't have any hooks, we want to skip the rest of the logic in
1516 # this function, and just call forward.
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1518 or _global_backward_pre_hooks or _global_backward_hooks
1519 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520 return forward_call(*args, **kwargs)
1522 try:
1523 result = None
File /opt/conda/envs/ml/lib/python3.11/site-packages/transformers/models/table_transformer/modeling_table_transformer.py:538, in TableTransformerAttention.forward(self, hidden_states, attention_mask, object_queries, key_value_states, spatial_position_embeddings, output_attentions, **kwargs)
535 key_value_states = self.with_pos_embed(key_value_states, spatial_position_embeddings)
537 # get query proj
--> 538 query_states = self.q_proj(hidden_states) * self.scaling
539 # get key, value proj
540 if is_cross_attention:
541 # cross_attentions
File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1510 else:
-> 1511 return self._call_impl(*args, **kwargs)
File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
1515 # If we don't have any hooks, we want to skip the rest of the logic in
1516 # this function, and just call forward.
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1518 or _global_backward_pre_hooks or _global_backward_hooks
1519 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520 return forward_call(*args, **kwargs)
1522 try:
1523 result = None
File /opt/conda/envs/ml/lib/python3.11/site-packages/torch/nn/modules/linear.py:116, in Linear.forward(self, input)
115 def forward(self, input: Tensor) -> Tensor:
--> 116 return F.linear(input, self.weight, self.bias)
RuntimeError: CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasCreate(handle)`