Change tokenizer parameters for long texts
Hi,
SequenceClassificationExplainer works fine with short texts, but for long texts it throws an error like:
RuntimeError: The expanded size of the tensor (583) must match the existing size (514) at non-singleton dimension 1. Target sizes: [1, 583]. Tensor sizes: [1, 514]
I think the problem would be solved if I could pass some tokenizer parameters, such as padding="max_length", truncation=True, max_length=max_length, to the explainer.
Do you have any suggestions for this problem? How can I solve it?
Example usage:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers_interpret import SequenceClassificationExplainer
model = AutoModelForSequenceClassification.from_pretrained("model_name")
tokenizer = AutoTokenizer.from_pretrained("model_name")
explainer = SequenceClassificationExplainer(model, tokenizer)
example_text = """some long text"""
word_attributions = explainer(example_text)
Exception:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
/tmp/ipykernel_247623/3310833535.py in <module>
1 example_text = """some long text"""
----> 2 word_attributions = explainer(preprocess(example_text), class_name="riskli")
/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/transformers_interpret/explainers/sequence_classification.py in __call__(self, text, index, class_name, embedding_type, internal_batch_size, n_steps)
312 if internal_batch_size:
313 self.internal_batch_size = internal_batch_size
--> 314 return self._run(text, index, class_name, embedding_type=embedding_type)
315
316 def __str__(self):
/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/transformers_interpret/explainers/sequence_classification.py in _run(self, text, index, class_name, embedding_type)
266 self.text = self._clean_text(text)
267
--> 268 self._calculate_attributions(embeddings=embeddings, index=index, class_name=class_name)
269 return self.word_attributions # type: ignore
270
/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/transformers_interpret/explainers/sequence_classification.py in _calculate_attributions(self, embeddings, index, class_name)
225
226             reference_tokens = [token.replace("Ġ", "") for token in self.decode(self.input_ids)]
--> 227 lig = LIGAttributions(
228 self._forward,
229 embeddings,
/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/transformers_interpret/attributions.py in __init__(self, custom_forward, embeddings, tokens, input_ids, ref_input_ids, sep_id, attention_mask, token_type_ids, position_ids, ref_token_type_ids, ref_position_ids, internal_batch_size, n_steps)
60 )
61 elif self.position_ids is not None:
---> 62 self._attributions, self.delta = self.lig.attribute(
63 inputs=(self.input_ids, self.position_ids),
64 baselines=(
/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/captum/log/__init__.py in wrapper(*args, **kwargs)
33 @wraps(func)
34 def wrapper(*args, **kwargs):
---> 35 return func(*args, **kwargs)
36
37 return wrapper
/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/captum/attr/_core/layer/layer_integrated_gradients.py in attribute(self, inputs, baselines, target, additional_forward_args, n_steps, method, internal_batch_size, return_convergence_delta, attribute_to_layer_input)
363 self.device_ids = getattr(self.forward_func, "device_ids", None)
364
--> 365 inputs_layer = _forward_layer_eval(
366 self.forward_func,
367 inps,
/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/captum/_utils/gradient.py in _forward_layer_eval(forward_fn, inputs, layer, additional_forward_args, device_ids, attribute_to_layer_input, grad_enabled)
180 grad_enabled: bool = False,
181 ) -> Union[Tuple[Tensor, ...], List[Tuple[Tensor, ...]]]:
--> 182 return _forward_layer_eval_with_neuron_grads(
183 forward_fn,
184 inputs,
/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/captum/_utils/gradient.py in _forward_layer_eval_with_neuron_grads(forward_fn, inputs, layer, additional_forward_args, gradient_neuron_selector, grad_enabled, device_ids, attribute_to_layer_input)
443
444 with torch.autograd.set_grad_enabled(grad_enabled):
--> 445 saved_layer = _forward_layer_distributed_eval(
446 forward_fn,
447 inputs,
/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/captum/_utils/gradient.py in _forward_layer_distributed_eval(forward_fn, inputs, layer, target_ind, additional_forward_args, attribute_to_layer_input, forward_hook_with_return, require_layer_grads)
292 single_layer.register_forward_hook(hook_wrapper(single_layer))
293 )
--> 294 output = _run_forward(
295 forward_fn,
296 inputs,
/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/captum/_utils/common.py in _run_forward(forward_func, inputs, target, additional_forward_args)
454 additional_forward_args = _format_additional_forward_args(additional_forward_args)
455
--> 456 output = forward_func(
457 *(*inputs, *additional_forward_args)
458 if additional_forward_args is not None
/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/transformers_interpret/explainers/sequence_classification.py in _forward(self, input_ids, position_ids, attention_mask)
178
179 if self.accepts_position_ids:
--> 180 preds = self.model(
181 input_ids,
182 position_ids=position_ids,
/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/transformers/models/roberta/modeling_roberta.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict)
1198 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
1199
-> 1200 outputs = self.roberta(
1201 input_ids,
1202 attention_mask=attention_mask,
/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/transformers/models/roberta/modeling_roberta.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)
814 if hasattr(self.embeddings, "token_type_ids"):
815 buffered_token_type_ids = self.embeddings.token_type_ids[:, :seq_length]
--> 816 buffered_token_type_ids_expanded = buffered_token_type_ids.expand(batch_size, seq_length)
817 token_type_ids = buffered_token_type_ids_expanded
818 else:
RuntimeError: The expanded size of the tensor (583) must match the existing size (514) at non-singleton dimension 1. Target sizes: [1, 583]. Tensor sizes: [1, 514]
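Until tokenizer parameters can be passed through, one workaround is to truncate the text up front before handing it to the explainer. A minimal sketch, assuming a 512-token model limit as the RoBERTa traceback suggests; the 480 cap and the decode/re-encode step are my own choices, not part of the library:
# Sketch of a pre-truncation workaround; tokenizer, explainer and example_text
# are the objects from the example above. 480 is a deliberately conservative
# cap (below the 512-token limit) because decoding and re-encoding the string
# can shift token boundaries slightly.
encoded = tokenizer(example_text, truncation=True, max_length=480)
truncated_text = tokenizer.decode(encoded["input_ids"], skip_special_tokens=True)
word_attributions = explainer(truncated_text)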
Hey @yusufcakmakk,
I stumbled across the same problem. I simply changed some of the parameters to pass max_length to the tokenizer's encode function. See my fork here.
Hope this helps!
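For anyone hitting this before a fix is merged, the idea boils down to passing truncation/max_length where the explainer encodes the text. A rough sketch of that approach (not the exact code in the fork; the encode() override and the 510 cap are assumptions):
# Hypothetical sketch: subclass the explainer and cap the encoded length.
# Assumes the base explainer builds its input ids through an encode() helper
# that wraps tokenizer.encode; the override only adds truncation parameters.
from transformers_interpret import SequenceClassificationExplainer

class TruncatingSequenceClassificationExplainer(SequenceClassificationExplainer):
    def encode(self, text):
        return self.tokenizer.encode(
            text,
            add_special_tokens=False,
            truncation=True,
            max_length=510,  # leaves room for the special tokens added later
        )

explainer = TruncatingSequenceClassificationExplainer(model, tokenizer)
word_attributions = explainer(example_text)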
Hey @e-tornike, I just looked at your fork, this is great. Would you be interested in adding this as a contribution?
Hi @cdpierse, thanks for having a look at this! I've simplified the truncation further and made a pull request.