tvm
tvm copied to clipboard
[Bug] MetaSchedule leads to a significant difference in the reference results for the log_softmax operator
Actual behavior
Traceback (most recent call last):
File "/data/qshenaf/remote_pc/TirFuzz/bugs/topi.nn.log_softmax_1.py", line 27, in <module>
np.testing.assert_allclose(
File "/data/qshenaf/miniconda3/envs/tvm/lib/python3.12/site-packages/numpy/testing/_private/utils.py", line 1715, in assert_allclose
assert_array_compare(compare, actual, desired, err_msg=str(err_msg),
File "/data/qshenaf/miniconda3/envs/tvm/lib/python3.12/site-packages/numpy/testing/_private/utils.py", line 921, in assert_array_compare
raise AssertionError(msg)
AssertionError:
Not equal to tolerance rtol=1e-05, atol=1e-05
An Inconsistency bug detected.
Mismatched elements: 24694 / 65536 (37.7%)
Max absolute difference among violations: 0.007812
Max relative difference among violations: 0.001672
ACTUAL: array([[[-5.27 , -4.793, -5.016, ..., -5. , -4.805, -5.07 ],
[-4.855, -5.156, -5.062, ..., -5.008, -5.984, -6.164],
[-6.086, -6.08 , -5.566, ..., -5.617, -5.836, -6.266],...
DESIRED: array([[[-5.27 , -4.793, -5.016, ..., -5. , -4.805, -5.07 ],
[-4.86 , -5.156, -5.066, ..., -5.016, -5.99 , -6.168],
[-6.086, -6.08 , -5.566, ..., -5.617, -5.836, -6.266],...
Environment
tvm-0.21.dev0
Steps to reproduce
import tvm
from tvm import te, topi, tir
from tvm import meta_schedule as ms
import numpy as np
def compile_mod(mod, np_input_list, output_shape, output_type, opt_level=3):
    """Build ``mod`` for the ``llvm`` target, run it once, and return the output.

    Parameters
    ----------
    mod
        Module handed to ``tvm.build``.
    np_input_list
        Iterable of input arrays; each one is wrapped with ``tvm.nd.array``.
    output_shape
        Shape of the output buffer to allocate.
    output_type
        Dtype of the output buffer to allocate.
    opt_level
        Value passed to ``tvm.transform.PassContext`` for the build.

    Returns
    -------
    The output buffer allocated on ``tvm.cpu(0)``, after it has been passed
    as the last argument to the compiled function.
    """
    # Only compilation is placed under the pass context.
    with tvm.transform.PassContext(opt_level):
        ref_mod = tvm.build(mod, target='llvm')

    mod_output = tvm.nd.empty(output_shape, dtype=output_type, device=tvm.cpu(0))
    tvm_inputs = [tvm.nd.array(x) for x in np_input_list]
    # Inputs first, output buffer last.
    ref_mod(*tvm_inputs, mod_output)
    return mod_output
# Reproduction: compare log_softmax output of the untuned module against the
# MetaSchedule-tuned module on the same random float16 input.

# float16 placeholder of shape (1, 256, 256); log_softmax is applied along axis 2.
x = te.placeholder((1, 256, 256), dtype='float16', name='x')
op_config = {'x': x, 'axis': 2, }
op_output = topi.nn.log_softmax(**op_config)
# Random inputs drawn from uniform(-1, 1) and cast to float16.
np_inputs = [np.random.uniform(-1, 1, size=(1, 256, 256)).astype('float16')]
sch = tir.Schedule(te.create_prim_func([x, op_output]).with_attr('target', tvm.target.Target('llvm')))
# Reference result: the untuned module built with opt_level=0.
ref_output = compile_mod(sch.mod, np_inputs, op_output.shape, op_output.dtype, opt_level=0)
# Tune for a single trial, then compile from the resulting database.
database = ms.tir_integration.tune_tir(mod=sch.mod, target='llvm --num-cores=16', work_dir='./tune_tmp', max_trials_global=1, num_trials_per_iter=1)
sch = ms.tir_integration.compile_tir(database, sch.mod, 'llvm --num-cores=16')
# Result under test: the tuned module built with opt_level=4.
opt_mod_output = compile_mod(sch.mod, np_inputs, op_output.shape, op_output.dtype, opt_level=4)
# NOTE(review): rtol/atol of 1e-5 are much finer than float16 resolution
# (machine epsilon is 2**-10, about 9.8e-4), so rounding differences of a few
# ULPs between two schedules are enough to fail this check.
np.testing.assert_allclose(
ref_output.numpy(), opt_mod_output.numpy(), rtol=1e-5, atol=1e-5, err_msg=f"An Inconsistency bug detected."
)
Triage
- needs-triage
- tune:meta_schedule
cc @ibsidorenko
Hi, same as issue #17977
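I checked that all value differences are within 2 ULPs. For float16 values with magnitude in [4, 8) one ULP is 2^-8 ≈ 0.0039, so the reported maximum absolute difference of 0.007812 ≈ 2^-7 is exactly 2 ULPs. In other words, this is expected float16 rounding behavior, not a bug.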