Invalid Metal library on non-conda Mac env after cmp_tuple
macOS, Metal, M3 MacBook Pro, Python 3.12.4 in a venv.
I think 3929a9dc945e9c00684aa676241908550cf7b697 broke something on Metal: I am hitting an invalid library error in a non-conda environment. This was working a while ago; git bisect points to 3929a9dc945e9c00684aa676241908550cf7b697.
AssertionError: Invalid Metal library. Could be due to using conda. Try system python or METAL_XCODE=1 DISABLE_COMPILER_CACHE=1.
To reproduce (similar to https://github.com/tinygrad/tinygrad/issues/5241):
#!/usr/bin/env python3
from tinygrad import Tensor, dtypes
import numpy as np
def main():
huge = Tensor(np.zeros((20000, 20000)), dtype=dtypes.int32)
a = Tensor.arange(20000)
b = Tensor.arange(20000)
indexed = huge[a,b]
print(indexed[0].sum().item())
if __name__ == "__main__":
main()
This works on a9d6a6c339871d489d0db63746490142e6541d82 but fails with the library error on the next commit (3929a9dc945e9c00684aa676241908550cf7b697)
(.venv) ➜ tinygrad git:(a9d6a6c3) ✗ python t.py
Traceback (most recent call last):
File "/Users/rvd/src/rvd/tinygrad/t.py", line 13, in <module>
main()
File "/Users/rvd/src/rvd/tinygrad/t.py", line 10, in main
print(indexed[0].sum().item())
^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/rvd/src/rvd/tinygrad/tinygrad/tensor.py", line 271, in item
return self._data().cast(self.dtype.fmt)[0]
^^^^^^^^^^^^
File "/Users/rvd/src/rvd/tinygrad/tinygrad/tensor.py", line 242, in _data
cpu = self.cast(self.dtype.scalar()).contiguous().to("CLANG").realize()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/rvd/src/rvd/tinygrad/tinygrad/tensor.py", line 201, in realize
run_schedule(*self.schedule_with_vars(*lst), do_update_stats=do_update_stats)
File "/Users/rvd/src/rvd/tinygrad/tinygrad/engine/realize.py", line 190, in run_schedule
for ei in lower_schedule(schedule):
File "/Users/rvd/src/rvd/tinygrad/tinygrad/engine/realize.py", line 183, in lower_schedule
while len(schedule): yield lower_schedule_item(schedule.pop(0))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/rvd/src/rvd/tinygrad/tinygrad/engine/realize.py", line 169, in lower_schedule_item
runner = get_runner(si.outputs[0].device, si.ast)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/rvd/src/rvd/tinygrad/tinygrad/engine/realize.py", line 142, in get_runner
method_cache[ckey] = method_cache[bkey] = ret = CompiledRunner(replace(prg, dname=dname))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/rvd/src/rvd/tinygrad/tinygrad/engine/realize.py", line 61, in __init__
self.clprg = Device[p.dname].runtime(p.function_name, self.lib)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/rvd/src/rvd/tinygrad/tinygrad/runtime/ops_metal.py", line 39, in __init__
assert lib[:4] == b"MTLB", "Invalid Metal library. Could be due to using conda. Try system python or METAL_XCODE=1 DISABLE_COMPILER_CACHE=1."
^^^^^^^^^^^^^^^^^^
AssertionError: Invalid Metal library. Could be due to using conda. Try system python or METAL_XCODE=1 DISABLE_COMPILER_CACHE=1.
``
It might be an OOM or an indexing issue. I can repro on my M1 Max with 20000, but 18000 runs fine, and it has an 8045004 reduce.
Could someone post the first 100 or so bytes of that library? I could take a look at it.
Hmm, I got a different error, not the invalid MTLB one: RuntimeError: Error Domain=MTLCommandBufferErrorDomain Code=1 "Internal Error (0000000e:Internal Error)" UserInfo={NSLocalizedDescription=Internal Error (0000000e:Internal Error), NSUnderlyingError=0x158779a90 {Error Domain=IOGPUCommandQueueErrorDomain Code=14 "(null)"}}
Can't repro on M3 Max on master; feel free to reopen if it's still an issue.
I'm having the same problem: macOS Sierra, M3 Pro Mac mini.
I have a Conda environment and installed tinygrad from the current master.
I also ran the test cases according to README.md and they failed. One of the failing outputs is included below, after my run of the code from the OP.
❯ python3
Python 3.10.14 | packaged by conda-forge | (main, Mar 20 2024, 12:51:49) [Clang 16.0.6 ] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> #!/usr/bin/env python3
>>>
>>> from tinygrad import Tensor, dtypes
import numpy as np
def main():
huge = Tensor(np.zeros((20000, 20000)), dtype=dtypes.int32)
a = Tensor.arange(20000)
b = Tensor.arange(20000)
indexed = huge[a,b]
print(indexed[0].sum().item())
if __name__ == "__main__":
main()>>> import numpy as np
>>> def main():
... huge = Tensor(np.zeros((20000, 20000)), dtype=dtypes.int32)
... a = Tensor.arange(20000)
... b = Tensor.arange(20000)
... indexed = huge[a,b]
... print(indexed[0].sum().item())
...
>>> if __name__ == "__main__":
... main()
...
Traceback (most recent call last):
File "<stdin>", line 2, in <module>
File "<stdin>", line 6, in main
File "/Users/grunwald/tinygrad/tinygrad/tensor.py", line 3506, in _wrapper
ret = fn(*args, **kwargs)
File "/Users/grunwald/tinygrad/tinygrad/tensor.py", line 288, in item
return self._data().cast(self.dtype.fmt)[0]
File "/Users/grunwald/tinygrad/tinygrad/tensor.py", line 3481, in _wrapper
if _METADATA.get() is not None: return fn(*args, **kwargs)
File "/Users/grunwald/tinygrad/tinygrad/tensor.py", line 259, in _data
cpu = self.cast(self.dtype.scalar()).contiguous().to("CLANG").realize()
File "/Users/grunwald/tinygrad/tinygrad/tensor.py", line 3481, in _wrapper
if _METADATA.get() is not None: return fn(*args, **kwargs)
File "/Users/grunwald/tinygrad/tinygrad/tensor.py", line 218, in realize
run_schedule(*self.schedule_with_vars(*lst), do_update_stats=do_update_stats)
File "/Users/grunwald/tinygrad/tinygrad/engine/realize.py", line 222, in run_schedule
for ei in lower_schedule(schedule):
File "/Users/grunwald/tinygrad/tinygrad/engine/realize.py", line 215, in lower_schedule
raise e
File "/Users/grunwald/tinygrad/tinygrad/engine/realize.py", line 209, in lower_schedule
try: yield lower_schedule_item(si)
File "/Users/grunwald/tinygrad/tinygrad/engine/realize.py", line 193, in lower_schedule_item
runner = get_runner(si.outputs[0].device, si.ast)
File "/Users/grunwald/tinygrad/tinygrad/engine/realize.py", line 162, in get_runner
method_cache[ckey] = method_cache[bkey] = ret = CompiledRunner(replace(prg, dname=dname))
File "/Users/grunwald/tinygrad/tinygrad/engine/realize.py", line 85, in __init__
self.clprg = Device[p.dname].runtime(p.function_name, self.lib)
File "/Users/grunwald/tinygrad/tinygrad/runtime/ops_metal.py", line 88, in __init__
assert lib[:4] == b"MTLB", "Invalid Metal library. Could be due to using conda. Try system python or METAL_XCODE=1 DISABLE_COMPILER_CACHE=1."
AssertionError: Invalid Metal library. Could be due to using conda. Try system python or METAL_XCODE=1 DISABLE_COMPILER_CACHE=1.
>>>
And here is a random sample of the tests that failed (they all failed):
======================================================================
FAIL: test_9_gemm (__main__.TestOps)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/Users/grunwald/tinygrad/test/test_ops.py", line 854, in test_9_gemm
helper_test_op([(9,9), (9,9)], lambda x,y: x.matmul(y), lambda x,y: x@y)
File "/Users/grunwald/tinygrad/test/test_ops.py", line 31, in helper_test_op
ret = tinygrad_fxn(*tst).realize()
File "/Users/grunwald/tinygrad/tinygrad/tensor.py", line 3506, in _wrapper
ret = fn(*args, **kwargs)
File "/Users/grunwald/tinygrad/tinygrad/tensor.py", line 218, in realize
run_schedule(*self.schedule_with_vars(*lst), do_update_stats=do_update_stats)
File "/Users/grunwald/tinygrad/tinygrad/engine/realize.py", line 222, in run_schedule
for ei in lower_schedule(schedule):
File "/Users/grunwald/tinygrad/tinygrad/engine/realize.py", line 215, in lower_schedule
raise e
File "/Users/grunwald/tinygrad/tinygrad/engine/realize.py", line 209, in lower_schedule
try: yield lower_schedule_item(si)
File "/Users/grunwald/tinygrad/tinygrad/engine/realize.py", line 193, in lower_schedule_item
runner = get_runner(si.outputs[0].device, si.ast)
File "/Users/grunwald/tinygrad/tinygrad/engine/realize.py", line 162, in get_runner
method_cache[ckey] = method_cache[bkey] = ret = CompiledRunner(replace(prg, dname=dname))
File "/Users/grunwald/tinygrad/tinygrad/engine/realize.py", line 85, in __init__
self.clprg = Device[p.dname].runtime(p.function_name, self.lib)
File "/Users/grunwald/tinygrad/tinygrad/runtime/ops_metal.py", line 88, in __init__
assert lib[:4] == b"MTLB", "Invalid Metal library. Could be due to using conda. Try system python or METAL_XCODE=1 DISABLE_COMPILER_CACHE=1."
AssertionError: Invalid Metal library. Could be due to using conda. Try system python or METAL_XCODE=1 DISABLE_COMPILER_CACHE=1.
Yep, same issue.
Changing the path in tinygrad/runtime/ops_metal.py to the following fixed the issue for me:
libmetal = ctypes.CDLL("/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/System/Library/Frameworks/Metal.framework")