clang backend fails using version 18.1.3
using CLANG backend Model already downloaded using LLaMA-7B model 0%| | 0/292 [00:00<?, ?it/s] fatal error: error in backend: Do not know how to soft promote this operator's operand! clang: error: clang frontend command failed with exit code 70 (use -v to see invocation) Ubuntu clang version 18.1.3 (1ubuntu1) Target: x86_64-pc-linux-gnu Thread model: posix InstalledDir: /usr/bin clang: note: diagnostic msg: Error generating preprocessed source(s) - ignoring input from stdin. clang: note: diagnostic msg: Error generating preprocessed source(s) - no preprocessable inputs. loaded weights in 79.66 ms, 0.03 GB loaded at 0.42 GB/s
... File "/home/lorenzo/projects/tinygrad/tinygrad/runtime/ops_clang.py", line 15, in compile subprocess.check_output(['clang', '-shared', *self.args, '-O2', '-Wall', '-Werror', '-x', 'c', '-fPIC', '-ffreestanding', '-nostdlib', File "/usr/lib/python3.12/subprocess.py", line 466, in check_output return run(*popenargs, stdout=PIPE, timeout=timeout, check=True, File "/usr/lib/python3.12/subprocess.py", line 571, in run raise CalledProcessError(retcode, process.args, subprocess.CalledProcessError: Command '['clang', '-shared', '-march=native', '-O2', '-Wall', '-Werror', '-x', 'c', '-fPIC', '-ffreestanding', '-nostdlib', '-', '-o', '/tmp/tmp6v5p3a9d']' returned non-zero exit status 1.
(noob) Running python examples/llama3.py. I don't understand what clang is complaining about.
Can you run with DEBUG=4 and paste the broken source?
Oh, it fails for me too, but with a different error.
tiny@tiny19:~/tinygrad$ CLANG=1 python3 examples/llama3.py --model /raid/weights/LLaMA-3/8B/
seed = 1728209391
0%| | 0/292 [00:00<?, ?it/s]
<stdin>:2:48: error: __bf16 is not supported on this target
void E_4194304_4(__fp16* restrict data0, const __bf16* restrict data1) {
^
<stdin>:5:5: error: __bf16 is not supported on this target
__bf16 val0 = data1[alu0+1];
^
<stdin>:6:5: error: __bf16 is not supported on this target
__bf16 val1 = data1[alu0+2];
^
<stdin>:7:5: error: __bf16 is not supported on this target
__bf16 val2 = data1[alu0+3];
^
<stdin>:8:5: error: __bf16 is not supported on this target
__bf16 val3 = data1[alu0];
^
<stdin>:9:51: error: cannot type-cast from __bf16
*((__fp164*)(data0+alu0)) = (__fp164){(__fp16)(val3),(__fp16)(val0),(__fp16)(val1),(__fp16)(val2)};
^~~~~~
<stdin>:9:66: error: cannot type-cast from __bf16
*((__fp164*)(data0+alu0)) = (__fp164){(__fp16)(val3),(__fp16)(val0),(__fp16)(val1),(__fp16)(val2)};
^~~~~~
<stdin>:9:81: error: cannot type-cast from __bf16
*((__fp164*)(data0+alu0)) = (__fp164){(__fp16)(val3),(__fp16)(val0),(__fp16)(val1),(__fp16)(val2)};
^~~~~~
<stdin>:9:96: error: cannot type-cast from __bf16
*((__fp164*)(data0+alu0)) = (__fp164){(__fp16)(val3),(__fp16)(val0),(__fp16)(val1),(__fp16)(val2)};
^~~~~~
9 errors generated.
CLANG doesn't support BF16
seed = 1728209481
opened device CLANG from pid:31525
opened device PYTHON from pid:31525
assign <LB CLANG (1,) uint (<MetaOps.COPY: 30>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
assign <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)> <- <LB CLANG (1,) uint (<BinaryOps.ADD: 9>, None)>
opened device DISK:llama3-8b-sfr/model-00001-of-00004.safetensors from pid:31525
*** DISK:ll 1 empty 4976698672 dtypes.uchar arg 1 mem 0.00 GB
*** DISK:ll 2 view 8 @ 0 arg 2 mem 0.00 GB
*** CLANG 3 copy 8, CLANG <- DISK:ll arg 2 mem 0.00 GB tm 40.65us/ 0.04ms ( 0.00 GFLOPS 0.0|0.0 GB/s)
*** DISK:ll 4 view 9512 @ 8 arg 2 mem 0.00 GB
*** CLANG 5 copy 9512, CLANG <- DISK:ll arg 2 mem 0.00 GB tm 12.90us/ 0.05ms ( 0.00 GFLOPS 0.7|0.7 GB/s)
opened device DISK:llama3-8b-sfr/model-00002-of-00004.safetensors from pid:31525
*** DISK:ll 6 empty 4999802720 dtypes.uchar arg 1 mem 0.00 GB
*** DISK:ll 7 view 8 @ 0 arg 2 mem 0.00 GB
*** CLANG 8 copy 8, CLANG <- DISK:ll arg 2 mem 0.00 GB tm 29.85us/ 0.08ms ( 0.00 GFLOPS 0.0|0.0 GB/s)
*** DISK:ll 9 view 12120 @ 8 arg 2 mem 0.00 GB
*** CLANG 10 copy 12120, CLANG <- DISK:ll arg 2 mem 0.00 GB tm 8.48us/ 0.09ms ( 0.00 GFLOPS 1.4|1.4 GB/s)
opened device DISK:llama3-8b-sfr/model-00003-of-00004.safetensors from pid:31525
*** DISK:ll 11 empty 4915916176 dtypes.uchar arg 1 mem 0.00 GB
*** DISK:ll 12 view 8 @ 0 arg 2 mem 0.00 GB
*** CLANG 13 copy 8, CLANG <- DISK:ll arg 2 mem 0.00 GB tm 38.52us/ 0.13ms ( 0.00 GFLOPS 0.0|0.0 GB/s)
*** DISK:ll 14 view 11656 @ 8 arg 2 mem 0.00 GB
*** CLANG 15 copy 11656, CLANG <- DISK:ll arg 2 mem 0.00 GB tm 10.62us/ 0.14ms ( 0.00 GFLOPS 1.1|1.1 GB/s)
opened device DISK:llama3-8b-sfr/model-00004-of-00004.safetensors from pid:31525
*** DISK:ll 16 empty 1168138808 dtypes.uchar arg 1 mem 0.00 GB
*** DISK:ll 17 view 8 @ 0 arg 2 mem 0.00 GB
*** CLANG 18 copy 8, CLANG <- DISK:ll arg 2 mem 0.00 GB tm 39.60us/ 0.18ms ( 0.00 GFLOPS 0.0|0.0 GB/s)
*** DISK:ll 19 view 560 @ 8 arg 2 mem 0.00 GB
*** CLANG 20 copy 560, CLANG <- DISK:ll arg 2 mem 0.00 GB tm 6.15us/ 0.19ms ( 0.00 GFLOPS 0.1|0.1 GB/s)
0%| | 0/292 [00:00<?, ?it/s]*** DISK:ll 21 view 33554432 @ 1444963632 arg 2 mem 0.00 GB
*** CLANG 22 copy 33.55M, CLANG <- DISK:ll arg 2 mem 0.03 GB tm 5320.86us/ 5.51ms ( 0.00 GFLOPS 6.3|6.3 GB/s)
E_4194304_4
UOp(UOps.SINK, dtypes.void, arg=KernelInfo(local_dims=0, upcasted=1, dont_use_locals=False), src=(
UOp(UOps.STORE, dtypes.void, arg=None, src=(
UOp(UOps.DEFINE_GLOBAL, PtrDType(dtypes.half), arg=0, src=()),
x2:=UOp(UOps.SHAPETRACKER, dtypes.void, arg=ShapeTracker(views=(View(shape=(4194304, 4), strides=(4, 1), offset=0, mask=None, contiguous=True),)), src=()),
UOp(UOps.CAST, dtypes.half, arg=None, src=(
UOp(UOps.LOAD, dtypes.bfloat16, arg=None, src=(
UOp(UOps.DEFINE_GLOBAL, PtrDType(dtypes.bfloat16), arg=1, src=()),
x2,)),)),)),))
[Opt(op=OptOps.UPCAST, axis=0, amt=4)]
typedef __fp16 __fp164 __attribute__((aligned(8),vector_size(8)));
void E_4194304_4(__fp16* restrict data0, const __bf16* restrict data1) {
for (int ridx0 = 0; ridx0 < 4194304; ridx0++) {
int alu0 = (ridx0<<2);
__bf16 val0 = data1[alu0+1];
__bf16 val1 = data1[alu0+2];
__bf16 val2 = data1[alu0+3];
__bf16 val3 = data1[alu0];
*((__fp164*)(data0+alu0)) = (__fp164){(__fp16)(val3),(__fp16)(val0),(__fp16)(val1),(__fp16)(val2)};
}
}
fatal error: error in backend: Do not know how to soft promote this operator's operand!
clang: error: clang frontend command failed with exit code 70 (use -v to see invocation)
Ubuntu clang version 18.1.3 (1ubuntu1)
Target: x86_64-pc-linux-gnu
Thread model: posix
InstalledDir: /usr/bin
clang: note: diagnostic msg: Error generating preprocessed source(s) - ignoring input from stdin.
clang: note: diagnostic msg: Error generating preprocessed source(s) - no preprocessable inputs.
error lowering UOps.SINK
tensor operations:
(cast,)
loaded weights in 79.97 ms, 0.03 GB loaded at 0.42 GB/s
Traceback (most recent call last):
File "/home/lorenzo/projects/PDF_Anonymizer/llama3.py", line 250, in
Yeah, it's probably the __bf16.
Maybe not related, but bf16 was not working on my machine (clang --version: Ubuntu clang version 18.1.3 (1ubuntu1), Target: x86_64-pc-linux-gnu). I fixed it by removing the '-nostdlib' flag in ops_clang.py; alternatively, provide linkage for -rtlib.
Hit the same issue inside an Ubuntu 22.04 container (<stdin>:2:45: error: __bf16 is not supported on this target). Thanks to @vishfrnds's clue, I was able to get past it by upgrading to clang >= 18 with this:
# Upgrade to latest clang (stock is clang 14.x on Ubuntu 22.04 LTS)
export CLANG_VERSION=21
wget -qO- https://apt.llvm.org/llvm.sh | bash -s -- ${CLANG_VERSION}
# Tell tinygrad to use the new clang
export CC=clang-${CLANG_VERSION}
# Nuke the target arg
sed --in-place -e "s/f'--target={target}-none-unknown-elf', //" .venv/lib/python3*/site-packages/tinygrad/runtime/ops_cpu.py
closing as stale, feel free to open new issues