Halide
Halide copied to clipboard
Inlined function with update stage won't vectorize when parent function is scheduled with compute_with
In the following example:
const int f_size = 128;
const int g_size = 256;
Buffer<int> f_im(f_size, f_size, 5), g_im(g_size, g_size);
Var x("x"), y("y"), c("c"), xi("xi"), yi("yi"), yii("yii"), yo("yo");
Func f("f"), g("g"), h("h"), input("input");
input(x, y) = x;
f(x, y, c) = c * input(x, y);
h(x, y, c) = f(x, y, c);
Func inl("inl");
inl(x, y) = f(x / 2, y / 2, 0);
inl(x, y) += f(x / 2, y / 2, 2);
g(x, y) = inl(x, y);
g
.split(y, yo, y, 32 * 2, TailStrategy::RoundUp)
.split(y, y, yi, 2, TailStrategy::RoundUp)
.vectorize(x, 4, TailStrategy::GuardWithIf)
.compute_with(h, y, LoopAlignStrategy::AlignEnd)
//.compute_root()
;
h
.reorder(x, c, y)
.split(y, yo, y, 32, TailStrategy::RoundUp)
.split(y, y, yi, 1, TailStrategy::RoundUp)
.vectorize(x, 4, TailStrategy::GuardWithIf)
.compute_root();
g.bound(y, 0, g_size);
h.bound(y, 0, f_size).bound(c, 0, 5);
Pipeline p({h, g});
p.compile_jit();
Here the function inl is inlined into g, which is scheduled to be computed with h. g is vectorized across x, but in the final IR inl is serialized and computed in a scalar loop:
for (g.s0.x.x, 0, t266) {
allocate inl[int32 * 4]
let inl.s0.x.loop_extent.s = (g.extent.0 - (g.s0.x.x*4))
produce inl {
let t279 = max(min(inl.s0.x.loop_extent.s, 4), 0)
let t280 = (g.s0.x.x*2)
for (g.s0.x.v0, 0, t279) {
inl[g.s0.x.v0] = 0
inl[g.s0.x.v0] = (inl[g.s0.x.v0] + ((((g.min.0 + g.s0.x.v0)/2) + t280)*2))
}
}
consume inl {
g[ramp(((g.s0.x.x*4) + ((g.stride.1*t278) + t264)), 1, 4)] = inl[ramp(0, 1, 4)]
}
free inl
}
However, if we schedule g as compute_root, inl will be properly vectorized:
for (g.s0.x.x, 0, t50) {
allocate inl[int32 * 4]
produce inl {
inl[ramp(0, 1, 4)] = x4(0)
inl[ramp(0, 1, 4)] = interleave_vectors((ramp((((g.s0.x.x*2) + t46)*2), 2, 2) + inl[ramp(0, 2, 2)]), (ramp((((g.s0.x.x*2) + t47)*2), 2, 2) + inl[ramp(1, 2, 2)]))
}
consume inl {
g[ramp(((g.s0.x.x*4) + ((g.stride.1*t53) + t48)), 1, 4)] = inl[ramp(0, 1, 4)]
}
free inl
}