ispc
ispc copied to clipboard
Differences in generated code between explicit vector element assignments and loops
// Plain aggregate holding four floats, accessed by index through V[0]..V[3].
struct FVector4
{
float V[4];
};
// Multiplies A and B component by component and returns the product.
// Despite the "3" in the name, all four elements of V are read, multiplied and written.
// The product is copied out of the short vector with four explicit element
// assignments (compare Mul4, which performs the same copy with a loop).
inline uniform FVector4 Mul3(const uniform FVector4 &A, const uniform FVector4 &B)
{
// Gather the four floats of each operand into a 4-wide uniform short vector.
uniform float<4> S0 = { A.V[0], A.V[1], A.V[2], A.V[3] };
uniform float<4> S1 = { B.V[0], B.V[1], B.V[2], B.V[3] };
// Multiply the two short vectors element by element.
uniform float<4> D = S0 * S1;
uniform FVector4 Result;
// Copy-out written by hand, one statement per element, with constant indices.
Result.V[0] = D[0];
Result.V[1] = D[1];
Result.V[2] = D[2];
Result.V[3] = D[3];
return Result;
}
// Multiplies A and B component by component and returns the product.
// Same computation as Mul3; the only difference is that the product is copied
// into Result with a uniform loop instead of four explicit assignments.
inline uniform FVector4 Mul4(const uniform FVector4 &A, const uniform FVector4 &B)
{
// Gather the four floats of each operand into a 4-wide uniform short vector.
uniform float<4> S0 = { A.V[0], A.V[1], A.V[2], A.V[3] };
uniform float<4> S1 = { B.V[0], B.V[1], B.V[2], B.V[3] };
// Multiply the two short vectors element by element.
uniform float<4> D = S0 * S1;
uniform FVector4 Result;
// Copy-out as a loop with a fixed trip count of 4; the short vector is indexed
// by the loop variable rather than by a constant.
for(uniform int i = 0; i < 4; i++)
{
Result.V[i] = D[i];
}
return Result;
}
// Driver for Mul3: for every i in [0, Count) stores Mul3(Src0[i], Src1[i]) into Dst[i].
// Nothing is written when Count <= 0.
// NOTE: Src2 is accepted but never read by this function.
// NOTE(review): declared with the ISPC `unmasked` qualifier; see the ISPC user's
// guide for its exact execution-mask semantics.
unmasked void TestUniform3(uniform FVector4 Dst[], const uniform FVector4 Src0[], const uniform FVector4 Src1[], const uniform FVector4 Src2[], const uniform int Count)
{
// Uniform loop: one FVector4 is processed per iteration.
for(uniform int i = 0; i < Count; i++)
{
Dst[i] = Mul3(Src0[i], Src1[i]);
}
}
// Driver for Mul4: for every i in [0, Count) stores Mul4(Src0[i], Src1[i]) into Dst[i].
// Nothing is written when Count <= 0.
// NOTE: Src2 is accepted but never read by this function.
// NOTE(review): declared with the ISPC `unmasked` qualifier; see the ISPC user's
// guide for its exact execution-mask semantics.
unmasked void TestUniform4(uniform FVector4 Dst[], const uniform FVector4 Src0[], const uniform FVector4 Src1[], const uniform FVector4 Src2[], const uniform int Count)
{
// Uniform loop: one FVector4 is processed per iteration.
for(uniform int i = 0; i < Count; i++)
{
Dst[i] = Mul4(Src0[i], Src1[i]);
}
}
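Mul3 and Mul4 should be functionally equivalent: they differ only in how the product is copied into Result (four explicit element assignments in Mul3 versus a uniform loop in Mul4). However, Mul4 generates extra mov instructions that should not be needed.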