ispc icon indicating copy to clipboard operation
ispc copied to clipboard

Differences in behavior with explicit vector and loops

Open JeffRous opened this issue 6 years ago • 0 comments

// Aggregate of four floats; used as the operand and result type of
// Mul3 and Mul4 and as the array element type of the TestUniform* entry points.
struct FVector4
{
	// Component storage, indexed 0..3 by the functions in this file.
	float V[4]; 
};

// Returns the component-wise product of A and B.
//
// Both operands are first copied into uniform float<4> short vectors, the
// short vectors are multiplied, and the four lanes of the product are then
// written to the result with explicit, unrolled assignments. Mul4 performs the
// same computation but writes the lanes out with a loop.
inline uniform FVector4 Mul3(const uniform FVector4 &A, const uniform FVector4 &B)
{
    // Gather the four components of each operand into a short vector.
    uniform float<4> S0 = { A.V[0], A.V[1], A.V[2], A.V[3] };
    uniform float<4> S1 = { B.V[0], B.V[1], B.V[2], B.V[3] };
    // Short-vector multiply: D[k] = S0[k] * S1[k].
    uniform float<4> D =  S0 * S1;

    // Store each lane individually (no loop) -- this is the baseline form.
    uniform FVector4 Result;
    Result.V[0] = D[0];
    Result.V[1] = D[1];
    Result.V[2] = D[2];
    Result.V[3] = D[3];
    return Result;
}

// Returns the component-wise product of A and B.
//
// Identical to Mul3 up to the computation of D; the only difference is that
// the four lanes of D are copied into the result with a uniform loop instead
// of four explicit assignments. The accompanying issue text reports that this
// variant generates extra mov instructions compared with Mul3.
inline uniform FVector4 Mul4(const uniform FVector4 &A, const uniform FVector4 &B)
{
    // Gather the four components of each operand into a short vector.
    uniform float<4> S0 = { A.V[0], A.V[1], A.V[2], A.V[3] };
    uniform float<4> S1 = { B.V[0], B.V[1], B.V[2], B.V[3] };
    // Short-vector multiply: D[k] = S0[k] * S1[k].
    uniform float<4> D =  S0 * S1;

    // Store the lanes via a loop with a uniform index -- the form under test.
    uniform FVector4 Result;
    for(uniform int i = 0; i < 4; i++)
    {
        Result.V[i] = D[i];
    }

    return Result;
}

// Driver for Mul3: for each i in [0, Count), stores Mul3(Src0[i], Src1[i])
// into Dst[i]. Does nothing when Count <= 0.
//
// Src2 is accepted but never read by this function.
unmasked void TestUniform3(uniform FVector4 Dst[], const uniform FVector4 Src0[], const uniform FVector4 Src1[], const uniform FVector4 Src2[], const uniform int Count)
{
	// Uniform loop: one FVector4 element is processed per iteration.
	for(uniform int i = 0; i < Count; i++)
	{
        Dst[i] = Mul3(Src0[i], Src1[i]);
	}
}

// Driver for Mul4: for each i in [0, Count), stores Mul4(Src0[i], Src1[i])
// into Dst[i]. Does nothing when Count <= 0.
//
// Src2 is accepted but never read by this function.
unmasked void TestUniform4(uniform FVector4 Dst[], const uniform FVector4 Src0[], const uniform FVector4 Src1[], const uniform FVector4 Src2[], const uniform int Count)
{
	// Uniform loop: one FVector4 element is processed per iteration.
	for(uniform int i = 0; i < Count; i++)
	{
        Dst[i] = Mul4(Src0[i], Src1[i]);
	}
}

Mul3 and Mul4 should be functionally equivalent. However, Mul4 generates extra mov instructions that shouldn't be needed.

JeffRous avatar Jun 05 '19 23:06 JeffRous