opus icon indicating copy to clipboard operation
opus copied to clipboard

Decoding produces all zeros

Open Lazarus404 opened this issue 10 months ago • 0 comments

Hi,

I'm trying to create a simple encoder/decoder test to find out what's going wrong in my build. Below is a simplified version:

// NewOpusCodec creates an OpusCodec holding a 48 kHz, 2-channel encoder in
// VoIP application mode and a decoder with the same sample rate and channel
// count.
//
// The encoder is configured for testing: 5000 bit/s, complexity 10, 0%
// expected packet loss, DTX disabled. The codec's frame size is fixed at 960
// samples per channel (20 ms at 48 kHz).
//
// It returns a nil codec and the underlying error if the encoder or decoder
// cannot be created or if any encoder setting is rejected.
func NewOpusCodec() (*OpusCodec, error) {
	encoder, err := opus.NewEncoder(48000, 2, opus.AppVoIP)
	if err != nil {
		return nil, err
	}

	// Configure encoder for testing (low bitrate, high complexity). Every
	// setter can fail, so stop at the first rejected value instead of
	// continuing with a half-configured encoder.
	if err := encoder.SetBitrate(5000); err != nil {
		return nil, err
	}
	if err := encoder.SetComplexity(10); err != nil {
		return nil, err
	}
	if err := encoder.SetPacketLossPerc(0); err != nil {
		return nil, err
	}
	if err := encoder.SetDTX(false); err != nil {
		return nil, err
	}

	// The decoder must use the same sample rate and channel count as the
	// encoder.
	decoder, err := opus.NewDecoder(48000, 2)
	if err != nil {
		return nil, err
	}

	return &OpusCodec{
		frameSize:      960, // 20ms at 48kHz
		encoder:        encoder,
		decoder:        decoder,
		sequenceNumber: 0,
		timestamp:      0,
		ssrc:           0x12345678, // Example fixed SSRC
	}, nil
}

// EncodeToOpus encodes one frame of mono float64 samples into an Opus packet.
//
// Each sample is clamped to [-1.0, 1.0], scaled to int16 PCM and written to
// both the left and right slots of an interleaved stereo buffer that holds
// c.frameSize samples per channel. Input shorter than one frame is padded
// with silence; samples beyond one frame are ignored so the PCM buffer is
// never indexed out of range.
//
// It returns the encoded packet, or a nil slice and the encoder's error if
// encoding fails.
func (c *OpusCodec) EncodeToOpus(samples []float64) ([]byte, error) {
	// Interleaved stereo buffer: two int16 values per input sample.
	pcm := make([]int16, c.frameSize*2)

	// Writing more than one frame would run past the end of pcm.
	if perChannel := len(pcm) / 2; len(samples) > perChannel {
		samples = samples[:perChannel]
	}

	for i, sample := range samples {
		// Ensure sample is in [-1.0, 1.0] range.
		if sample > 1.0 {
			sample = 1.0
		} else if sample < -1.0 {
			sample = -1.0
		}

		// Scale by 32767 (not 32768) so that +1.0 cannot overflow int16,
		// then duplicate the value for stereo.
		pcmValue := int16(sample * 32767.0)
		pcm[i*2] = pcmValue   // Left channel
		pcm[i*2+1] = pcmValue // Right channel
	}

	// Create buffer for Opus data (max size is 1275 bytes per frame).
	opusData := make([]byte, 1275)

	n, err := c.encoder.Encode(pcm, opusData)
	if err != nil {
		return nil, err
	}

	return opusData[:n], nil
}

// DecodeFromOpus decodes one Opus packet into mono float64 samples.
//
// The packet is decoded into an interleaved stereo int16 buffer sized for
// c.frameSize samples per channel. The count returned by the decoder is
// treated as the number of samples per channel; each output sample is the
// average of the left and right values divided by 32767.
//
// It returns a nil slice and the decoder's error if decoding fails.
//
// NOTE(review): the first samples of the first decoded frame may be silent
// because of codec delay, so a sample-by-sample comparison against the
// encoder input is presumably not meaningful — confirm against the Opus
// documentation.
func (c *OpusCodec) DecodeFromOpus(data []byte) ([]float64, error) {
	// Two int16 values (left, right) per decoded sample.
	pcm := make([]int16, c.frameSize*2)

	n, err := c.decoder.Decode(data, pcm)
	if err != nil {
		return nil, err
	}

	samples := make([]float64, n)
	for i := range samples {
		// Widen to float64 before adding: the sum of two int16 values can
		// exceed the int16 range and would otherwise wrap around.
		left := float64(pcm[i*2])
		right := float64(pcm[i*2+1])
		samples[i] = (left + right) / 2.0 / 32767.0
	}

	return samples, nil
}

And the test:

// TestOpusEncoding round-trips one 960-sample frame through the Opus codec
// and compares the decoded samples with the input.
//
// The input is an amplitude-modulated sine wave. The test logs the first
// input samples, the encoded packet bytes and the first decoded samples, then
// hands both sample slices to compareDecodedSamples for the quality checks.
func TestOpusEncoding(t *testing.T) {
	// Create test samples with known values
	samples := make([]float64, 960) // One frame of samples
	for i := range samples {
		// Create a simple sine wave with varying amplitude
		x := float64(i)
		samples[i] = math.Sin(x*0.1) * (0.5 + 0.5*math.Sin(x*0.01))
	}

	// Create Opus codec
	codec, err := codecs.NewOpusCodec()
	if err != nil {
		t.Fatalf("Failed to create Opus codec: %v", err)
	}

	// Log first few input samples
	t.Logf("First 10 input samples:")
	for i := 0; i < 10; i++ {
		t.Logf("Sample %d: %.4f", i, samples[i])
		// Calculate expected PCM value for comparison
		pcmValue := int16(samples[i] * 32767.0)
		t.Logf("  Expected PCM: %d", pcmValue)
	}

	// Test Opus encoding/decoding
	t.Run("Opus", func(t *testing.T) {
		// Encode samples to Opus
		encoded, err := codec.EncodeToOpus(samples)
		if err != nil {
			t.Fatalf("Failed to encode samples to Opus: %v", err)
		}

		// Log encoded data details
		t.Logf("\nEncoded Opus data details:")
		t.Logf("Size: %d bytes", len(encoded))
		t.Logf("First 20 bytes (hex):")
		for i := 0; i < min(20, len(encoded)); i++ {
			t.Logf("%02x ", encoded[i])
		}

		// Decode Opus back to samples
		decoded, err := codec.DecodeFromOpus(encoded)
		if err != nil {
			t.Fatalf("Failed to decode Opus: %v", err)
		}

		// Log first few decoded samples. The loop is bounded by the decoded
		// length so a short decode is reported by the comparison below
		// rather than panicking here.
		t.Logf("\nFirst 10 decoded samples:")
		for i := 0; i < min(10, len(decoded)); i++ {
			t.Logf("Sample %d: %.4f", i, decoded[i])
			// Calculate PCM value for comparison
			pcmValue := int16(decoded[i] * 32767.0)
			t.Logf("  Decoded PCM: %d", pcmValue)
		}

		// Compare samples
		compareDecodedSamples(t, samples, decoded)
	})
}

// compareDecodedSamples checks decoded against original and reports quality
// failures on t.
//
// It fails the test immediately when original is empty or the two slices
// differ in length, because every later check indexes both slices in
// lockstep. Otherwise it logs the average and maximum absolute differences
// and the value ranges, and reports an error when:
//   - the average difference exceeds 0.1,
//   - the maximum difference exceeds 0.2, or
//   - the decoded value range is below 80% of the original range.
func compareDecodedSamples(t *testing.T, original, decoded []float64) {
	// Validate lengths first: the range scan below reads original[0],
	// decoded[0] and decoded[i] for every i in original, which would panic
	// on a short or empty slice.
	if len(original) == 0 {
		t.Fatalf("No original samples to compare")
	}
	if len(decoded) != len(original) {
		t.Fatalf("Expected %d samples, got %d", len(original), len(decoded))
	}

	// Compare samples
	avgDiff, maxDiff := compareSamples(original, decoded)
	t.Logf("\nSample comparison:")
	t.Logf("Average difference: %.4f", avgDiff)
	t.Logf("Maximum difference: %.4f", maxDiff)

	// Opus is lossy, so some difference is expected, but it should stay
	// relatively small.
	if avgDiff > 0.1 {
		t.Errorf("Average difference %.4f is too high", avgDiff)
	}
	if maxDiff > 0.2 {
		t.Errorf("Maximum difference %.4f is too high", maxDiff)
	}

	// Find min/max of original and decoded
	origMin, origMax := original[0], original[0]
	decMin, decMax := decoded[0], decoded[0]
	for i := 1; i < len(original); i++ {
		if original[i] < origMin {
			origMin = original[i]
		}
		if original[i] > origMax {
			origMax = original[i]
		}
		if decoded[i] < decMin {
			decMin = decoded[i]
		}
		if decoded[i] > decMax {
			decMax = decoded[i]
		}
	}

	t.Logf("\nValue ranges:")
	t.Logf("Original: min=%.4f, max=%.4f", origMin, origMax)
	t.Logf("Decoded:  min=%.4f, max=%.4f", decMin, decMax)

	// The decoded range should be at least 80% of the original range
	origRange := origMax - origMin
	decRange := decMax - decMin
	if decRange < origRange*0.8 {
		t.Errorf("Decoded range %.4f is too small compared to original range %.4f", decRange, origRange)
	}
}

// compareSamples returns the average and maximum absolute element-wise
// difference between original and decoded.
//
// It returns 0, 0 when the slices differ in length or are both empty. The
// empty case is handled explicitly because dividing the zero sum by a zero
// length would yield NaN. Callers that need to tell a length mismatch from a
// perfect match must compare the lengths themselves.
func compareSamples(original, decoded []float64) (avgDiff, maxDiff float64) {
	if len(original) != len(decoded) || len(original) == 0 {
		return 0, 0
	}

	var sumDiff float64
	for i, want := range original {
		diff := math.Abs(want - decoded[i])
		sumDiff += diff
		if diff > maxDiff {
			maxDiff = diff
		}
	}

	return sumDiff / float64(len(original)), maxDiff
}

Now, when run, the decoded audio is always zeros:

=== RUN   TestOpusEncoding
    opus_test.go:46: First 10 input samples:
    opus_test.go:48: Sample 0: 0.0000
    opus_test.go:51:   Expected PCM: 0
    opus_test.go:48: Sample 1: 0.0504
    opus_test.go:51:   Expected PCM: 1651
    opus_test.go:48: Sample 2: 0.1013
    opus_test.go:51:   Expected PCM: 3319
    opus_test.go:48: Sample 3: 0.1522
    opus_test.go:51:   Expected PCM: 4986
    opus_test.go:48: Sample 4: 0.2025
    opus_test.go:51:   Expected PCM: 6635
    opus_test.go:48: Sample 5: 0.2517
    opus_test.go:51:   Expected PCM: 8247
    opus_test.go:48: Sample 6: 0.2993
    opus_test.go:51:   Expected PCM: 9805
    opus_test.go:48: Sample 7: 0.3446
    opus_test.go:51:   Expected PCM: 11292
    opus_test.go:48: Sample 8: 0.3873
    opus_test.go:51:   Expected PCM: 12692
    opus_test.go:48: Sample 9: 0.4269
    opus_test.go:51:   Expected PCM: 13987
=== RUN   TestOpusEncoding/Opus

First 10 PCM values before encoding:
PCM[0]: left=0, right=0
PCM[1]: left=1651, right=1651
PCM[2]: left=3319, right=3319
PCM[3]: left=4986, right=4986
PCM[4]: left=6635, right=6635
PCM[5]: left=8247, right=8247
PCM[6]: left=9805, right=9805
PCM[7]: left=11292, right=11292
PCM[8]: left=12692, right=12692
PCM[9]: left=13987, right=13987
Encoded 11 bytes of Opus data
First 20 bytes (hex): 08 85 ad 47 02 ba 96 42 11 58 80    opus_test.go:63:
        Encoded Opus data details:
    opus_test.go:64: Size: 11 bytes
    opus_test.go:65: First 20 bytes (hex):
    opus_test.go:67: 08
    opus_test.go:67: 85
    opus_test.go:67: ad
    opus_test.go:67: 47
    opus_test.go:67: 02
    opus_test.go:67: ba
    opus_test.go:67: 96
    opus_test.go:67: 42
    opus_test.go:67: 11
    opus_test.go:67: 58
    opus_test.go:67: 80

Decoding 11 bytes of Opus data
First 20 bytes (hex): 08 85 ad 47 02 ba 96 42 11 58 80
First 10 PCM values after decoding:
PCM[0]: left=0, right=0
PCM[1]: left=0, right=0
PCM[2]: left=0, right=0
PCM[3]: left=0, right=0
PCM[4]: left=0, right=0
PCM[5]: left=0, right=0
PCM[6]: left=0, right=0
PCM[7]: left=0, right=0
PCM[8]: left=0, right=0
PCM[9]: left=0, right=0    opus_test.go:77:
        First 10 decoded samples:
    opus_test.go:79: Sample 0: 0.0000
    opus_test.go:82:   Decoded PCM: 0
    opus_test.go:79: Sample 1: 0.0000
    opus_test.go:82:   Decoded PCM: 0
    opus_test.go:79: Sample 2: 0.0000
    opus_test.go:82:   Decoded PCM: 0
    opus_test.go:79: Sample 3: 0.0000
    opus_test.go:82:   Decoded PCM: 0
    opus_test.go:79: Sample 4: 0.0000
    opus_test.go:82:   Decoded PCM: 0
    opus_test.go:79: Sample 5: 0.0000
    opus_test.go:82:   Decoded PCM: 0
    opus_test.go:79: Sample 6: 0.0000
    opus_test.go:82:   Decoded PCM: 0
    opus_test.go:79: Sample 7: 0.0000
    opus_test.go:82:   Decoded PCM: 0
    opus_test.go:79: Sample 8: 0.0000
    opus_test.go:82:   Decoded PCM: 0
    opus_test.go:79: Sample 9: 0.0000
    opus_test.go:82:   Decoded PCM: 0
    opus_test.go:159:
        Sample comparison:
    opus_test.go:160: Average difference: 0.3608
    opus_test.go:161: Maximum difference: 1.0328
    opus_test.go:167: Average difference 0.3608 is too high
    opus_test.go:170: Maximum difference 1.0328 is too high
    opus_test.go:197:
        Value ranges:
    opus_test.go:198: Original: min=-0.9939, max=0.9936
    opus_test.go:199: Decoded:  min=-0.4706, max=0.4115
    opus_test.go:206: Decoded range 0.8821 is too small compared to original range 1.9875

I cannot, for the life of me, figure out what I'm doing wrong.

Any help appreciated.

Lazarus404 avatar Apr 02 '25 09:04 Lazarus404