hertz icon indicating copy to clipboard operation
hertz copied to clipboard

SSE 中 扫描解析 stream的 EOL 逻辑 基于 bytes.IndexByte() 实现 是否最佳

Open alimy opened this issue 10 months ago • 2 comments

Issue(#1373 #1377)

SSE 中扫描解析 stream 的 EOL 逻辑基于 bytes.IndexByte() 实现,在本场景中是否最佳?

// file: pkg/protocol/sse/utils.go

// scanEOL splits an SSE event stream into lines. Its signature has the shape
// of a bufio.SplitFunc, so it can be installed on a bufio.Scanner.
//
// Per https://html.spec.whatwg.org/multipage/server-sent-events.html#parsing-an-event-stream
//
//	end-of-line = ( cr lf / cr / lf )
//
// Returns:
//   - advance: number of bytes consumed, including the end-of-line marker;
//     0 means more data is required before a line can be produced.
//   - token: the line without its end-of-line marker (a sub-slice of data);
//     nil when no line is available yet.
//   - err: always nil.
//
// When atEOF is true and no end-of-line marker is present, the remaining
// bytes are returned as the final line.
func scanEOL(data []byte, atEOF bool) (advance int, token []byte, err error) {
	if atEOF && len(data) == 0 {
		return 0, nil, nil
	}

	lf := bytes.IndexByte(data, '\n')

	// A '\r' can only terminate the current line if it appears before the
	// first '\n', so restrict the '\r' search to that prefix. This avoids
	// rescanning the whole buffer for '\r' on every call when the stream
	// uses bare '\n' line endings.
	head := data
	if lf >= 0 {
		head = data[:lf]
	}
	cr := bytes.IndexByte(head, '\r')

	switch {
	case cr < 0:
		if lf >= 0 {
			return lf + 1, data[:lf], nil // \n
		}
	case cr+1 == lf:
		return cr + 2, data[:cr], nil // \r\n
	case lf >= 0 || cr+1 < len(data) || atEOF:
		// Lone '\r' followed by at least one byte that is not '\n', or
		// '\r' as the very last byte of the stream.
		return cr + 1, data[:cr], nil // \r
	}
	// Reaching here with cr >= 0 means data ends with '\r' and more input may
	// follow: per spec we must see the next byte to rule out "\r\n", which
	// may block until more data is read.

	if atEOF {
		return len(data), data, nil
	}
	return 0, nil, nil // more data
}

// scanEOL2 splits an SSE event stream into lines with a single forward pass
// over data. Its signature has the shape of a bufio.SplitFunc.
//
// Per https://html.spec.whatwg.org/multipage/server-sent-events.html#parsing-an-event-stream
//
//	end-of-line = ( cr lf / cr / lf )
//
// advance is the number of bytes consumed (line plus its terminator), token is
// the line without the terminator, and err is always nil. A zero advance with
// a nil token asks the caller for more data.
func scanEOL2(data []byte, atEOF bool) (advance int, token []byte, err error) {
	n := len(data)
	if n == 0 && atEOF {
		return 0, nil, nil
	}

	for pos := 0; pos < n; pos++ {
		b := data[pos]
		if b == '\n' {
			// Bare LF terminates the line.
			return pos + 1, data[:pos], nil
		}
		if b != '\r' {
			continue
		}

		// CR found: it is either the first half of CRLF or a terminator on
		// its own, which can only be decided by looking at the next byte.
		next := pos + 1
		if next < n {
			if data[next] == '\n' {
				return next + 1, data[:pos], nil // \r\n
			}
			return next, data[:pos], nil // lone \r
		}
		if atEOF {
			// CR is the final byte of the stream; nothing can follow it.
			return next, data[:pos], nil
		}
		// CR is the last byte seen so far and more input may arrive. Per spec
		// the following byte must be checked for '\n', so wait for more data
		// (this may block the reader). The loop ends here since next == n.
	}

	if atEOF {
		// No terminator at all: emit the remainder as the last line.
		return n, data, nil
	}
	return 0, nil, nil // more data
}
// file: pkg/protocol/sse/utils_test.go

// BenchmarkScanEOL measures scanEOL over a fixed table of short and long
// inputs covering "\r\n", "\r", "\n", missing terminators and EOF handling.
// Every result is also verified, so the benchmark fails fast if scanEOL
// returns an unexpected advance or token.
func BenchmarkScanEOL(b *testing.B) {
	tests := []struct {
		data    string
		atEOF   bool
		advance int
		token   string
	}{
		{"", true, 0, ""},
		{"", false, 0, ""},

		{"hello long long long long  long long long long  long long long long\r\nworld", false, 69, "hello long long long long  long long long long  long long long long"},
		{"hello long long long long  long long long long  long long long long\rworld", false, 68, "hello long long long long  long long long long  long long long long"},
		{"hello long long long long  long long long long  long long long long\nworld", false, 68, "hello long long long long  long long long long  long long long long"},
		{"hello long long long long  long long long long  long long long long world", false, 0, ""},
		{"hello long long long long  long long long long  long long long long world", true, 73, "hello long long long long  long long long long  long long long long world"},

		{"hello\r\nworld", false, 7, "hello"},
		{"hello\rworld", false, 6, "hello"},
		{"hello\nworld", false, 6, "hello"},
		{"hello world", false, 0, ""},
		{"hello world", true, 11, "hello world"},
		{"\r", false, 0, ""},
		{"hello\r", false, 0, ""},
		{"hello\r", true, 6, "hello"},
		{"\n", false, 1, ""},
		{"\r\nhello", false, 2, ""},
		{"\r\n", false, 2, ""},
	}

	// Convert the inputs to []byte once, outside the timed region, so the
	// string-to-slice copies (and their allocations) are not attributed to
	// the function under test.
	inputs := make([][]byte, len(tests))
	for k, tc := range tests {
		inputs[k] = []byte(tc.data)
	}

	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		for k, tc := range tests {
			advance, token, err := scanEOL(inputs[k], tc.atEOF)
			if err != nil || advance != tc.advance || string(token) != tc.token {
				b.Fatalf("scanEOL(data=%q, atEOF=%v) returns (%d, %q, %v) expect (%d, %q, <nil>)",
					tc.data, tc.atEOF, advance, string(token), err, tc.advance, tc.token)
			}
		}
	}
}

// BenchmarkScanEOL2 measures scanEOL2 over a fixed table of short and long
// inputs covering "\r\n", "\r", "\n", missing terminators and EOF handling.
// Every result is also verified, so the benchmark fails fast if scanEOL2
// returns an unexpected advance or token.
func BenchmarkScanEOL2(b *testing.B) {
	tests := []struct {
		data    string
		atEOF   bool
		advance int
		token   string
	}{
		{"", true, 0, ""},
		{"", false, 0, ""},

		{"hello long long long long  long long long long  long long long long\r\nworld", false, 69, "hello long long long long  long long long long  long long long long"},
		{"hello long long long long  long long long long  long long long long\rworld", false, 68, "hello long long long long  long long long long  long long long long"},
		{"hello long long long long  long long long long  long long long long\nworld", false, 68, "hello long long long long  long long long long  long long long long"},
		{"hello long long long long  long long long long  long long long long world", false, 0, ""},
		{"hello long long long long  long long long long  long long long long world", true, 73, "hello long long long long  long long long long  long long long long world"},

		{"hello\r\nworld", false, 7, "hello"},
		{"hello\rworld", false, 6, "hello"},
		{"hello\nworld", false, 6, "hello"},
		{"hello world", false, 0, ""},
		{"hello world", true, 11, "hello world"},
		{"\r", false, 0, ""},
		{"hello\r", false, 0, ""},
		{"hello\r", true, 6, "hello"},
		{"\n", false, 1, ""},
		{"\r\nhello", false, 2, ""},
		{"\r\n", false, 2, ""},
	}

	// Convert the inputs to []byte once, outside the timed region, so the
	// string-to-slice copies (and their allocations) are not attributed to
	// the function under test.
	inputs := make([][]byte, len(tests))
	for k, tc := range tests {
		inputs[k] = []byte(tc.data)
	}

	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		for k, tc := range tests {
			advance, token, err := scanEOL2(inputs[k], tc.atEOF)
			if err != nil || advance != tc.advance || string(token) != tc.token {
				b.Fatalf("scanEOL2(data=%q, atEOF=%v) returns (%d, %q, %v) expect (%d, %q, <nil>)",
					tc.data, tc.atEOF, advance, string(token), err, tc.advance, tc.token)
			}
		}
	}
}

benchmark

> go test -bench=ScanEOL* -benchmem 
goos: linux
goarch: amd64
pkg: github.com/cloudwego/hertz/pkg/protocol/sse
cpu: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz
BenchmarkScanEOL-8       1581817               681.9 ns/op           400 B/op          5 allocs/op
BenchmarkScanEOL2-8      2307902               515.6 ns/op             0 B/op          0 allocs/op
PASS
ok      github.com/cloudwego/hertz/pkg/protocol/sse     3.607s

alimy avatar Jun 11 '25 16:06 alimy

我是出于好奇才 benchmark 了一下。最开始我也以为,既然 bytes.IndexByte() 有指令级优化,应该会快很多;结果是两种逻辑差不多快。如果注释掉下面这几条稍微长一点的文本用例:

		{"hello long long long long  long long long long  long long long long\r\nworld", false, 69, "hello long long long long  long long long long  long long long long"},
		{"hello long long long long  long long long long  long long long long\rworld", false, 68, "hello long long long long  long long long long  long long long long"},
		{"hello long long long long  long long long long  long long long long\nworld", false, 68, "hello long long long long  long long long long  long long long long"},
		{"hello long long long long  long long long long  long long long long world", false, 0, ""},
		{"hello long long long long  long long long long  long long long long world", true, 73, "hello long long long long  long long long long  long long long long world"},

结果是

> go test -bench=ScanEOL* -benchmem 
goos: linux
goarch: amd64
pkg: github.com/cloudwego/hertz/pkg/protocol/sse
cpu: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz
BenchmarkScanEOL-8       4079202               278.3 ns/op             0 B/op          0 allocs/op
BenchmarkScanEOL2-8      6956415               156.8 ns/op             0 B/op          0 allocs/op
PASS
ok      github.com/cloudwego/hertz/pkg/protocol/sse     2.875s

go version

> go version
go version go1.24.3 linux/amd64

scanEOL2() 看起来稍微快一点,但不确定其逻辑是否完全符合设计,或者说还需要更多的测试用例来验证。同时我也比较好奇:如果 data 文本很大(比如几 kB),结果会怎样?有时间且感兴趣的同学可以在本机上测试一下。

(本Issue只是一时好奇探索了一下,暂时也没太多时间探索,后面闲了继续看看~)

alimy avatar Jun 11 '25 17:06 alimy

感谢反馈。

xiaost avatar Jun 12 '25 03:06 xiaost