mirror of
https://github.com/amnezia-vpn/amneziawg-go.git
synced 2025-08-03 02:02:50 +02:00
Use parallel summation with native byte order per RFC 1071. An add-with-carry operation is used to add 4 words per operation. A byteswap is performed before and after checksumming for compatibility with the old `checksumNoFold()`. With this we get a 30-80% speedup in `checksum()`, depending on packet size.

Add unit tests with comparison to a per-word implementation.

**Intel(R) Xeon(R) Silver 4210R CPU @ 2.40GHz**

| Size | OldTime | NewTime | Speedup  |
|------|---------|---------|----------|
| 64   | 12.64   | 9.183   | 1.376456 |
| 128  | 18.52   | 12.72   | 1.455975 |
| 256  | 31.01   | 18.13   | 1.710425 |
| 512  | 54.46   | 29.03   | 1.87599  |
| 1024 | 102     | 52.2    | 1.954023 |
| 1500 | 146.8   | 81.36   | 1.804326 |
| 2048 | 196.9   | 102.5   | 1.920976 |
| 4096 | 389.8   | 200.8   | 1.941235 |
| 8192 | 767.3   | 413.3   | 1.856521 |
| 9000 | 851.7   | 448.8   | 1.897727 |
| 9001 | 854.8   | 451.9   | 1.891569 |

**AMD EPYC 7352 24-Core Processor**

| Size | OldTime | NewTime | Speedup  |
|------|---------|---------|----------|
| 64   | 9.159   | 6.949   | 1.318031 |
| 128  | 13.59   | 10.59   | 1.283286 |
| 256  | 22.37   | 14.91   | 1.500335 |
| 512  | 41.42   | 24.22   | 1.710157 |
| 1024 | 81.59   | 45.05   | 1.811099 |
| 1500 | 120.4   | 68.35   | 1.761522 |
| 2048 | 162.8   | 90.14   | 1.806079 |
| 4096 | 321.4   | 180.3   | 1.782585 |
| 8192 | 650.4   | 360.8   | 1.802661 |
| 9000 | 706.3   | 398.1   | 1.774177 |
| 9001 | 712.4   | 398.2   | 1.789051 |

Signed-off-by: Tu Dinh Ngoc <dinhngoc.tu@irit.fr>
[Jason: simplified and cleaned up unit tests]
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
98 lines
2.1 KiB
Go
98 lines
2.1 KiB
Go
package tun
|
|
|
|
import (
|
|
"encoding/binary"
|
|
"fmt"
|
|
"math/rand"
|
|
"testing"
|
|
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
// checksumRef is a deliberately simple, word-at-a-time reference
// implementation used to cross-check the optimized checksum code.
//
// It adds initial to every big-endian 16-bit word of b. If b has an odd
// length, the final byte is treated as the high-order byte of a zero-padded
// word. Carries above 16 bits are folded back into the low 16 bits until none
// remain. The result is returned as-is; it is not complemented.
func checksumRef(b []byte, initial uint16) uint16 {
	sum := uint64(initial)

	// Add up every complete 16-bit word.
	paired := len(b) &^ 1
	for off := 0; off < paired; off += 2 {
		sum += uint64(binary.BigEndian.Uint16(b[off:]))
	}

	// A leftover byte occupies the high half of the last word.
	if len(b)&1 == 1 {
		sum += uint64(b[len(b)-1]) << 8
	}

	// End-around carry: keep folding until the sum fits in 16 bits.
	for sum > 0xffff {
		sum = (sum & 0xffff) + (sum >> 16)
	}
	return uint16(sum)
}
|
|
|
|
func pseudoHeaderChecksumRefNoFold(protocol uint8, srcAddr, dstAddr []byte, totalLen uint16) uint16 {
|
|
sum := checksumRef(srcAddr, 0)
|
|
sum = checksumRef(dstAddr, sum)
|
|
sum = checksumRef([]byte{0, protocol}, sum)
|
|
tmp := make([]byte, 2)
|
|
binary.BigEndian.PutUint16(tmp, totalLen)
|
|
return checksumRef(tmp, sum)
|
|
}
|
|
|
|
func TestChecksum(t *testing.T) {
|
|
for length := 0; length <= 9001; length++ {
|
|
buf := make([]byte, length)
|
|
rng := rand.New(rand.NewSource(1))
|
|
rng.Read(buf)
|
|
csum := checksum(buf, 0x1234)
|
|
csumRef := checksumRef(buf, 0x1234)
|
|
if csum != csumRef {
|
|
t.Error("Expected checksum", csumRef, "got", csum)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestPseudoHeaderChecksum(t *testing.T) {
|
|
for _, addrLen := range []int{4, 16} {
|
|
for length := 0; length <= 9001; length++ {
|
|
srcAddr := make([]byte, addrLen)
|
|
dstAddr := make([]byte, addrLen)
|
|
buf := make([]byte, length)
|
|
rng := rand.New(rand.NewSource(1))
|
|
rng.Read(srcAddr)
|
|
rng.Read(dstAddr)
|
|
rng.Read(buf)
|
|
phSum := pseudoHeaderChecksumNoFold(unix.IPPROTO_TCP, srcAddr, dstAddr, uint16(length))
|
|
csum := checksum(buf, phSum)
|
|
phSumRef := pseudoHeaderChecksumRefNoFold(unix.IPPROTO_TCP, srcAddr, dstAddr, uint16(length))
|
|
csumRef := checksumRef(buf, phSumRef)
|
|
if csum != csumRef {
|
|
t.Error("Expected checksumRef", csumRef, "got", csum)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func BenchmarkChecksum(b *testing.B) {
|
|
lengths := []int{
|
|
64,
|
|
128,
|
|
256,
|
|
512,
|
|
1024,
|
|
1500,
|
|
2048,
|
|
4096,
|
|
8192,
|
|
9000,
|
|
9001,
|
|
}
|
|
|
|
for _, length := range lengths {
|
|
b.Run(fmt.Sprintf("%d", length), func(b *testing.B) {
|
|
buf := make([]byte, length)
|
|
rng := rand.New(rand.NewSource(1))
|
|
rng.Read(buf)
|
|
b.ResetTimer()
|
|
for i := 0; i < b.N; i++ {
|
|
checksum(buf, 0)
|
|
}
|
|
})
|
|
}
|
|
}
|