Skip to content

Commit d7e2a5b

Browse files
committed
Draft neon support
1 parent 58ce266 commit d7e2a5b

File tree

3 files changed

+46
-5
lines changed

3 files changed

+46
-5
lines changed

pkg/search/asm_neon.go

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
//go:build ignore
2+
3+
package main
4+
5+
import (
6+
. "github.com/mmcloughlin/avo/build"
7+
. "github.com/mmcloughlin/avo/operand"
8+
"github.com/mmcloughlin/avo/reg"
9+
)
10+
11+
// main describes the body of the "findInChunk" assembly routine through the
// avo builder API. It is a draft of the NEON (ARM64) port: the actual search
// step is still commented out, so the routine currently only prepares the
// needle operands and returns.
//
// NOTE(review): no Generate() call is visible in this function, so presumably
// no output is written yet — confirm before wiring up go:generate.
12+
func main() {
13+
TEXT("findInChunk", NOSPLIT, "func(needle []byte, haystack []byte) int64")
14+
Doc("findInChunk is only generated for testing.")
15+
// The haystack pointer is only needed by the disabled inlineFindInChunk call
// below, so its load is disabled as well.
//hptr := Load(Param("haystack").Base(), GP64())
16+
// After the DECQ, needleLen holds len(needle)-1; inlineSplat adds it to the
// needle base address to reach the last needle byte.
needleLen := Load(Param("needle").Len(), GP64()); DECQ(needleLen)
17+
needle := Load(Param("needle").Base(), GP64())
18+
// Both returned vector registers are discarded for now; they were the f and l
// arguments of the disabled inlineFindInChunk call.
_, _ = inlineSplat(needle, needleLen)
19+
20+
//offset := inlineFindInChunk("test", f, l, hptr, needle, needleLen)
21+
// NOTE(review): offset is a freshly allocated register that nothing writes to
// before it is stored as the result, so the returned value looks undefined
// until the call above is restored — confirm.
offset := GP64()
22+
23+
Store(offset, ReturnIndex(0))
24+
RET()
25+
}
26+
27+
// inlineSplat fills one 128bit register with repeated first needle char and
28+
// another with repeated last needle char.
//
// needle0 holds the address of the first needle byte and needleLen the offset
// that is added to it to reach the last byte (main passes len(needle)-1).
// The returned pair is (first-char vector, last-char vector).
//
// NOTE(review): the registers are allocated with YMM(), which in avo is
// presumably the 256-bit AVX class rather than a 128-bit NEON register, and
// the instructions used below are x86 mnemonics — this looks like the AVX2
// version kept as a placeholder; confirm before relying on it for ARM64.
29+
func inlineSplat(needle0, needleLen reg.Register) (reg.VecVirtual, reg.VecVirtual) {
30+
Comment("create vector filled with first and last character")
31+
f := YMM()
32+
l := YMM()
33+
34+
// NOTE(review): bare identifier, not a call — looks like a placeholder for
// the NEON duplicate (DUP) instruction. As written it likely does not
// compile; confirm and replace or remove.
VDUP
35+
36+
// Copy the base address so needle0 keeps pointing at the first byte while
// needle1 is advanced to the last byte.
needle1 := GP64(); MOVQ(needle0, needle1);
37+
ADDQ(needleLen, needle1)
38+
// Broadcast the byte at each address into every lane of its vector register.
VPBROADCASTB(Mem{Base: needle0}, f)
39+
VPBROADCASTB(Mem{Base: needle1}, l)
40+
41+
return f, l
42+
}

pkg/search/bytes_neon_arm64.go

+3
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ import (
44
"bytes"
55
)
66

7+
// findInChunk is only generated for testing.
8+
func findInChunk(needle []byte, haystack []byte) int64
9+
710
// indexNeon returns the first position the needle is in the haystack.
811
func indexNeon (haystack []byte, needle []byte) int64 {
912
// TODO: port to ARM64 Neon. This file will be generated.

pkg/search/bytes_test.go

+1-5
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@ import (
99
"testing"
1010

1111
"github.com/stretchr/testify/require"
12-
"golang.org/x/sys/cpu"
1312
)
1413

1514
//go:generate go run asm_avx2.go -out bytes_avx2_amd64.s -stubs bytes_avx2_amd64.go
15+
//go:generate go run asm_neon.go -out bytes_neon_arm64.s -stubs bytes_neon_arm64.go
1616

1717
func TestSimpleIndex(t *testing.T) {
1818
for _, tt := range []struct {
@@ -129,10 +129,6 @@ func TestMask(t *testing.T) {
129129
}
130130
}
131131

132-
func TestArm(t * testing.T) {
133-
require.True(t, cpu.ARM64.HasASIMD)
134-
}
135-
136132
func BenchmarkIndexSmall(b *testing.B) {
137133
needle := []byte("goldner7875")
138134
haystack, err := loadHaystack("small.log")

0 commit comments

Comments
 (0)