Files
lo/exp/simd/math_sse.go
T
Samuel Berthe 035f1b358a Experiments: adding SIMD helpers (#801)
* feat(exp,simd): adding SumAxB helpers

* feat(exp,simd): adding MeanAxB and ClampAxB helpers

* feat(exp,simd): adding MinAxB and MaxAxB helpers

* refactor(exp,simd): group perf helper category + architecture

* feat(exp,simd): adding ContainsAxB helpers

* perf(exp,simd): cast to unsafe slice once

* feat(exp,simd): call the right SIMD helper based on local architecture

* chore: internal dependency linking

* Update exp/simd/math.go

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* style: fix linter

* style: fix linter

* chore: enable simd in makefile

* chore(ci): add simd package to test runs

* chore(ci): add simd package to test runs only for go 1.26

* fix(simd): fix overflow

* fix(simd): fix overflow and apply the same behavior as lo.Mean

* doc(exp,simd): adding initial doc

* refactor(simd): move intersect_avx2 and intersect_sse code into intersect_avx512

* fix(simd): call SSE fallback instead of lo.Sum for default helpers

* feat(simd): cache simd features on package init to avoid repeated checks

* perf(exp,simd): precompute length + improve code quality

* perf(exp,simd): faster iteration for min/max value

* test(exp,simd): adding benchmarks

* test(exp,simd): adding benchmarks results

* test(exp,simd): adding benchmarks results

* doc(exp,simd): adding warning for overflows in SIMD operations

* feat(exp,simd): adding more dispatch helpers

* feat(exp,simd): adding SumBy variants

* feat(exp,simd): adding MeanBy variants

* fix(exp,simd): faster clamp

* 💄

* doc(exp,simd): adding SumBy + MeanBy

* fix(exp,simd): faster SIMD operations

* chore(ci): enable the benchmarks temporarily

* chore(ci): display cpu architecture before running tests

* chore(ci): github actions are hiding some useful stuff

* chore(ci): no SIMD VM available at Github during the weekend ???

* test(exp,simd): larger epsilon

* oops

* perf(exp,simd): faster iterations

* doc(exp,simd): report last version of benchmarks

* 💄

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2026-02-21 19:19:36 +01:00

1850 lines
44 KiB
Go

//go:build go1.26 && goexperiment.simd && amd64
package simd
import (
"simd/archsimd"
"unsafe"
"github.com/samber/lo"
)
// SSE (128-bit) SIMD sum functions - 16/8/4/2 lanes
// SumInt8x16 sums a slice of int8 using SSE SIMD (Int8x16, 16 lanes).
// Overflow: The accumulation is performed using int8, which can overflow for large collections.
// If the sum exceeds the int8 range (-128 to 127), the result will wrap around silently.
// For collections that may overflow, consider using a wider type or handle overflow detection externally.
func SumInt8x16[T ~int8](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes16
base := unsafeSliceInt8(collection, length)
var acc archsimd.Int8x16
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadInt8x16Slice(base[i : i+lanes])
acc = acc.Add(v)
}
var buf [lanes]int8
acc.Store(&buf)
var sum T
for k := uint(0); k < lanes; k++ {
sum += T(buf[k])
}
for ; i < length; i++ {
sum += collection[i]
}
return sum
}
// SumInt16x8 sums a slice of int16 using SSE SIMD (Int16x8, 8 lanes).
// Overflow: The accumulation is performed using int16, which can overflow for large collections.
// If the sum exceeds the int16 range (-32768 to 32767), the result will wrap around silently.
// For collections that may overflow, consider using a wider type or handle overflow detection externally.
func SumInt16x8[T ~int16](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes8
base := unsafeSliceInt16(collection, length)
var acc archsimd.Int16x8
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadInt16x8Slice(base[i : i+lanes])
acc = acc.Add(v)
}
var buf [lanes]int16
acc.Store(&buf)
var sum T
for k := uint(0); k < lanes; k++ {
sum += T(buf[k])
}
for ; i < length; i++ {
sum += collection[i]
}
return sum
}
// SumInt32x4 sums a slice of int32 using SSE SIMD (Int32x4, 4 lanes).
// Overflow: The accumulation is performed using int32, which can overflow for very large collections.
// If the sum exceeds the int32 range (-2147483648 to 2147483647), the result will wrap around silently.
// For collections that may overflow, consider using SumInt64x2 or handle overflow detection externally.
func SumInt32x4[T ~int32](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes4
base := unsafeSliceInt32(collection, length)
var acc archsimd.Int32x4
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadInt32x4Slice(base[i : i+lanes])
acc = acc.Add(v)
}
var buf [lanes]int32
acc.Store(&buf)
var sum T
for k := uint(0); k < lanes; k++ {
sum += T(buf[k])
}
for ; i < length; i++ {
sum += collection[i]
}
return sum
}
// SumInt64x2 sums a slice of int64 using SSE SIMD (Int64x2, 2 lanes).
// Overflow: The accumulation is performed using int64, which can overflow for extremely large collections.
// If the sum exceeds the int64 range, the result will wrap around silently.
// For collections that may overflow, handle overflow detection externally (e.g., using big.Int).
func SumInt64x2[T ~int64](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes2
base := unsafeSliceInt64(collection, length)
var acc archsimd.Int64x2
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadInt64x2Slice(base[i : i+lanes])
acc = acc.Add(v)
}
var buf [lanes]int64
acc.Store(&buf)
var sum T
for k := uint(0); k < lanes; k++ {
sum += T(buf[k])
}
for ; i < length; i++ {
sum += collection[i]
}
return sum
}
// SumUint8x16 sums a slice of uint8 using SSE SIMD (Uint8x16, 16 lanes).
// Overflow: The accumulation is performed using uint8, which can overflow for large collections.
// If the sum exceeds the uint8 range (0 to 255), the result will wrap around silently.
// For collections that may overflow, consider using a wider type or handle overflow detection externally.
func SumUint8x16[T ~uint8](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes16
base := unsafeSliceUint8(collection, length)
var acc archsimd.Uint8x16
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadUint8x16Slice(base[i : i+lanes])
acc = acc.Add(v)
}
var buf [lanes]uint8
acc.Store(&buf)
var sum T
for k := uint(0); k < lanes; k++ {
sum += T(buf[k])
}
for ; i < length; i++ {
sum += collection[i]
}
return sum
}
// SumUint16x8 sums a slice of uint16 using SSE SIMD (Uint16x8, 8 lanes).
// Overflow: The accumulation is performed using uint16, which can overflow for large collections.
// If the sum exceeds the uint16 range (0 to 65535), the result will wrap around silently.
// For collections that may overflow, consider using a wider type or handle overflow detection externally.
func SumUint16x8[T ~uint16](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes8
base := unsafeSliceUint16(collection, length)
var acc archsimd.Uint16x8
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadUint16x8Slice(base[i : i+lanes])
acc = acc.Add(v)
}
var buf [lanes]uint16
acc.Store(&buf)
var sum T
for k := uint(0); k < lanes; k++ {
sum += T(buf[k])
}
for ; i < length; i++ {
sum += collection[i]
}
return sum
}
// SumUint32x4 sums a slice of uint32 using SSE SIMD (Uint32x4, 4 lanes).
// Overflow: The accumulation is performed using uint32, which can overflow for very large collections.
// If the sum exceeds the uint32 range (0 to 4294967295), the result will wrap around silently.
// For collections that may overflow, consider using SumUint64x2 or handle overflow detection externally.
func SumUint32x4[T ~uint32](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes4
base := unsafeSliceUint32(collection, length)
var acc archsimd.Uint32x4
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadUint32x4Slice(base[i : i+lanes])
acc = acc.Add(v)
}
var buf [lanes]uint32
acc.Store(&buf)
var sum T
for k := uint(0); k < lanes; k++ {
sum += T(buf[k])
}
for ; i < length; i++ {
sum += collection[i]
}
return sum
}
// SumUint64x2 sums a slice of uint64 using SSE SIMD (Uint64x2, 2 lanes).
// Overflow: The accumulation is performed using uint64, which can overflow for extremely large collections.
// If the sum exceeds the uint64 range, the result will wrap around silently.
// For collections that may overflow, handle overflow detection externally (e.g., using big.Int).
func SumUint64x2[T ~uint64](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes2
base := unsafeSliceUint64(collection, length)
var acc archsimd.Uint64x2
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadUint64x2Slice(base[i : i+lanes])
acc = acc.Add(v)
}
var buf [lanes]uint64
acc.Store(&buf)
var sum T
for k := uint(0); k < lanes; k++ {
sum += T(buf[k])
}
for ; i < length; i++ {
sum += collection[i]
}
return sum
}
// SumFloat32x4 sums a slice of float32 using SSE SIMD (Float32x4, 4 lanes).
// Overflow: The accumulation is performed using float32. Overflow will result in +/-Inf rather than wrapping.
// For collections requiring high precision or large sums, consider using SumFloat64x2.
func SumFloat32x4[T ~float32](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes4
base := unsafeSliceFloat32(collection, length)
var acc archsimd.Float32x4
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadFloat32x4Slice(base[i : i+lanes])
acc = acc.Add(v)
}
var buf [lanes]float32
acc.Store(&buf)
var sum T
for k := uint(0); k < lanes; k++ {
sum += T(buf[k])
}
for ; i < length; i++ {
sum += collection[i]
}
return sum
}
// SumFloat64x2 sums a slice of float64 using SSE SIMD (Float64x2, 2 lanes).
// Overflow: The accumulation is performed using float64. Overflow will result in +/-Inf rather than wrapping.
// For collections that may overflow, handle overflow detection externally (e.g., using big.Float).
func SumFloat64x2[T ~float64](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes2
base := unsafeSliceFloat64(collection, length)
var acc archsimd.Float64x2
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadFloat64x2Slice(base[i : i+lanes])
acc = acc.Add(v)
}
var buf [lanes]float64
acc.Store(&buf)
var sum T
for k := uint(0); k < lanes; k++ {
sum += T(buf[k])
}
for ; i < length; i++ {
sum += collection[i]
}
return sum
}
// MeanInt8x16 calculates the mean of a slice of int8 using SSE SIMD.
// It divides SumInt8x16(collection) by the collection length converted to T
// (integer division) and returns 0 for an empty collection.
// Overflow: both the sum and the converted length wrap around in int8, so the
// result is only meaningful while both fit in the int8 range. When the length
// itself wraps to zero (any multiple of 256 elements) the function returns 0
// instead of panicking with an integer divide by zero.
func MeanInt8x16[T ~int8](collection []T) T {
	// Convert before testing for zero: the conversion truncates, so a non-empty
	// collection can still produce a zero divisor.
	count := T(len(collection))
	if count == 0 {
		return 0
	}
	return SumInt8x16(collection) / count
}
// MeanInt16x8 calculates the mean of a slice of int16 using SSE SIMD.
// It divides SumInt16x8(collection) by the collection length converted to T
// (integer division) and returns 0 for an empty collection.
// Overflow: both the sum and the converted length wrap around in int16, so the
// result is only meaningful while both fit in the int16 range. When the length
// itself wraps to zero (any multiple of 65536 elements) the function returns 0
// instead of panicking with an integer divide by zero.
func MeanInt16x8[T ~int16](collection []T) T {
	// Convert before testing for zero: the conversion truncates, so a non-empty
	// collection can still produce a zero divisor.
	count := T(len(collection))
	if count == 0 {
		return 0
	}
	return SumInt16x8(collection) / count
}
// MeanInt32x4 calculates the mean of a slice of int32 using SSE SIMD.
// It divides SumInt32x4(collection) by the collection length converted to T
// (integer division) and returns 0 for an empty collection.
// Overflow: both the sum and the converted length wrap around in int32, so the
// result is only meaningful while both fit in the int32 range. When the length
// itself wraps to zero (any multiple of 2^32 elements) the function returns 0
// instead of panicking with an integer divide by zero.
func MeanInt32x4[T ~int32](collection []T) T {
	// Convert before testing for zero: the conversion truncates, so a very large
	// non-empty collection can still produce a zero divisor.
	count := T(len(collection))
	if count == 0 {
		return 0
	}
	return SumInt32x4(collection) / count
}
// MeanInt64x2 returns the mean of an int64 collection using SSE SIMD.
// The total comes from SumInt64x2 and is divided by the element count with
// integer division; an empty collection yields 0.
func MeanInt64x2[T ~int64](collection []T) T {
	if len(collection) == 0 {
		return 0
	}
	total := SumInt64x2(collection)
	count := T(uint(len(collection)))
	return total / count
}
// MeanUint8x16 calculates the mean of a slice of uint8 using SSE SIMD.
// It divides SumUint8x16(collection) by the collection length converted to T
// (integer division) and returns 0 for an empty collection.
// Overflow: both the sum and the converted length wrap around in uint8, so the
// result is only meaningful while both fit in the uint8 range. When the length
// itself wraps to zero (any multiple of 256 elements) the function returns 0
// instead of panicking with an integer divide by zero.
func MeanUint8x16[T ~uint8](collection []T) T {
	// Convert before testing for zero: the conversion truncates, so a non-empty
	// collection can still produce a zero divisor.
	count := T(len(collection))
	if count == 0 {
		return 0
	}
	return SumUint8x16(collection) / count
}
// MeanUint16x8 calculates the mean of a slice of uint16 using SSE SIMD.
// It divides SumUint16x8(collection) by the collection length converted to T
// (integer division) and returns 0 for an empty collection.
// Overflow: both the sum and the converted length wrap around in uint16, so the
// result is only meaningful while both fit in the uint16 range. When the length
// itself wraps to zero (any multiple of 65536 elements) the function returns 0
// instead of panicking with an integer divide by zero.
func MeanUint16x8[T ~uint16](collection []T) T {
	// Convert before testing for zero: the conversion truncates, so a non-empty
	// collection can still produce a zero divisor.
	count := T(len(collection))
	if count == 0 {
		return 0
	}
	return SumUint16x8(collection) / count
}
// MeanUint32x4 calculates the mean of a slice of uint32 using SSE SIMD.
// It divides SumUint32x4(collection) by the collection length converted to T
// (integer division) and returns 0 for an empty collection.
// Overflow: both the sum and the converted length wrap around in uint32, so the
// result is only meaningful while both fit in the uint32 range. When the length
// itself wraps to zero (any multiple of 2^32 elements) the function returns 0
// instead of panicking with an integer divide by zero.
func MeanUint32x4[T ~uint32](collection []T) T {
	// Convert before testing for zero: the conversion truncates, so a very large
	// non-empty collection can still produce a zero divisor.
	count := T(len(collection))
	if count == 0 {
		return 0
	}
	return SumUint32x4(collection) / count
}
// MeanUint64x2 returns the mean of a uint64 collection using SSE SIMD.
// The total comes from SumUint64x2 and is divided by the element count with
// integer division; an empty collection yields 0.
func MeanUint64x2[T ~uint64](collection []T) T {
	if len(collection) == 0 {
		return 0
	}
	total := SumUint64x2(collection)
	count := T(uint(len(collection)))
	return total / count
}
// MeanFloat32x4 returns the mean of a float32 collection using SSE SIMD.
// The total comes from SumFloat32x4 and is divided by the element count;
// an empty collection yields 0.
func MeanFloat32x4[T ~float32](collection []T) T {
	if len(collection) == 0 {
		return 0
	}
	total := SumFloat32x4(collection)
	count := T(uint(len(collection)))
	return total / count
}
// MeanFloat64x2 returns the mean of a float64 collection using SSE SIMD.
// The total comes from SumFloat64x2 and is divided by the element count;
// an empty collection yields 0.
func MeanFloat64x2[T ~float64](collection []T) T {
	if len(collection) == 0 {
		return 0
	}
	total := SumFloat64x2(collection)
	count := T(uint(len(collection)))
	return total / count
}
// ClampInt8x16 clamps each element in collection between min and max values using SSE SIMD
func ClampInt8x16[T ~int8, Slice ~[]T](collection Slice, min, max T) Slice {
length := uint(len(collection))
if length == 0 {
return collection
}
result := make(Slice, length)
const lanes = simdLanes16
base := unsafeSliceInt8(collection, length)
minVec := archsimd.BroadcastInt8x16(int8(min))
maxVec := archsimd.BroadcastInt8x16(int8(max))
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadInt8x16Slice(base[i : i+lanes])
clamped := v.Max(minVec).Min(maxVec)
// bearer:disable go_gosec_unsafe_unsafe
clamped.Store((*[lanes]int8)(unsafe.Pointer(&result[i])))
}
for ; i < length; i++ {
val := collection[i]
if val < min {
val = min
} else if val > max {
val = max
}
result[i] = val
}
return result
}
// ClampInt16x8 clamps each element in collection between min and max values using SSE SIMD
func ClampInt16x8[T ~int16, Slice ~[]T](collection Slice, min, max T) Slice {
length := uint(len(collection))
if length == 0 {
return collection
}
result := make(Slice, length)
const lanes = simdLanes8
base := unsafeSliceInt16(collection, length)
minVec := archsimd.BroadcastInt16x8(int16(min))
maxVec := archsimd.BroadcastInt16x8(int16(max))
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadInt16x8Slice(base[i : i+lanes])
clamped := v.Max(minVec).Min(maxVec)
// bearer:disable go_gosec_unsafe_unsafe
clamped.Store((*[lanes]int16)(unsafe.Pointer(&result[i])))
}
for ; i < length; i++ {
val := collection[i]
if val < min {
val = min
} else if val > max {
val = max
}
result[i] = val
}
return result
}
// ClampInt32x4 clamps each element in collection between min and max values using SSE SIMD
func ClampInt32x4[T ~int32, Slice ~[]T](collection Slice, min, max T) Slice {
length := uint(len(collection))
if length == 0 {
return collection
}
result := make(Slice, length)
const lanes = simdLanes4
base := unsafeSliceInt32(collection, length)
minVec := archsimd.BroadcastInt32x4(int32(min))
maxVec := archsimd.BroadcastInt32x4(int32(max))
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadInt32x4Slice(base[i : i+lanes])
clamped := v.Max(minVec).Min(maxVec)
// bearer:disable go_gosec_unsafe_unsafe
clamped.Store((*[lanes]int32)(unsafe.Pointer(&result[i])))
}
for ; i < length; i++ {
val := collection[i]
if val < min {
val = min
} else if val > max {
val = max
}
result[i] = val
}
return result
}
// ClampInt64x2 clamps each element in collection between min and max values using SSE SIMD and AVX-512 SIMD.
func ClampInt64x2[T ~int64, Slice ~[]T](collection Slice, min, max T) Slice {
length := uint(len(collection))
if length == 0 {
return collection
}
result := make(Slice, length)
const lanes = simdLanes2
base := unsafeSliceInt64(collection, length)
minVec := archsimd.BroadcastInt64x2(int64(min))
maxVec := archsimd.BroadcastInt64x2(int64(max))
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadInt64x2Slice(base[i : i+lanes])
clamped := v.Max(minVec).Min(maxVec)
// bearer:disable go_gosec_unsafe_unsafe
clamped.Store((*[lanes]int64)(unsafe.Pointer(&result[i])))
}
for ; i < length; i++ {
val := collection[i]
if val < min {
val = min
} else if val > max {
val = max
}
result[i] = val
}
return result
}
// ClampUint8x16 clamps each element in collection between min and max values using SSE SIMD
func ClampUint8x16[T ~uint8, Slice ~[]T](collection Slice, min, max T) Slice {
length := uint(len(collection))
if length == 0 {
return collection
}
result := make(Slice, length)
const lanes = simdLanes16
base := unsafeSliceUint8(collection, length)
minVec := archsimd.BroadcastUint8x16(uint8(min))
maxVec := archsimd.BroadcastUint8x16(uint8(max))
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadUint8x16Slice(base[i : i+lanes])
clamped := v.Max(minVec).Min(maxVec)
// bearer:disable go_gosec_unsafe_unsafe
clamped.Store((*[lanes]uint8)(unsafe.Pointer(&result[i])))
}
for ; i < length; i++ {
val := collection[i]
if val < min {
val = min
} else if val > max {
val = max
}
result[i] = val
}
return result
}
// ClampUint16x8 clamps each element in collection between min and max values using SSE SIMD
func ClampUint16x8[T ~uint16, Slice ~[]T](collection Slice, min, max T) Slice {
length := uint(len(collection))
if length == 0 {
return collection
}
result := make(Slice, length)
const lanes = simdLanes8
base := unsafeSliceUint16(collection, length)
minVec := archsimd.BroadcastUint16x8(uint16(min))
maxVec := archsimd.BroadcastUint16x8(uint16(max))
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadUint16x8Slice(base[i : i+lanes])
clamped := v.Max(minVec).Min(maxVec)
// bearer:disable go_gosec_unsafe_unsafe
clamped.Store((*[lanes]uint16)(unsafe.Pointer(&result[i])))
}
for ; i < length; i++ {
val := collection[i]
if val < min {
val = min
} else if val > max {
val = max
}
result[i] = val
}
return result
}
// ClampUint32x4 clamps each element in collection between min and max values using SSE SIMD
func ClampUint32x4[T ~uint32, Slice ~[]T](collection Slice, min, max T) Slice {
length := uint(len(collection))
if length == 0 {
return collection
}
result := make(Slice, length)
const lanes = simdLanes4
base := unsafeSliceUint32(collection, length)
minVec := archsimd.BroadcastUint32x4(uint32(min))
maxVec := archsimd.BroadcastUint32x4(uint32(max))
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadUint32x4Slice(base[i : i+lanes])
clamped := v.Max(minVec).Min(maxVec)
// bearer:disable go_gosec_unsafe_unsafe
clamped.Store((*[lanes]uint32)(unsafe.Pointer(&result[i])))
}
for ; i < length; i++ {
val := collection[i]
if val < min {
val = min
} else if val > max {
val = max
}
result[i] = val
}
return result
}
// ClampUint64x2 clamps each element in collection between min and max values using SSE SIMD and AVX-512 SIMD.
func ClampUint64x2[T ~uint64, Slice ~[]T](collection Slice, min, max T) Slice {
length := uint(len(collection))
if length == 0 {
return collection
}
result := make(Slice, length)
const lanes = simdLanes2
base := unsafeSliceUint64(collection, length)
minVec := archsimd.BroadcastUint64x2(uint64(min))
maxVec := archsimd.BroadcastUint64x2(uint64(max))
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadUint64x2Slice(base[i : i+lanes])
clamped := v.Max(minVec).Min(maxVec)
// bearer:disable go_gosec_unsafe_unsafe
clamped.Store((*[lanes]uint64)(unsafe.Pointer(&result[i])))
}
for ; i < length; i++ {
val := collection[i]
if val < min {
val = min
} else if val > max {
val = max
}
result[i] = val
}
return result
}
// ClampFloat32x4 clamps each element in collection between min and max values using SSE SIMD
func ClampFloat32x4[T ~float32, Slice ~[]T](collection Slice, min, max T) Slice {
length := uint(len(collection))
if length == 0 {
return collection
}
result := make(Slice, length)
const lanes = simdLanes4
base := unsafeSliceFloat32(collection, length)
minVec := archsimd.BroadcastFloat32x4(float32(min))
maxVec := archsimd.BroadcastFloat32x4(float32(max))
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadFloat32x4Slice(base[i : i+lanes])
clamped := v.Max(minVec).Min(maxVec)
// bearer:disable go_gosec_unsafe_unsafe
clamped.Store((*[lanes]float32)(unsafe.Pointer(&result[i])))
}
for ; i < length; i++ {
val := collection[i]
if val < min {
val = min
} else if val > max {
val = max
}
result[i] = val
}
return result
}
// ClampFloat64x2 clamps each element in collection between min and max values using SSE SIMD
func ClampFloat64x2[T ~float64, Slice ~[]T](collection Slice, min, max T) Slice {
length := uint(len(collection))
if length == 0 {
return collection
}
result := make(Slice, length)
const lanes = simdLanes2
base := unsafeSliceFloat64(collection, length)
minVec := archsimd.BroadcastFloat64x2(float64(min))
maxVec := archsimd.BroadcastFloat64x2(float64(max))
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadFloat64x2Slice(base[i : i+lanes])
clamped := v.Max(minVec).Min(maxVec)
// bearer:disable go_gosec_unsafe_unsafe
clamped.Store((*[lanes]float64)(unsafe.Pointer(&result[i])))
}
for ; i < length; i++ {
val := collection[i]
if val < min {
val = min
} else if val > max {
val = max
}
result[i] = val
}
return result
}
// MinInt8x16 finds the minimum value in a collection of int8 using SSE SIMD
func MinInt8x16[T ~int8](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes16
base := unsafeSliceInt8(collection, length)
var minVec archsimd.Int8x16
firstInitialized := false
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadInt8x16Slice(base[i : i+lanes])
if !firstInitialized {
minVec = v
firstInitialized = true
} else {
minVec = minVec.Min(v)
}
}
// Find minimum in the vector (only if we processed any vectors)
var minVal int8
if firstInitialized {
var buf [lanes]int8
minVec.Store(&buf)
minVal = min(
buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7],
buf[8], buf[9], buf[10], buf[11], buf[12], buf[13], buf[14], buf[15],
)
}
// Handle remaining elements
for ; i < length; i++ {
if !firstInitialized || collection[i] < T(minVal) {
minVal = int8(collection[i])
firstInitialized = true
}
}
return T(minVal)
}
// MinInt16x8 finds the minimum value in a collection of int16 using SSE SIMD
func MinInt16x8[T ~int16](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes8
base := unsafeSliceInt16(collection, length)
var minVec archsimd.Int16x8
firstInitialized := false
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadInt16x8Slice(base[i : i+lanes])
if !firstInitialized {
minVec = v
firstInitialized = true
} else {
minVec = minVec.Min(v)
}
}
// Find minimum in the vector (only if we processed any vectors)
var minVal int16
if firstInitialized {
var buf [lanes]int16
minVec.Store(&buf)
minVal = min(buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7])
}
// Handle remaining elements
for ; i < length; i++ {
if !firstInitialized || collection[i] < T(minVal) {
minVal = int16(collection[i])
firstInitialized = true
}
}
return T(minVal)
}
// MinInt32x4 finds the minimum value in a collection of int32 using SSE SIMD
func MinInt32x4[T ~int32](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes4
base := unsafeSliceInt32(collection, length)
var minVec archsimd.Int32x4
firstInitialized := false
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadInt32x4Slice(base[i : i+lanes])
if !firstInitialized {
minVec = v
firstInitialized = true
} else {
minVec = minVec.Min(v)
}
}
// Find minimum in the vector (only if we processed any vectors)
var minVal int32
if firstInitialized {
var buf [lanes]int32
minVec.Store(&buf)
minVal = min(buf[0], buf[1], buf[2], buf[3])
}
// Handle remaining elements
for ; i < length; i++ {
if !firstInitialized || collection[i] < T(minVal) {
minVal = int32(collection[i])
firstInitialized = true
}
}
return T(minVal)
}
// MinInt64x2 finds the minimum value in a collection of int64 using SSE SIMD
func MinInt64x2[T ~int64](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes2
base := unsafeSliceInt64(collection, length)
var minVec archsimd.Int64x2
firstInitialized := false
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadInt64x2Slice(base[i : i+lanes])
if !firstInitialized {
minVec = v
firstInitialized = true
} else {
minVec = minVec.Min(v)
}
}
// Find minimum in the vector (only if we processed any vectors)
var minVal int64
if firstInitialized {
var buf [lanes]int64
minVec.Store(&buf)
minVal = min(buf[0], buf[1])
}
// Handle remaining elements
for ; i < length; i++ {
if !firstInitialized || collection[i] < T(minVal) {
minVal = int64(collection[i])
firstInitialized = true
}
}
return T(minVal)
}
// MinUint8x16 finds the minimum value in a collection of uint8 using SSE SIMD
func MinUint8x16[T ~uint8](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes16
base := unsafeSliceUint8(collection, length)
var minVec archsimd.Uint8x16
firstInitialized := false
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadUint8x16Slice(base[i : i+lanes])
if !firstInitialized {
minVec = v
firstInitialized = true
} else {
minVec = minVec.Min(v)
}
}
// Find minimum in the vector (only if we processed any vectors)
var minVal uint8
if firstInitialized {
var buf [lanes]uint8
minVec.Store(&buf)
minVal = min(
buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7],
buf[8], buf[9], buf[10], buf[11], buf[12], buf[13], buf[14], buf[15],
)
}
// Handle remaining elements
for ; i < length; i++ {
if !firstInitialized || collection[i] < T(minVal) {
minVal = uint8(collection[i])
firstInitialized = true
}
}
return T(minVal)
}
// MinUint16x8 finds the minimum value in a collection of uint16 using SSE SIMD
func MinUint16x8[T ~uint16](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes8
base := unsafeSliceUint16(collection, length)
var minVec archsimd.Uint16x8
firstInitialized := false
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadUint16x8Slice(base[i : i+lanes])
if !firstInitialized {
minVec = v
firstInitialized = true
} else {
minVec = minVec.Min(v)
}
}
// Find minimum in the vector (only if we processed any vectors)
var minVal uint16
if firstInitialized {
var buf [lanes]uint16
minVec.Store(&buf)
minVal = min(buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7])
}
// Handle remaining elements
for ; i < length; i++ {
if !firstInitialized || collection[i] < T(minVal) {
minVal = uint16(collection[i])
firstInitialized = true
}
}
return T(minVal)
}
// MinUint32x4 finds the minimum value in a collection of uint32 using SSE SIMD
func MinUint32x4[T ~uint32](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes4
base := unsafeSliceUint32(collection, length)
var minVec archsimd.Uint32x4
firstInitialized := false
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadUint32x4Slice(base[i : i+lanes])
if !firstInitialized {
minVec = v
firstInitialized = true
} else {
minVec = minVec.Min(v)
}
}
// Find minimum in the vector (only if we processed any vectors)
var minVal uint32
if firstInitialized {
var buf [lanes]uint32
minVec.Store(&buf)
minVal = min(buf[0], buf[1], buf[2], buf[3])
}
// Handle remaining elements
for ; i < length; i++ {
if !firstInitialized || collection[i] < T(minVal) {
minVal = uint32(collection[i])
firstInitialized = true
}
}
return T(minVal)
}
// MinUint64x2 finds the minimum value in a collection of uint64 using SSE SIMD
func MinUint64x2[T ~uint64](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes2
base := unsafeSliceUint64(collection, length)
var minVec archsimd.Uint64x2
firstInitialized := false
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadUint64x2Slice(base[i : i+lanes])
if !firstInitialized {
minVec = v
firstInitialized = true
} else {
minVec = minVec.Min(v)
}
}
// Find minimum in the vector (only if we processed any vectors)
var minVal uint64
if firstInitialized {
var buf [lanes]uint64
minVec.Store(&buf)
minVal = min(buf[0], buf[1])
}
// Handle remaining elements
for ; i < length; i++ {
if !firstInitialized || collection[i] < T(minVal) {
minVal = uint64(collection[i])
firstInitialized = true
}
}
return T(minVal)
}
// MinFloat32x4 finds the minimum value in a collection of float32 using SSE SIMD
func MinFloat32x4[T ~float32](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes4
base := unsafeSliceFloat32(collection, length)
var minVec archsimd.Float32x4
firstInitialized := false
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadFloat32x4Slice(base[i : i+lanes])
if !firstInitialized {
minVec = v
firstInitialized = true
} else {
minVec = minVec.Min(v)
}
}
// Find minimum in the vector (only if we processed any vectors)
var minVal float32
if firstInitialized {
var buf [lanes]float32
minVec.Store(&buf)
minVal = min(buf[0], buf[1], buf[2], buf[3])
}
// Handle remaining elements
for ; i < length; i++ {
if !firstInitialized || collection[i] < T(minVal) {
minVal = float32(collection[i])
firstInitialized = true
}
}
return T(minVal)
}
// MinFloat64x2 finds the minimum value in a collection of float64 using SSE SIMD
func MinFloat64x2[T ~float64](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes2
base := unsafeSliceFloat64(collection, length)
var minVec archsimd.Float64x2
firstInitialized := false
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadFloat64x2Slice(base[i : i+lanes])
if !firstInitialized {
minVec = v
firstInitialized = true
} else {
minVec = minVec.Min(v)
}
}
// Find minimum in the vector (only if we processed any vectors)
var minVal float64
if firstInitialized {
var buf [lanes]float64
minVec.Store(&buf)
minVal = min(buf[0], buf[1])
}
// Handle remaining elements
for ; i < length; i++ {
if !firstInitialized || collection[i] < T(minVal) {
minVal = float64(collection[i])
firstInitialized = true
}
}
return T(minVal)
}
// MaxInt8x16 finds the maximum value in a collection of int8 using SSE SIMD
func MaxInt8x16[T ~int8](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes16
base := unsafeSliceInt8(collection, length)
var maxVec archsimd.Int8x16
firstInitialized := false
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadInt8x16Slice(base[i : i+lanes])
if !firstInitialized {
maxVec = v
firstInitialized = true
} else {
maxVec = maxVec.Max(v)
}
}
// Find maximum in the vector (only if we processed any vectors)
var maxVal int8
if firstInitialized {
var buf [lanes]int8
maxVec.Store(&buf)
maxVal = max(
buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7],
buf[8], buf[9], buf[10], buf[11], buf[12], buf[13], buf[14], buf[15],
)
}
// Handle remaining elements
for ; i < length; i++ {
if !firstInitialized || collection[i] > T(maxVal) {
maxVal = int8(collection[i])
firstInitialized = true
}
}
return T(maxVal)
}
// MaxInt16x8 finds the maximum value in a collection of int16 using SSE SIMD
func MaxInt16x8[T ~int16](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes8
base := unsafeSliceInt16(collection, length)
var maxVec archsimd.Int16x8
firstInitialized := false
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadInt16x8Slice(base[i : i+lanes])
if !firstInitialized {
maxVec = v
firstInitialized = true
} else {
maxVec = maxVec.Max(v)
}
}
// Find maximum in the vector (only if we processed any vectors)
var maxVal int16
if firstInitialized {
var buf [lanes]int16
maxVec.Store(&buf)
maxVal = max(buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7])
}
// Handle remaining elements
for ; i < length; i++ {
if !firstInitialized || collection[i] > T(maxVal) {
maxVal = int16(collection[i])
firstInitialized = true
}
}
return T(maxVal)
}
// MaxInt32x4 finds the maximum value in a collection of int32 using SSE SIMD
func MaxInt32x4[T ~int32](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes4
base := unsafeSliceInt32(collection, length)
var maxVec archsimd.Int32x4
firstInitialized := false
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadInt32x4Slice(base[i : i+lanes])
if !firstInitialized {
maxVec = v
firstInitialized = true
} else {
maxVec = maxVec.Max(v)
}
}
// Find maximum in the vector (only if we processed any vectors)
var maxVal int32
if firstInitialized {
var buf [lanes]int32
maxVec.Store(&buf)
maxVal = max(buf[0], buf[1], buf[2], buf[3])
}
// Handle remaining elements
for ; i < length; i++ {
if !firstInitialized || collection[i] > T(maxVal) {
maxVal = int32(collection[i])
firstInitialized = true
}
}
return T(maxVal)
}
// MaxInt64x2 finds the maximum value in a collection of int64 using SSE SIMD
func MaxInt64x2[T ~int64](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes2
base := unsafeSliceInt64(collection, length)
var maxVec archsimd.Int64x2
firstInitialized := false
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadInt64x2Slice(base[i : i+lanes])
if !firstInitialized {
maxVec = v
firstInitialized = true
} else {
maxVec = maxVec.Max(v)
}
}
// Find maximum in the vector (only if we processed any vectors)
var maxVal int64
if firstInitialized {
var buf [lanes]int64
maxVec.Store(&buf)
maxVal = max(buf[0], buf[1])
}
// Handle remaining elements
for ; i < length; i++ {
if !firstInitialized || collection[i] > T(maxVal) {
maxVal = int64(collection[i])
firstInitialized = true
}
}
return T(maxVal)
}
// MaxUint8x16 finds the maximum value in a collection of uint8 using SSE SIMD
func MaxUint8x16[T ~uint8](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes16
base := unsafeSliceUint8(collection, length)
var maxVec archsimd.Uint8x16
firstInitialized := false
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadUint8x16Slice(base[i : i+lanes])
if !firstInitialized {
maxVec = v
firstInitialized = true
} else {
maxVec = maxVec.Max(v)
}
}
// Find maximum in the vector (only if we processed any vectors)
var maxVal uint8
if firstInitialized {
var buf [lanes]uint8
maxVec.Store(&buf)
maxVal = max(
buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7],
buf[8], buf[9], buf[10], buf[11], buf[12], buf[13], buf[14], buf[15],
)
}
// Handle remaining elements
for ; i < length; i++ {
if !firstInitialized || collection[i] > T(maxVal) {
maxVal = uint8(collection[i])
firstInitialized = true
}
}
return T(maxVal)
}
// MaxUint16x8 finds the maximum value in a collection of uint16 using SSE SIMD
func MaxUint16x8[T ~uint16](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes8
base := unsafeSliceUint16(collection, length)
var maxVec archsimd.Uint16x8
firstInitialized := false
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadUint16x8Slice(base[i : i+lanes])
if !firstInitialized {
maxVec = v
firstInitialized = true
} else {
maxVec = maxVec.Max(v)
}
}
// Find maximum in the vector (only if we processed any vectors)
var maxVal uint16
if firstInitialized {
var buf [lanes]uint16
maxVec.Store(&buf)
maxVal = max(buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7])
}
// Handle remaining elements
for ; i < length; i++ {
if !firstInitialized || collection[i] > T(maxVal) {
maxVal = uint16(collection[i])
firstInitialized = true
}
}
return T(maxVal)
}
// MaxUint32x4 finds the maximum value in a collection of uint32 using SSE SIMD
func MaxUint32x4[T ~uint32](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes4
base := unsafeSliceUint32(collection, length)
var maxVec archsimd.Uint32x4
firstInitialized := false
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadUint32x4Slice(base[i : i+lanes])
if !firstInitialized {
maxVec = v
firstInitialized = true
} else {
maxVec = maxVec.Max(v)
}
}
// Find maximum in the vector (only if we processed any vectors)
var maxVal uint32
if firstInitialized {
var buf [lanes]uint32
maxVec.Store(&buf)
maxVal = max(buf[0], buf[1], buf[2], buf[3])
}
// Handle remaining elements
for ; i < length; i++ {
if !firstInitialized || collection[i] > T(maxVal) {
maxVal = uint32(collection[i])
firstInitialized = true
}
}
return T(maxVal)
}
// MaxUint64x2 finds the maximum value in a collection of uint64 using SSE SIMD
func MaxUint64x2[T ~uint64](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes2
base := unsafeSliceUint64(collection, length)
var maxVec archsimd.Uint64x2
firstInitialized := false
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadUint64x2Slice(base[i : i+lanes])
if !firstInitialized {
maxVec = v
firstInitialized = true
} else {
maxVec = maxVec.Max(v)
}
}
// Find maximum in the vector (only if we processed any vectors)
var maxVal uint64
if firstInitialized {
var buf [lanes]uint64
maxVec.Store(&buf)
maxVal = max(buf[0], buf[1])
}
// Handle remaining elements
for ; i < length; i++ {
if !firstInitialized || collection[i] > T(maxVal) {
maxVal = uint64(collection[i])
firstInitialized = true
}
}
return T(maxVal)
}
// MaxFloat32x4 finds the maximum value in a collection of float32 using SSE SIMD
func MaxFloat32x4[T ~float32](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes4
base := unsafeSliceFloat32(collection, length)
var maxVec archsimd.Float32x4
firstInitialized := false
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadFloat32x4Slice(base[i : i+lanes])
if !firstInitialized {
maxVec = v
firstInitialized = true
} else {
maxVec = maxVec.Max(v)
}
}
// Find maximum in the vector (only if we processed any vectors)
var maxVal float32
if firstInitialized {
var buf [lanes]float32
maxVec.Store(&buf)
maxVal = max(buf[0], buf[1], buf[2], buf[3])
}
// Handle remaining elements
for ; i < length; i++ {
if !firstInitialized || collection[i] > T(maxVal) {
maxVal = float32(collection[i])
firstInitialized = true
}
}
return T(maxVal)
}
// MaxFloat64x2 finds the maximum value in a collection of float64 using SSE SIMD
func MaxFloat64x2[T ~float64](collection []T) T {
length := uint(len(collection))
if length == 0 {
return 0
}
const lanes = simdLanes2
base := unsafeSliceFloat64(collection, length)
var maxVec archsimd.Float64x2
firstInitialized := false
i := uint(0)
for ; i+lanes <= length; i += lanes {
v := archsimd.LoadFloat64x2Slice(base[i : i+lanes])
if !firstInitialized {
maxVec = v
firstInitialized = true
} else {
maxVec = maxVec.Max(v)
}
}
// Find maximum in the vector (only if we processed any vectors)
var maxVal float64
if firstInitialized {
var buf [lanes]float64
maxVec.Store(&buf)
maxVal = max(buf[0], buf[1])
}
// Handle remaining elements
for ; i < length; i++ {
if !firstInitialized || collection[i] > T(maxVal) {
maxVal = float64(collection[i])
firstInitialized = true
}
}
return T(maxVal)
}
// SSE (128-bit) SIMD sumBy functions - 16/8/4/2 lanes
// These implementations use lo.Map to apply the iteratee, then chain with SIMD sum functions.
// SumByInt8x16 sums the values extracted by iteratee from a slice using SSE SIMD.
func SumByInt8x16[T any, R ~int8](collection []T, iteratee func(item T) R) R {
mapped := lo.Map(collection, func(item T, _ int) R { return iteratee(item) })
return SumInt8x16(mapped)
}
// SumByInt16x8 sums the values extracted by iteratee from a slice using SSE SIMD.
func SumByInt16x8[T any, R ~int16](collection []T, iteratee func(item T) R) R {
mapped := lo.Map(collection, func(item T, _ int) R { return iteratee(item) })
return SumInt16x8(mapped)
}
// SumByInt32x4 sums the values extracted by iteratee from a slice using SSE SIMD.
func SumByInt32x4[T any, R ~int32](collection []T, iteratee func(item T) R) R {
mapped := lo.Map(collection, func(item T, _ int) R { return iteratee(item) })
return SumInt32x4(mapped)
}
// SumByInt64x2 sums the values extracted by iteratee from a slice using SSE SIMD.
func SumByInt64x2[T any, R ~int64](collection []T, iteratee func(item T) R) R {
mapped := lo.Map(collection, func(item T, _ int) R { return iteratee(item) })
return SumInt64x2(mapped)
}
// SumByUint8x16 sums the values extracted by iteratee from a slice using SSE SIMD.
func SumByUint8x16[T any, R ~uint8](collection []T, iteratee func(item T) R) R {
mapped := lo.Map(collection, func(item T, _ int) R { return iteratee(item) })
return SumUint8x16(mapped)
}
// SumByUint16x8 sums the values extracted by iteratee from a slice using SSE SIMD.
func SumByUint16x8[T any, R ~uint16](collection []T, iteratee func(item T) R) R {
mapped := lo.Map(collection, func(item T, _ int) R { return iteratee(item) })
return SumUint16x8(mapped)
}
// SumByUint32x4 sums the values extracted by iteratee from a slice using SSE SIMD.
func SumByUint32x4[T any, R ~uint32](collection []T, iteratee func(item T) R) R {
mapped := lo.Map(collection, func(item T, _ int) R { return iteratee(item) })
return SumUint32x4(mapped)
}
// SumByUint64x2 sums the values extracted by iteratee from a slice using SSE SIMD.
func SumByUint64x2[T any, R ~uint64](collection []T, iteratee func(item T) R) R {
mapped := lo.Map(collection, func(item T, _ int) R { return iteratee(item) })
return SumUint64x2(mapped)
}
// SumByFloat32x4 sums the values extracted by iteratee from a slice using SSE SIMD.
func SumByFloat32x4[T any, R ~float32](collection []T, iteratee func(item T) R) R {
mapped := lo.Map(collection, func(item T, _ int) R { return iteratee(item) })
return SumFloat32x4(mapped)
}
// SumByFloat64x2 sums the values extracted by iteratee from a slice using SSE SIMD.
func SumByFloat64x2[T any, R ~float64](collection []T, iteratee func(item T) R) R {
mapped := lo.Map(collection, func(item T, _ int) R { return iteratee(item) })
return SumFloat64x2(mapped)
}
// SSE (128-bit) SIMD meanBy functions - 16/8/4/2 lanes
// These implementations use lo.Map to apply the iteratee, then chain with SIMD mean functions.
// MeanByInt8x16 calculates the mean of values extracted by iteratee from a slice using SSE SIMD.
func MeanByInt8x16[T any, R ~int8](collection []T, iteratee func(item T) R) R {
mapped := lo.Map(collection, func(item T, _ int) R { return iteratee(item) })
return MeanInt8x16(mapped)
}
// MeanByInt16x8 calculates the mean of values extracted by iteratee from a slice using SSE SIMD.
func MeanByInt16x8[T any, R ~int16](collection []T, iteratee func(item T) R) R {
mapped := lo.Map(collection, func(item T, _ int) R { return iteratee(item) })
return MeanInt16x8(mapped)
}
// MeanByInt32x4 calculates the mean of values extracted by iteratee from a slice using SSE SIMD.
func MeanByInt32x4[T any, R ~int32](collection []T, iteratee func(item T) R) R {
mapped := lo.Map(collection, func(item T, _ int) R { return iteratee(item) })
return MeanInt32x4(mapped)
}
// MeanByInt64x2 calculates the mean of values extracted by iteratee from a slice using SSE SIMD.
func MeanByInt64x2[T any, R ~int64](collection []T, iteratee func(item T) R) R {
mapped := lo.Map(collection, func(item T, _ int) R { return iteratee(item) })
return MeanInt64x2(mapped)
}
// MeanByUint8x16 calculates the mean of values extracted by iteratee from a slice using SSE SIMD.
func MeanByUint8x16[T any, R ~uint8](collection []T, iteratee func(item T) R) R {
mapped := lo.Map(collection, func(item T, _ int) R { return iteratee(item) })
return MeanUint8x16(mapped)
}
// MeanByUint16x8 calculates the mean of values extracted by iteratee from a slice using SSE SIMD.
func MeanByUint16x8[T any, R ~uint16](collection []T, iteratee func(item T) R) R {
mapped := lo.Map(collection, func(item T, _ int) R { return iteratee(item) })
return MeanUint16x8(mapped)
}
// MeanByUint32x4 calculates the mean of values extracted by iteratee from a slice using SSE SIMD.
func MeanByUint32x4[T any, R ~uint32](collection []T, iteratee func(item T) R) R {
mapped := lo.Map(collection, func(item T, _ int) R { return iteratee(item) })
return MeanUint32x4(mapped)
}
// MeanByUint64x2 calculates the mean of values extracted by iteratee from a slice using SSE SIMD.
func MeanByUint64x2[T any, R ~uint64](collection []T, iteratee func(item T) R) R {
mapped := lo.Map(collection, func(item T, _ int) R { return iteratee(item) })
return MeanUint64x2(mapped)
}
// MeanByFloat32x4 calculates the mean of values extracted by iteratee from a slice using SSE SIMD.
func MeanByFloat32x4[T any, R ~float32](collection []T, iteratee func(item T) R) R {
mapped := lo.Map(collection, func(item T, _ int) R { return iteratee(item) })
return MeanFloat32x4(mapped)
}
// MeanByFloat64x2 calculates the mean of values extracted by iteratee from a slice using SSE SIMD.
func MeanByFloat64x2[T any, R ~float64](collection []T, iteratee func(item T) R) R {
mapped := lo.Map(collection, func(item T, _ int) R { return iteratee(item) })
return MeanFloat64x2(mapped)
}