mirror of
https://github.com/fergalmoran/ladybird.git
synced 2025-12-22 09:19:03 +00:00
AK/SIMDExtras: Fix masking logic in shuffle_or_0
This commit is contained in:
committed by
Ali Mohammad Pur
parent
48f1861ce9
commit
a168bec7ef
@@ -218,11 +218,10 @@ ALWAYS_INLINE static T shuffle_or_0_impl(T a, Control control, IndexSequence<Idx
|
|||||||
using E = ElementOf<T>;
|
using E = ElementOf<T>;
|
||||||
|
|
||||||
if constexpr (__has_builtin(__builtin_shuffle)) {
|
if constexpr (__has_builtin(__builtin_shuffle)) {
|
||||||
// GCC does a very bad job at optimizing the masking, while not recognizing the shuffle idiom
|
auto vector = __builtin_shuffle(a, control);
|
||||||
// So we jinx its __builtin_shuffle to work with out of bounds indices
|
for (size_t i = 0; i < N; ++i)
|
||||||
// TODO: verify that this masking logic is correct (for machines with __builtin_shuffle)
|
vector[i] = control[i] < 0 || control[i] >= N ? 0 : vector[i];
|
||||||
auto mask = (control >= 0) | (control < N);
|
return vector;
|
||||||
return __builtin_shuffle(a, control & mask) & ~mask;
|
|
||||||
}
|
}
|
||||||
// 1. Set all out of bounds values to ~0
|
// 1. Set all out of bounds values to ~0
|
||||||
// Note: This is done so that the optimization mentioned down below works
|
// Note: This is done so that the optimization mentioned down below works
|
||||||
|
|||||||
Reference in New Issue
Block a user