From 5e66512cbda66ce4a0f9aa38d52d902e9caefcf8 Mon Sep 17 00:00:00 2001 From: Enver Balalic Date: Sat, 8 Jun 2024 23:18:21 +0200 Subject: [PATCH] LibWasm: Implement f32x4 and f64x2 arithmetic SIMD ops Adds all the arithmetic ops for f32x4 and f64x2 SIMD instructions. With this, we pass 8375 additional tests :) Quite a few of the spec tests for this are still failing. I confirmed with the wasmer runtime manually for a number of them, and we seem to match their results. I'm not really sure what's happening here: either there is a spec bug, or wasmer is broken in the same way. 18476 failed before. 10101 failed after. --- .../AbstractMachine/BytecodeInterpreter.cpp | 91 ++++++++++++------- .../LibWasm/AbstractMachine/Operators.h | 77 +++++++++++++++- 2 files changed, 136 insertions(+), 32 deletions(-) diff --git a/Userland/Libraries/LibWasm/AbstractMachine/BytecodeInterpreter.cpp b/Userland/Libraries/LibWasm/AbstractMachine/BytecodeInterpreter.cpp index 4656ab2cff..97a95c9baa 100644 --- a/Userland/Libraries/LibWasm/AbstractMachine/BytecodeInterpreter.cpp +++ b/Userland/Libraries/LibWasm/AbstractMachine/BytecodeInterpreter.cpp @@ -1392,6 +1392,10 @@ void BytecodeInterpreter::interpret(Configuration& configuration, InstructionPoi return binary_numeric_operation>(configuration); case Instructions::f32x4_ge.value(): return binary_numeric_operation>(configuration); + case Instructions::f32x4_min.value(): + return binary_numeric_operation>(configuration); + case Instructions::f32x4_max.value(): + return binary_numeric_operation>(configuration); case Instructions::f64x2_eq.value(): return binary_numeric_operation>(configuration); case Instructions::f64x2_ne.value(): @@ -1404,6 +1408,62 @@ void BytecodeInterpreter::interpret(Configuration& configuration, InstructionPoi return binary_numeric_operation>(configuration); case Instructions::f64x2_ge.value(): return binary_numeric_operation>(configuration); + case Instructions::f64x2_min.value(): + return binary_numeric_operation>(configuration); 
+ case Instructions::f64x2_max.value(): + return binary_numeric_operation>(configuration); + case Instructions::f32x4_div.value(): + return binary_numeric_operation>(configuration); + case Instructions::f32x4_mul.value(): + return binary_numeric_operation>(configuration); + case Instructions::f32x4_sub.value(): + return binary_numeric_operation>(configuration); + case Instructions::f32x4_add.value(): + return binary_numeric_operation>(configuration); + case Instructions::f32x4_pmin.value(): + return binary_numeric_operation>(configuration); + case Instructions::f32x4_pmax.value(): + return binary_numeric_operation>(configuration); + case Instructions::f64x2_div.value(): + return binary_numeric_operation>(configuration); + case Instructions::f64x2_mul.value(): + return binary_numeric_operation>(configuration); + case Instructions::f64x2_sub.value(): + return binary_numeric_operation>(configuration); + case Instructions::f64x2_add.value(): + return binary_numeric_operation>(configuration); + case Instructions::f64x2_pmin.value(): + return binary_numeric_operation>(configuration); + case Instructions::f64x2_pmax.value(): + return binary_numeric_operation>(configuration); + case Instructions::f32x4_ceil.value(): + return unary_operation>(configuration); + case Instructions::f32x4_floor.value(): + return unary_operation>(configuration); + case Instructions::f32x4_trunc.value(): + return unary_operation>(configuration); + case Instructions::f32x4_nearest.value(): + return unary_operation>(configuration); + case Instructions::f32x4_sqrt.value(): + return unary_operation>(configuration); + case Instructions::f32x4_neg.value(): + return unary_operation>(configuration); + case Instructions::f32x4_abs.value(): + return unary_operation>(configuration); + case Instructions::f64x2_ceil.value(): + return unary_operation>(configuration); + case Instructions::f64x2_floor.value(): + return unary_operation>(configuration); + case Instructions::f64x2_trunc.value(): + return 
unary_operation>(configuration); + case Instructions::f64x2_nearest.value(): + return unary_operation>(configuration); + case Instructions::f64x2_sqrt.value(): + return unary_operation>(configuration); + case Instructions::f64x2_neg.value(): + return unary_operation>(configuration); + case Instructions::f64x2_abs.value(): + return unary_operation>(configuration); case Instructions::v128_not.value(): case Instructions::v128_and.value(): case Instructions::v128_andnot.value(): @@ -1430,23 +1490,16 @@ void BytecodeInterpreter::interpret(Configuration& configuration, InstructionPoi case Instructions::i8x16_bitmask.value(): case Instructions::i8x16_narrow_i16x8_s.value(): case Instructions::i8x16_narrow_i16x8_u.value(): - case Instructions::f32x4_ceil.value(): - case Instructions::f32x4_floor.value(): - case Instructions::f32x4_trunc.value(): - case Instructions::f32x4_nearest.value(): case Instructions::i8x16_add.value(): case Instructions::i8x16_add_sat_s.value(): case Instructions::i8x16_add_sat_u.value(): case Instructions::i8x16_sub.value(): case Instructions::i8x16_sub_sat_s.value(): case Instructions::i8x16_sub_sat_u.value(): - case Instructions::f64x2_ceil.value(): - case Instructions::f64x2_floor.value(): case Instructions::i8x16_min_s.value(): case Instructions::i8x16_min_u.value(): case Instructions::i8x16_max_s.value(): case Instructions::i8x16_max_u.value(): - case Instructions::f64x2_trunc.value(): case Instructions::i8x16_avgr_u.value(): case Instructions::i16x8_extadd_pairwise_i8x16_s.value(): case Instructions::i16x8_extadd_pairwise_i8x16_u.value(): @@ -1469,7 +1522,6 @@ void BytecodeInterpreter::interpret(Configuration& configuration, InstructionPoi case Instructions::i16x8_sub.value(): case Instructions::i16x8_sub_sat_s.value(): case Instructions::i16x8_sub_sat_u.value(): - case Instructions::f64x2_nearest.value(): case Instructions::i16x8_mul.value(): case Instructions::i16x8_min_s.value(): case Instructions::i16x8_min_u.value(): @@ -1508,7 +1560,6 
@@ void BytecodeInterpreter::interpret(Configuration& configuration, InstructionPoi case Instructions::i64x2_extend_high_i32x4_s.value(): case Instructions::i64x2_extend_low_i32x4_u.value(): case Instructions::i64x2_extend_high_i32x4_u.value(): - case Instructions::i64x2_add.value(): case Instructions::i64x2_sub.value(): case Instructions::i64x2_mul.value(): case Instructions::i64x2_eq.value(): @@ -1521,28 +1572,6 @@ void BytecodeInterpreter::interpret(Configuration& configuration, InstructionPoi case Instructions::i64x2_extmul_high_i32x4_s.value(): case Instructions::i64x2_extmul_low_i32x4_u.value(): case Instructions::i64x2_extmul_high_i32x4_u.value(): - case Instructions::f32x4_abs.value(): - case Instructions::f32x4_neg.value(): - case Instructions::f32x4_sqrt.value(): - case Instructions::f32x4_add.value(): - case Instructions::f32x4_sub.value(): - case Instructions::f32x4_mul.value(): - case Instructions::f32x4_div.value(): - case Instructions::f32x4_min.value(): - case Instructions::f32x4_max.value(): - case Instructions::f32x4_pmin.value(): - case Instructions::f32x4_pmax.value(): - case Instructions::f64x2_abs.value(): - case Instructions::f64x2_neg.value(): - case Instructions::f64x2_sqrt.value(): - case Instructions::f64x2_add.value(): - case Instructions::f64x2_sub.value(): - case Instructions::f64x2_mul.value(): - case Instructions::f64x2_div.value(): - case Instructions::f64x2_min.value(): - case Instructions::f64x2_max.value(): - case Instructions::f64x2_pmin.value(): - case Instructions::f64x2_pmax.value(): case Instructions::i32x4_trunc_sat_f32x4_s.value(): case Instructions::i32x4_trunc_sat_f32x4_u.value(): case Instructions::f32x4_convert_i32x4_s.value(): diff --git a/Userland/Libraries/LibWasm/AbstractMachine/Operators.h b/Userland/Libraries/LibWasm/AbstractMachine/Operators.h index 1628105d99..c0b5a52981 100644 --- a/Userland/Libraries/LibWasm/AbstractMachine/Operators.h +++ b/Userland/Libraries/LibWasm/AbstractMachine/Operators.h @@ -364,6 
+364,26 @@ struct Maximum { static StringView name() { return "maximum"sv; } }; +struct PseudoMinimum { + template + auto operator()(Lhs lhs, Rhs rhs) const + { + return rhs < lhs ? rhs : lhs; + } + + static StringView name() { return "pseudo_minimum"sv; } +}; + +struct PseudoMaximum { + template + auto operator()(Lhs lhs, Rhs rhs) const + { + return lhs < rhs ? rhs : lhs; + } + + static StringView name() { return "pseudo_maximum"sv; } +}; + struct CopySign { template auto operator()(Lhs lhs, Rhs rhs) const @@ -462,6 +482,61 @@ struct Ceil { static StringView name() { return "ceil"sv; } }; +template +struct VectorFloatBinaryOp { + auto operator()(u128 lhs, u128 rhs) const + { + using VectorType = NativeFloatingVectorType<128, VectorSize, NativeFloatingType<128 / VectorSize>>; + auto first = bit_cast(lhs); + auto second = bit_cast(rhs); + VectorType result; + Op op; + for (size_t i = 0; i < VectorSize; ++i) { + result[i] = op(first[i], second[i]); + } + return bit_cast(result); + } + + static StringView name() + { + switch (VectorSize) { + case 4: + return "vecf(32x4).binary_op"sv; + case 2: + return "vecf(64x2).binary_op"sv; + default: + VERIFY_NOT_REACHED(); + } + } +}; + +template +struct VectorFloatUnaryOp { + auto operator()(u128 lhs) const + { + using VectorType = NativeFloatingVectorType<128, VectorSize, NativeFloatingType<128 / VectorSize>>; + auto first = bit_cast(lhs); + VectorType result; + Op op; + for (size_t i = 0; i < VectorSize; ++i) { + result[i] = op(first[i]); + } + return bit_cast(result); + } + + static StringView name() + { + switch (VectorSize) { + case 4: + return "vecf(32x4).unary_op"sv; + case 2: + return "vecf(64x2).unary_op"sv; + default: + VERIFY_NOT_REACHED(); + } + } +}; + struct Floor { template auto operator()(Lhs lhs) const @@ -479,7 +554,7 @@ struct Floor { struct Truncate { template - AK::ErrorOr operator()(Lhs lhs) const + auto operator()(Lhs lhs) const { if constexpr (IsSame) return truncf(lhs);