Skip to content

Commit 732aa34

Browse files
committed
[Clang] feat: Allow AVX/AVX2 permute operations in constexpr
Enables the usage of the following X86 intrinsics in `constexpr`: ```c _mm256_permute2f128_pd _mm256_permute2f128_ps _mm256_permute2f128_si256 _mm256_permute2x128_si256 ```
1 parent 95e4dc6 commit 732aa34

File tree

5 files changed

+79
-4
lines changed

5 files changed

+79
-4
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -462,15 +462,19 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
462462
def cvttpd2dq256 : X86Builtin<"_Vector<4, int>(_Vector<4, double>)">;
463463
def cvtpd2dq256 : X86Builtin<"_Vector<4, int>(_Vector<4, double>)">;
464464
def cvttps2dq256 : X86Builtin<"_Vector<8, int>(_Vector<8, float>)">;
465-
def vperm2f128_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
466-
def vperm2f128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
467-
def vperm2f128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
468465
foreach Op = ["max", "min"] in {
469466
def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">;
470467
def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">;
471468
}
472469
}
473470

471+
let Features = "avx",
472+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
473+
def vperm2f128_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
474+
def vperm2f128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
475+
def vperm2f128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
476+
}
477+
474478
let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
475479
def vpermilpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int)">;
476480
def vpermilps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">;
@@ -567,14 +571,14 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
567571
def psadbw256
568572
: X86Builtin<
569573
"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
570-
def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
571574
}
572575

573576
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
574577
def permdf256
575578
: X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
576579
def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long "
577580
"int>, _Constant int)">;
581+
def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
578582
def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
579583
def pavgb256 : X86Builtin<"_Vector<32, unsigned char>(_Vector<32, unsigned char>, _Vector<32, unsigned char>)">;
580584
def pavgw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">;

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5101,6 +5101,29 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
51015101
unsigned SrcIdx = (ShuffleMask >> 6) & 0x1;
51025102
return std::pair<unsigned, int>{SrcIdx, Offset};
51035103
});
5104+
case X86::BI__builtin_ia32_vperm2f128_pd256:
5105+
case X86::BI__builtin_ia32_vperm2f128_ps256:
5106+
case X86::BI__builtin_ia32_vperm2f128_si256:
5107+
case X86::BI__builtin_ia32_permti256:
5108+
return interp__builtin_ia32_shuffle_generic(
5109+
S, OpPC, Call,
5110+
[BuiltinID, Call](unsigned DstIdx, unsigned ShuffleMask) {
5111+
unsigned NumElements =
5112+
Call->getArg(0)->getType()->getAs<VectorType>()->getNumElements();
5113+
unsigned PreservedBitsCnt = NumElements >> 2;
5114+
unsigned ControlBitsCnt = DstIdx >> PreservedBitsCnt << 2;
5115+
unsigned ControlBits = ShuffleMask >> ControlBitsCnt;
5116+
5117+
if (BuiltinID == X86::BI__builtin_ia32_permti256 &&
5118+
(ControlBits & 0b1000))
5119+
return std::make_pair(0u, -1);
5120+
5121+
unsigned SrcVecIdx = (ControlBits & 0b10) >> 1;
5122+
unsigned PreservedBitsMask = (1 << PreservedBitsCnt) - 1;
5123+
int SrcIdx = ((ControlBits & 0b1) << PreservedBitsCnt) |
5124+
(DstIdx & PreservedBitsMask);
5125+
return std::make_pair(SrcVecIdx, SrcIdx);
5126+
});
51045127
case X86::BI__builtin_ia32_pshufb128:
51055128
case X86::BI__builtin_ia32_pshufb256:
51065129
case X86::BI__builtin_ia32_pshufb512:

clang/lib/AST/ExprConstant.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14330,6 +14330,34 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1433014330

1433114331
return Success(ResultElements, E);
1433214332
}
14333+
case X86::BI__builtin_ia32_vperm2f128_pd256:
14334+
case X86::BI__builtin_ia32_vperm2f128_ps256:
14335+
case X86::BI__builtin_ia32_vperm2f128_si256:
14336+
case X86::BI__builtin_ia32_permti256: {
14337+
APValue R;
14338+
if (!evalShuffleGeneric(
14339+
Info, E, R, [E](unsigned DstIdx, unsigned ShuffleMask) {
14340+
unsigned NumElements = E->getArg(0)
14341+
->getType()
14342+
->getAs<VectorType>()
14343+
->getNumElements();
14344+
unsigned PreservedBitsCnt = NumElements >> 2;
14345+
unsigned ControlBitsCnt = DstIdx >> PreservedBitsCnt << 2;
14346+
unsigned ControlBits = ShuffleMask >> ControlBitsCnt;
14347+
14348+
if (E->getBuiltinCallee() == X86::BI__builtin_ia32_permti256 &&
14349+
(ControlBits & 0b1000))
14350+
return std::make_pair(0u, -1);
14351+
14352+
unsigned SrcVecIdx = (ControlBits & 0b10) >> 1;
14353+
unsigned PreservedBitsMask = (1 << PreservedBitsCnt) - 1;
14354+
int SrcIdx = ((ControlBits & 0b1) << PreservedBitsCnt) |
14355+
(DstIdx & PreservedBitsMask);
14356+
return std::make_pair(SrcVecIdx, SrcIdx);
14357+
}))
14358+
return false;
14359+
return Success(R, E);
14360+
}
1433314361
}
1433414362
}
1433514363

clang/test/CodeGen/X86/avx-builtins.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1439,18 +1439,33 @@ __m256d test_mm256_permute2f128_pd(__m256d A, __m256d B) {
14391439
return _mm256_permute2f128_pd(A, B, 0x31);
14401440
}
14411441

1442+
TEST_CONSTEXPR(match_m256d(_mm256_permute2f128_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), 0xA7), 7.0, 8.0, 5.0, 6.0));
1443+
TEST_CONSTEXPR(match_m256d(_mm256_permute2f128_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), 0x5F), 7.0, 8.0, 3.0, 4.0));
1444+
TEST_CONSTEXPR(match_m256d(_mm256_permute2f128_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), 0x37), 7.0, 8.0, 7.0, 8.0));
1445+
TEST_CONSTEXPR(match_m256d(_mm256_permute2f128_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), 0x12), 5.0, 6.0, 3.0, 4.0));
1446+
14421447
__m256 test_mm256_permute2f128_ps(__m256 A, __m256 B) {
14431448
// CHECK-LABEL: test_mm256_permute2f128_ps
14441449
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
14451450
return _mm256_permute2f128_ps(A, B, 0x13);
14461451
}
14471452

1453+
TEST_CONSTEXPR(match_m256(_mm256_permute2f128_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}), 0xA7), 13.0f, 14.0f, 15.0f, 16.0f, 9.0f, 10.0f, 11.0f, 12.0f));
1454+
TEST_CONSTEXPR(match_m256(_mm256_permute2f128_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}), 0x5F), 13.0f, 14.0f, 15.0f, 16.0f, 5.0f, 6.0f, 7.0f, 8.0f));
1455+
TEST_CONSTEXPR(match_m256(_mm256_permute2f128_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}), 0x37), 13.0f, 14.0f, 15.0f, 16.0f, 13.0f, 14.0f, 15.0f, 16.0f));
1456+
TEST_CONSTEXPR(match_m256(_mm256_permute2f128_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}), 0x12), 9.0f, 10.0f, 11.0f, 12.0f, 5.0f, 6.0f, 7.0f, 8.0f));
1457+
14481458
__m256i test_mm256_permute2f128_si256(__m256i A, __m256i B) {
14491459
// CHECK-LABEL: test_mm256_permute2f128_si256
14501460
// CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
14511461
return _mm256_permute2f128_si256(A, B, 0x20);
14521462
}
14531463

1464+
TEST_CONSTEXPR(match_m256i(_mm256_permute2f128_si256(((__m256i){1LL, 2LL, 3LL, 4LL}), ((__m256i){5LL, 6LL, 7LL, 8LL}), 0xA7), 7LL, 8LL, 5LL, 6LL));
1465+
TEST_CONSTEXPR(match_m256i(_mm256_permute2f128_si256(((__m256i){1LL, 2LL, 3LL, 4LL}), ((__m256i){5LL, 6LL, 7LL, 8LL}), 0x5F), 7LL, 8LL, 3LL, 4LL));
1466+
TEST_CONSTEXPR(match_m256i(_mm256_permute2f128_si256(((__m256i){1LL, 2LL, 3LL, 4LL}), ((__m256i){5LL, 6LL, 7LL, 8LL}), 0x37), 7LL, 8LL, 7LL, 8LL));
1467+
TEST_CONSTEXPR(match_m256i(_mm256_permute2f128_si256(((__m256i){1LL, 2LL, 3LL, 4LL}), ((__m256i){5LL, 6LL, 7LL, 8LL}), 0x12), 5LL, 6LL, 3LL, 4LL));
1468+
14541469
__m128d test_mm_permutevar_pd(__m128d A, __m128i B) {
14551470
// CHECK-LABEL: test_mm_permutevar_pd
14561471
// CHECK: call {{.*}}<2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %{{.*}}, <2 x i64> %{{.*}})

clang/test/CodeGen/X86/avx2-builtins.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1106,6 +1106,11 @@ __m256i test_mm256_permute2x128_si256(__m256i a, __m256i b) {
11061106
return _mm256_permute2x128_si256(a, b, 0x38);
11071107
}
11081108

1109+
TEST_CONSTEXPR(match_m256i(_mm256_permute2x128_si256(((__m256i){1LL, 2LL, 3LL, 4LL}), ((__m256i){5LL, 6LL, 7LL, 8LL}), 0xA7), 7LL, 8LL, 0LL, 0LL));
1110+
TEST_CONSTEXPR(match_m256i(_mm256_permute2x128_si256(((__m256i){1LL, 2LL, 3LL, 4LL}), ((__m256i){5LL, 6LL, 7LL, 8LL}), 0x5F), 0LL, 0LL, 3LL, 4LL));
1111+
TEST_CONSTEXPR(match_m256i(_mm256_permute2x128_si256(((__m256i){1LL, 2LL, 3LL, 4LL}), ((__m256i){5LL, 6LL, 7LL, 8LL}), 0x37), 7LL, 8LL, 7LL, 8LL));
1112+
TEST_CONSTEXPR(match_m256i(_mm256_permute2x128_si256(((__m256i){1LL, 2LL, 3LL, 4LL}), ((__m256i){5LL, 6LL, 7LL, 8LL}), 0x12), 5LL, 6LL, 3LL, 4LL));
1113+
11091114
__m256i test_mm256_permute4x64_epi64(__m256i a) {
11101115
// CHECK-LABEL: test_mm256_permute4x64_epi64
11111116
// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> poison, <4 x i32> <i32 3, i32 0, i32 2, i32 0>

0 commit comments

Comments
 (0)