Dep: Update vixl to 662828c

This commit is contained in:
Stenzek
2024-06-14 17:27:12 +10:00
parent d45e218da7
commit f0c2832d03
66 changed files with 65453 additions and 10345 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -24,6 +24,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(__aarch64__) && (defined(__ANDROID__) || defined(__linux__))
#include <sys/auxv.h>
#define VIXL_USE_LINUX_HWCAP 1
#endif
#include "../utils-vixl.h"
#include "cpu-aarch64.h"
@ -31,6 +36,382 @@
namespace vixl {
namespace aarch64 {
// Locations of the ID register fields used for feature detection.
// Each Field names the least-significant bit of a field; where a second
// argument is present it is either a signedness tag (Field::kSigned) or an
// explicit field width in bits (the single-bit AA64SMFR0 fields) — see
// IDRegister::Field for the exact constructor semantics.
const IDRegister::Field AA64PFR0::kFP(16, Field::kSigned);
const IDRegister::Field AA64PFR0::kAdvSIMD(20, Field::kSigned);
const IDRegister::Field AA64PFR0::kRAS(28);
const IDRegister::Field AA64PFR0::kSVE(32);
const IDRegister::Field AA64PFR0::kDIT(48);
const IDRegister::Field AA64PFR0::kCSV2(56);
const IDRegister::Field AA64PFR0::kCSV3(60);
const IDRegister::Field AA64PFR1::kBT(0);
const IDRegister::Field AA64PFR1::kSSBS(4);
const IDRegister::Field AA64PFR1::kMTE(8);
const IDRegister::Field AA64PFR1::kSME(24);
const IDRegister::Field AA64ISAR0::kAES(4);
const IDRegister::Field AA64ISAR0::kSHA1(8);
const IDRegister::Field AA64ISAR0::kSHA2(12);
const IDRegister::Field AA64ISAR0::kCRC32(16);
const IDRegister::Field AA64ISAR0::kAtomic(20);
const IDRegister::Field AA64ISAR0::kRDM(28);
const IDRegister::Field AA64ISAR0::kSHA3(32);
const IDRegister::Field AA64ISAR0::kSM3(36);
const IDRegister::Field AA64ISAR0::kSM4(40);
const IDRegister::Field AA64ISAR0::kDP(44);
const IDRegister::Field AA64ISAR0::kFHM(48);
const IDRegister::Field AA64ISAR0::kTS(52);
const IDRegister::Field AA64ISAR0::kRNDR(60);
const IDRegister::Field AA64ISAR1::kDPB(0);
const IDRegister::Field AA64ISAR1::kAPA(4);
const IDRegister::Field AA64ISAR1::kAPI(8);
const IDRegister::Field AA64ISAR1::kJSCVT(12);
const IDRegister::Field AA64ISAR1::kFCMA(16);
const IDRegister::Field AA64ISAR1::kLRCPC(20);
const IDRegister::Field AA64ISAR1::kGPA(24);
const IDRegister::Field AA64ISAR1::kGPI(28);
const IDRegister::Field AA64ISAR1::kFRINTTS(32);
const IDRegister::Field AA64ISAR1::kSB(36);
const IDRegister::Field AA64ISAR1::kSPECRES(40);
const IDRegister::Field AA64ISAR1::kBF16(44);
const IDRegister::Field AA64ISAR1::kDGH(48);
const IDRegister::Field AA64ISAR1::kI8MM(52);
const IDRegister::Field AA64ISAR2::kWFXT(0);
const IDRegister::Field AA64ISAR2::kRPRES(4);
const IDRegister::Field AA64ISAR2::kMOPS(16);
const IDRegister::Field AA64ISAR2::kCSSC(52);
const IDRegister::Field AA64MMFR0::kECV(60);
const IDRegister::Field AA64MMFR1::kLO(16);
const IDRegister::Field AA64MMFR1::kAFP(44);
const IDRegister::Field AA64MMFR2::kAT(32);
const IDRegister::Field AA64ZFR0::kSVEver(0);
const IDRegister::Field AA64ZFR0::kAES(4);
const IDRegister::Field AA64ZFR0::kBitPerm(16);
const IDRegister::Field AA64ZFR0::kBF16(20);
const IDRegister::Field AA64ZFR0::kSHA3(32);
const IDRegister::Field AA64ZFR0::kSM4(40);
const IDRegister::Field AA64ZFR0::kI8MM(44);
const IDRegister::Field AA64ZFR0::kF32MM(52);
const IDRegister::Field AA64ZFR0::kF64MM(56);
const IDRegister::Field AA64SMFR0::kSMEf32f32(32, 1);
const IDRegister::Field AA64SMFR0::kSMEb16f32(34, 1);
const IDRegister::Field AA64SMFR0::kSMEf16f32(35, 1);
const IDRegister::Field AA64SMFR0::kSMEi8i32(36);
const IDRegister::Field AA64SMFR0::kSMEf64f64(48, 1);
const IDRegister::Field AA64SMFR0::kSMEi16i64(52);
const IDRegister::Field AA64SMFR0::kSMEfa64(63, 1);
CPUFeatures AA64PFR0::GetCPUFeatures() const {
  // Translate the AA64PFR0 fields into their CPUFeatures equivalents.
  // Each threshold check adds the features available at that field value.
  CPUFeatures features;
  // kFP and kAdvSIMD are signed fields: -1 means "not implemented", so a
  // value of 0 already indicates baseline support.
  if (Get(kFP) >= 0) features.Combine(CPUFeatures::kFP);
  if (Get(kFP) >= 1) features.Combine(CPUFeatures::kFPHalf);
  if (Get(kAdvSIMD) >= 0) features.Combine(CPUFeatures::kNEON);
  if (Get(kAdvSIMD) >= 1) features.Combine(CPUFeatures::kNEONHalf);
  if (Get(kRAS) >= 1) features.Combine(CPUFeatures::kRAS);
  if (Get(kSVE) >= 1) features.Combine(CPUFeatures::kSVE);
  if (Get(kDIT) >= 1) features.Combine(CPUFeatures::kDIT);
  if (Get(kCSV2) >= 1) features.Combine(CPUFeatures::kCSV2);
  if (Get(kCSV2) >= 2) features.Combine(CPUFeatures::kSCXTNUM);
  if (Get(kCSV3) >= 1) features.Combine(CPUFeatures::kCSV3);
  return features;
}
CPUFeatures AA64PFR1::GetCPUFeatures() const {
  // Translate the AA64PFR1 fields into CPUFeatures. Larger field values
  // subsume smaller ones, so cascading thresholds accumulate features.
  CPUFeatures features;
  if (Get(kBT) >= 1) features.Combine(CPUFeatures::kBTI);
  if (Get(kSSBS) >= 1) features.Combine(CPUFeatures::kSSBS);
  if (Get(kSSBS) >= 2) features.Combine(CPUFeatures::kSSBSControl);
  if (Get(kMTE) >= 1) features.Combine(CPUFeatures::kMTEInstructions);
  if (Get(kMTE) >= 2) features.Combine(CPUFeatures::kMTE);
  if (Get(kMTE) >= 3) features.Combine(CPUFeatures::kMTE3);
  if (Get(kSME) >= 1) features.Combine(CPUFeatures::kSME);
  return features;
}
// Translate the AA64ISAR0 instruction-set attribute fields into CPUFeatures.
// Where a field encodes several levels, each level adds to the previous one
// (e.g. kAES == 2 provides both AES and 1Q PMULL).
CPUFeatures AA64ISAR0::GetCPUFeatures() const {
CPUFeatures f;
if (Get(kAES) >= 1) f.Combine(CPUFeatures::kAES);
if (Get(kAES) >= 2) f.Combine(CPUFeatures::kPmull1Q);
if (Get(kSHA1) >= 1) f.Combine(CPUFeatures::kSHA1);
if (Get(kSHA2) >= 1) f.Combine(CPUFeatures::kSHA2);
// A SHA2 value of 2 additionally indicates the SHA512 instructions.
if (Get(kSHA2) >= 2) f.Combine(CPUFeatures::kSHA512);
if (Get(kCRC32) >= 1) f.Combine(CPUFeatures::kCRC32);
if (Get(kAtomic) >= 1) f.Combine(CPUFeatures::kAtomics);
if (Get(kRDM) >= 1) f.Combine(CPUFeatures::kRDM);
if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSHA3);
if (Get(kSM3) >= 1) f.Combine(CPUFeatures::kSM3);
if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSM4);
if (Get(kDP) >= 1) f.Combine(CPUFeatures::kDotProduct);
if (Get(kFHM) >= 1) f.Combine(CPUFeatures::kFHM);
// kTS level 1 is the flag-manipulation instructions; level 2 adds AXFLAG.
if (Get(kTS) >= 1) f.Combine(CPUFeatures::kFlagM);
if (Get(kTS) >= 2) f.Combine(CPUFeatures::kAXFlag);
if (Get(kRNDR) >= 1) f.Combine(CPUFeatures::kRNG);
return f;
}
// Translate the AA64ISAR1 fields into CPUFeatures, including the pointer
// authentication (PAuth) fields, which need special merging (see below).
CPUFeatures AA64ISAR1::GetCPUFeatures() const {
CPUFeatures f;
if (Get(kDPB) >= 1) f.Combine(CPUFeatures::kDCPoP);
if (Get(kDPB) >= 2) f.Combine(CPUFeatures::kDCCVADP);
if (Get(kJSCVT) >= 1) f.Combine(CPUFeatures::kJSCVT);
if (Get(kFCMA) >= 1) f.Combine(CPUFeatures::kFcma);
if (Get(kLRCPC) >= 1) f.Combine(CPUFeatures::kRCpc);
if (Get(kLRCPC) >= 2) f.Combine(CPUFeatures::kRCpcImm);
if (Get(kFRINTTS) >= 1) f.Combine(CPUFeatures::kFrintToFixedSizedInt);
if (Get(kSB) >= 1) f.Combine(CPUFeatures::kSB);
if (Get(kSPECRES) >= 1) f.Combine(CPUFeatures::kSPECRES);
if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kBF16);
if (Get(kBF16) >= 2) f.Combine(CPUFeatures::kEBF16);
if (Get(kDGH) >= 1) f.Combine(CPUFeatures::kDGH);
if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kI8MM);
// Only one of these fields should be non-zero, but they have the same
// encodings, so merge the logic.
int apx = std::max(Get(kAPI), Get(kAPA));
if (apx >= 1) {
f.Combine(CPUFeatures::kPAuth);
// APA (rather than API) indicates QARMA.
if (Get(kAPA) >= 1) f.Combine(CPUFeatures::kPAuthQARMA);
// NOTE(review): EnhancedPAC is tested with '==' while the others cascade
// with '>='; presumably 0b0011+ (EnhancedPAC2) supersedes it — confirm
// against the Arm ARM field description.
if (apx == 0b0010) f.Combine(CPUFeatures::kPAuthEnhancedPAC);
if (apx >= 0b0011) f.Combine(CPUFeatures::kPAuthEnhancedPAC2);
if (apx >= 0b0100) f.Combine(CPUFeatures::kPAuthFPAC);
if (apx >= 0b0101) f.Combine(CPUFeatures::kPAuthFPACCombined);
}
// Generic (GPI) vs QARMA-based (GPA) generic authentication.
if (Get(kGPI) >= 1) f.Combine(CPUFeatures::kPAuthGeneric);
if (Get(kGPA) >= 1) {
f.Combine(CPUFeatures::kPAuthGeneric, CPUFeatures::kPAuthGenericQARMA);
}
return f;
}
CPUFeatures AA64ISAR2::GetCPUFeatures() const {
  // Translate the AA64ISAR2 fields into CPUFeatures. Combine() only sets
  // bits, so the order of the checks is immaterial.
  CPUFeatures features;
  if (Get(kCSSC) >= 1) features.Combine(CPUFeatures::kCSSC);
  if (Get(kMOPS) >= 1) features.Combine(CPUFeatures::kMOPS);
  if (Get(kRPRES) >= 1) features.Combine(CPUFeatures::kRPRES);
  // Note: WFXT requires a field value of at least 2.
  if (Get(kWFXT) >= 2) features.Combine(CPUFeatures::kWFXT);
  return features;
}
CPUFeatures AA64MMFR0::GetCPUFeatures() const {
  // Only the ECV field of AA64MMFR0 maps onto a tracked feature.
  CPUFeatures features;
  if (Get(kECV) >= 1) features.Combine(CPUFeatures::kECV);
  return features;
}
CPUFeatures AA64MMFR1::GetCPUFeatures() const {
  // AA64MMFR1 contributes the LORegions and AFP features.
  CPUFeatures features;
  if (Get(kLO) >= 1) features.Combine(CPUFeatures::kLORegions);
  if (Get(kAFP) >= 1) features.Combine(CPUFeatures::kAFP);
  return features;
}
CPUFeatures AA64MMFR2::GetCPUFeatures() const {
  // Only the AT field of AA64MMFR2 maps onto a tracked feature (USCAT).
  CPUFeatures features;
  if (Get(kAT) >= 1) features.Combine(CPUFeatures::kUSCAT);
  return features;
}
// Translate the AA64ZFR0 (SVE feature) fields into CPUFeatures.
CPUFeatures AA64ZFR0::GetCPUFeatures() const {
// This register is only available with SVE, but reads-as-zero in its absence,
// so it's always safe to read it.
CPUFeatures f;
if (Get(kF64MM) >= 1) f.Combine(CPUFeatures::kSVEF64MM);
if (Get(kF32MM) >= 1) f.Combine(CPUFeatures::kSVEF32MM);
if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kSVEI8MM);
if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSVESM4);
if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSVESHA3);
if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kSVEBF16);
if (Get(kBF16) >= 2) f.Combine(CPUFeatures::kSVE_EBF16);
if (Get(kBitPerm) >= 1) f.Combine(CPUFeatures::kSVEBitPerm);
// kAES level 1 is the SVE AES instructions; level 2 adds 128-bit PMULL.
if (Get(kAES) >= 1) f.Combine(CPUFeatures::kSVEAES);
if (Get(kAES) >= 2) f.Combine(CPUFeatures::kSVEPmull128);
if (Get(kSVEver) >= 1) f.Combine(CPUFeatures::kSVE2);
return f;
}
// Translate the AA64SMFR0 (SME feature) fields into CPUFeatures.
// Most fields here are declared as single bits; kSMEi8i32 and kSMEi16i64 are
// wider fields tested against 15 (0b1111), which appears to be the
// "fully implemented" encoding — confirm against the Arm ARM.
CPUFeatures AA64SMFR0::GetCPUFeatures() const {
CPUFeatures f;
if (Get(kSMEf32f32) >= 1) f.Combine(CPUFeatures::kSMEf32f32);
if (Get(kSMEb16f32) >= 1) f.Combine(CPUFeatures::kSMEb16f32);
if (Get(kSMEf16f32) >= 1) f.Combine(CPUFeatures::kSMEf16f32);
if (Get(kSMEi8i32) >= 15) f.Combine(CPUFeatures::kSMEi8i32);
if (Get(kSMEf64f64) >= 1) f.Combine(CPUFeatures::kSMEf64f64);
if (Get(kSMEi16i64) >= 15) f.Combine(CPUFeatures::kSMEi16i64);
if (Get(kSMEfa64) >= 1) f.Combine(CPUFeatures::kSMEfa64);
return f;
}
// Extract a single field from this ID register's value, sign-extending it
// when the field was declared Field::kSigned.
int IDRegister::Get(IDRegister::Field field) const {
int msb = field.GetMsb();
int lsb = field.GetLsb();
// Fields are at most kMaxWidthInBits wide, so the extracted value always
// fits in an int; check that at compile time.
VIXL_STATIC_ASSERT(static_cast<size_t>(Field::kMaxWidthInBits) <
(sizeof(int) * kBitsPerByte));
switch (field.GetType()) {
case Field::kSigned:
return static_cast<int>(ExtractSignedBitfield64(msb, lsb, value_));
case Field::kUnsigned:
return static_cast<int>(ExtractUnsignedBitfield64(msb, lsb, value_));
}
// Both enumerators are handled above; this is only reached on a corrupt
// Field::Type value.
VIXL_UNREACHABLE();
return 0;
}
// Read every ID register named in VIXL_AARCH64_ID_REG_LIST (via the generated
// CPU::Read<Name>() accessors) and merge the features each one advertises.
CPUFeatures CPU::InferCPUFeaturesFromIDRegisters() {
CPUFeatures f;
#define VIXL_COMBINE_ID_REG(NAME, MRS_ARG) \
f.Combine(Read##NAME().GetCPUFeatures());
VIXL_AARCH64_ID_REG_LIST(VIXL_COMBINE_ID_REG)
#undef VIXL_COMBINE_ID_REG
return f;
}
// Derive CPUFeatures from what the OS reports. On Linux/Android this decodes
// the AT_HWCAP/AT_HWCAP2 auxiliary-vector bits; on other hosts nothing can be
// inferred here. If `option` permits and the kernel advertises MRS emulation,
// the result is refined by reading the (emulated) ID registers.
CPUFeatures CPU::InferCPUFeaturesFromOS(
CPUFeatures::QueryIDRegistersOption option) {
CPUFeatures features;
#ifdef VIXL_USE_LINUX_HWCAP
// Map each set bit onto a feature. Ideally, we'd use HWCAP_* macros rather
// than explicit bits, but explicit bits allow us to identify features that
// the toolchain doesn't know about.
static const CPUFeatures::Feature kFeatureBitsLow[] =
{// Bits 0-7
CPUFeatures::kFP,
CPUFeatures::kNEON,
CPUFeatures::kNone, // "EVTSTRM", which VIXL doesn't track.
CPUFeatures::kAES,
CPUFeatures::kPmull1Q,
CPUFeatures::kSHA1,
CPUFeatures::kSHA2,
CPUFeatures::kCRC32,
// Bits 8-15
CPUFeatures::kAtomics,
CPUFeatures::kFPHalf,
CPUFeatures::kNEONHalf,
CPUFeatures::kIDRegisterEmulation,
CPUFeatures::kRDM,
CPUFeatures::kJSCVT,
CPUFeatures::kFcma,
CPUFeatures::kRCpc,
// Bits 16-23
CPUFeatures::kDCPoP,
CPUFeatures::kSHA3,
CPUFeatures::kSM3,
CPUFeatures::kSM4,
CPUFeatures::kDotProduct,
CPUFeatures::kSHA512,
CPUFeatures::kSVE,
CPUFeatures::kFHM,
// Bits 24-31
CPUFeatures::kDIT,
CPUFeatures::kUSCAT,
CPUFeatures::kRCpcImm,
CPUFeatures::kFlagM,
CPUFeatures::kSSBSControl,
CPUFeatures::kSB,
CPUFeatures::kPAuth,
CPUFeatures::kPAuthGeneric};
VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsLow) < 64);
static const CPUFeatures::Feature kFeatureBitsHigh[] =
{// Bits 0-7
CPUFeatures::kDCCVADP,
CPUFeatures::kSVE2,
CPUFeatures::kSVEAES,
CPUFeatures::kSVEPmull128,
CPUFeatures::kSVEBitPerm,
CPUFeatures::kSVESHA3,
CPUFeatures::kSVESM4,
CPUFeatures::kAXFlag,
// Bits 8-15
CPUFeatures::kFrintToFixedSizedInt,
CPUFeatures::kSVEI8MM,
CPUFeatures::kSVEF32MM,
CPUFeatures::kSVEF64MM,
CPUFeatures::kSVEBF16,
CPUFeatures::kI8MM,
CPUFeatures::kBF16,
CPUFeatures::kDGH,
// Bits 16-23
CPUFeatures::kRNG,
CPUFeatures::kBTI,
CPUFeatures::kMTE,
CPUFeatures::kECV,
CPUFeatures::kAFP,
CPUFeatures::kRPRES,
CPUFeatures::kMTE3,
CPUFeatures::kSME,
// Bits 24-31
CPUFeatures::kSMEi16i64,
CPUFeatures::kSMEf64f64,
CPUFeatures::kSMEi8i32,
CPUFeatures::kSMEf16f32,
CPUFeatures::kSMEb16f32,
CPUFeatures::kSMEf32f32,
CPUFeatures::kSMEfa64,
CPUFeatures::kWFXT,
// Bits 32-39
CPUFeatures::kEBF16,
CPUFeatures::kSVE_EBF16};
VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsHigh) < 64);
// Fold into `features` each entry of `feature_array` whose corresponding
// hwcap bit (by index) is set.
auto combine_features = [&features](uint64_t hwcap,
const CPUFeatures::Feature* feature_array,
size_t features_size) {
for (size_t i = 0; i < features_size; i++) {
if (hwcap & (UINT64_C(1) << i)) features.Combine(feature_array[i]);
}
};
// AT_HWCAP maps onto kFeatureBitsLow; AT_HWCAP2 onto kFeatureBitsHigh.
uint64_t hwcap_low = getauxval(AT_HWCAP);
uint64_t hwcap_high = getauxval(AT_HWCAP2);
combine_features(hwcap_low, kFeatureBitsLow, ArrayLength(kFeatureBitsLow));
combine_features(hwcap_high, kFeatureBitsHigh, ArrayLength(kFeatureBitsHigh));
// MTE support from HWCAP2 signifies FEAT_MTE1 and FEAT_MTE2 support
if (features.Has(CPUFeatures::kMTE)) {
features.Combine(CPUFeatures::kMTEInstructions);
}
#endif // VIXL_USE_LINUX_HWCAP
// When the kernel traps and emulates MRS ("cpuid" hwcap), the ID registers
// give finer-grained information than the hwcap bits alone.
if ((option == CPUFeatures::kQueryIDRegistersIfAvailable) &&
(features.Has(CPUFeatures::kIDRegisterEmulation))) {
features.Combine(InferCPUFeaturesFromIDRegisters());
}
return features;
}
// Generate CPU::Read<RegName>() accessors for every register in
// VIXL_AARCH64_ID_REG_LIST. On AArch64 hosts each accessor wraps a single
// `mrs` read of the named system register; on other hosts the accessors are
// unreachable stubs that return a zero-valued register.
#ifdef __aarch64__
#define VIXL_READ_ID_REG(NAME, MRS_ARG) \
NAME CPU::Read##NAME() { \
uint64_t value = 0; \
__asm__("mrs %0, " MRS_ARG : "=r"(value)); \
return NAME(value); \
}
#else // __aarch64__
#define VIXL_READ_ID_REG(NAME, MRS_ARG) \
NAME CPU::Read##NAME() { \
VIXL_UNREACHABLE(); \
return NAME(0); \
}
#endif // __aarch64__
// Instantiate one accessor per listed ID register.
VIXL_AARCH64_ID_REG_LIST(VIXL_READ_ID_REG)
#undef VIXL_READ_ID_REG
// Initialise to smallest possible cache size.
unsigned CPU::dcache_line_size_ = 1;
unsigned CPU::icache_line_size_ = 1;
@ -76,7 +457,28 @@ uint32_t CPU::GetCacheType() {
}
void CPU::EnsureIAndDCacheCoherency(void *address, size_t length) {
// Query the SVE vector length. This requires CPUFeatures::kSVE.
int CPU::ReadSVEVectorLengthInBits() {
#ifdef __aarch64__
uint64_t vl;
// To support compilers that don't understand `rdvl`, encode the value
// directly and move it manually.
// `rdvl x0, #8` multiplies the vector length in bytes by the immediate,
// so x0 receives the vector length in bits.
__asm__(
" .word 0x04bf5100\n" // rdvl x0, #8
" mov %[vl], x0\n"
: [vl] "=r"(vl)
:
: "x0");
// The result must fit in the int return type.
VIXL_ASSERT(vl <= INT_MAX);
return static_cast<int>(vl);
#else
// SVE state cannot be queried on non-AArch64 hosts.
VIXL_UNREACHABLE();
return 0;
#endif
}
void CPU::EnsureIAndDCacheCoherency(void* address, size_t length) {
#ifdef __aarch64__
// Implement the cache synchronisation for all targets where AArch64 is the
// host, even if we're building the simulator for an AArch64 host. This
@ -174,5 +576,6 @@ void CPU::EnsureIAndDCacheCoherency(void *address, size_t length) {
#endif
}
} // namespace aarch64
} // namespace vixl

View File

@ -24,16 +24,54 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "cpu-features-auditor-aarch64.h"
#include "cpu-features.h"
#include "globals-vixl.h"
#include "utils-vixl.h"
#include "decoder-aarch64.h"
#include "cpu-features-auditor-aarch64.h"
#include "decoder-aarch64.h"
namespace vixl {
namespace aarch64 {
// Return the (lazily constructed, function-local static) table mapping
// instruction-form hashes to auditor visitor member functions. It starts
// from the default and simulator maps, then routes the listed by-element
// NEON forms to VisitNEONByIndexedElement, which records their extra
// feature requirements.
const CPUFeaturesAuditor::FormToVisitorFnMap*
CPUFeaturesAuditor::GetFormToVisitorFnMap() {
static const FormToVisitorFnMap form_to_visitor = {
DEFAULT_FORM_TO_VISITOR_MAP(CPUFeaturesAuditor),
SIM_AUD_VISITOR_MAP(CPUFeaturesAuditor),
{"fcmla_asimdelem_c_h"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
{"fcmla_asimdelem_c_s"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
{"fmlal2_asimdelem_lh"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
{"fmlal_asimdelem_lh"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
{"fmla_asimdelem_rh_h"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
{"fmla_asimdelem_r_sd"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
{"fmlsl2_asimdelem_lh"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
{"fmlsl_asimdelem_lh"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
{"fmls_asimdelem_rh_h"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
{"fmls_asimdelem_r_sd"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
{"fmulx_asimdelem_rh_h"_h,
&CPUFeaturesAuditor::VisitNEONByIndexedElement},
{"fmulx_asimdelem_r_sd"_h,
&CPUFeaturesAuditor::VisitNEONByIndexedElement},
{"fmul_asimdelem_rh_h"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
{"fmul_asimdelem_r_sd"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
{"sdot_asimdelem_d"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
{"smlal_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
{"smlsl_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
{"smull_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
{"sqdmlal_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
{"sqdmlsl_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
{"sqdmull_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
{"udot_asimdelem_d"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
{"umlal_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
{"umlsl_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
{"umull_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
};
return &form_to_visitor;
}
// Every instruction must update last_instruction_, even if only to clear it,
// and every instruction must also update seen_ once it has been fully handled.
// This scope makes that simple, and allows early returns in the decode logic.
@ -140,6 +178,25 @@ void CPUFeaturesAuditor::VisitAddSubWithCarry(const Instruction* instr) {
USE(instr);
}
void CPUFeaturesAuditor::VisitRotateRightIntoFlags(const Instruction* instr) {
  RecordInstructionFeaturesScope scope(this);
  // RMIF is the only encoding handled here; it requires the flag-manipulation
  // extension (FlagM).
  if (instr->Mask(RotateRightIntoFlagsMask) == RMIF) {
    scope.Record(CPUFeatures::kFlagM);
  }
}
void CPUFeaturesAuditor::VisitEvaluateIntoFlags(const Instruction* instr) {
  RecordInstructionFeaturesScope scope(this);
  // SETF8 and SETF16 both require the flag-manipulation extension (FlagM).
  const auto op = instr->Mask(EvaluateIntoFlagsMask);
  if ((op == SETF8) || (op == SETF16)) {
    scope.Record(CPUFeatures::kFlagM);
  }
}
void CPUFeaturesAuditor::VisitAtomicMemory(const Instruction* instr) {
RecordInstructionFeaturesScope scope(this);
switch (instr->Mask(AtomicMemoryMask)) {
@ -254,6 +311,37 @@ void CPUFeaturesAuditor::VisitDataProcessing2Source(const Instruction* instr) {
}
}
// All LDAPUR*/STLUR* (unscaled-offset) encodings in this class record both
// kRCpc and kRCpcImm.
void CPUFeaturesAuditor::VisitLoadStoreRCpcUnscaledOffset(
const Instruction* instr) {
RecordInstructionFeaturesScope scope(this);
switch (instr->Mask(LoadStoreRCpcUnscaledOffsetMask)) {
case LDAPURB:
case LDAPURSB_w:
case LDAPURSB_x:
case LDAPURH:
case LDAPURSH_w:
case LDAPURSH_x:
case LDAPUR_w:
case LDAPURSW:
case LDAPUR_x:
// These stores don't actually have RCpc semantics but they're included with
// the RCpc extensions.
case STLURB:
case STLURH:
case STLUR_w:
case STLUR_x:
scope.Record(CPUFeatures::kRCpc, CPUFeatures::kRCpcImm);
return;
}
}
void CPUFeaturesAuditor::VisitLoadStorePAC(const Instruction* instr) {
  USE(instr);
  RecordInstructionFeaturesScope scope(this);
  // Every load/store-with-pointer-authentication encoding requires PAuth.
  scope.Record(CPUFeatures::kPAuth);
}
void CPUFeaturesAuditor::VisitDataProcessing3Source(const Instruction* instr) {
RecordInstructionFeaturesScope scope(this);
USE(instr);
@ -329,6 +417,16 @@ void CPUFeaturesAuditor::VisitFPDataProcessing1Source(
case FRINTI_h:
scope.Record(CPUFeatures::kFPHalf);
return;
case FRINT32X_s:
case FRINT32X_d:
case FRINT32Z_s:
case FRINT32Z_d:
case FRINT64X_s:
case FRINT64X_d:
case FRINT64Z_s:
case FRINT64Z_d:
scope.Record(CPUFeatures::kFrintToFixedSizedInt);
return;
default:
// No special CPU features.
// This category includes some half-precision FCVT instructions that do
@ -410,8 +508,6 @@ void CPUFeaturesAuditor::VisitFPImmediate(const Instruction* instr) {
void CPUFeaturesAuditor::VisitFPIntegerConvert(const Instruction* instr) {
RecordInstructionFeaturesScope scope(this);
// All of these instructions require FP.
scope.Record(CPUFeatures::kFP);
switch (instr->Mask(FPIntegerConvertMask)) {
case FCVTAS_wh:
case FCVTAS_xh:
@ -441,17 +537,23 @@ void CPUFeaturesAuditor::VisitFPIntegerConvert(const Instruction* instr) {
case SCVTF_hx:
case UCVTF_hw:
case UCVTF_hx:
scope.Record(CPUFeatures::kFP);
scope.Record(CPUFeatures::kFPHalf);
return;
case FMOV_dx:
scope.RecordOneOrBothOf(CPUFeatures::kFP, CPUFeatures::kNEON);
return;
case FMOV_d1_x:
case FMOV_x_d1:
scope.Record(CPUFeatures::kFP);
scope.Record(CPUFeatures::kNEON);
return;
case FJCVTZS:
scope.Record(CPUFeatures::kFP);
scope.Record(CPUFeatures::kJSCVT);
return;
default:
// No special CPU features.
scope.Record(CPUFeatures::kFP);
return;
}
}
@ -611,6 +713,12 @@ void CPUFeaturesAuditor::VisitNEON2RegMisc(const Instruction* instr) {
case NEON_FCMLT_zero:
scope.Record(CPUFeatures::kFP);
return;
case NEON_FRINT32X:
case NEON_FRINT32Z:
case NEON_FRINT64X:
case NEON_FRINT64Z:
scope.Record(CPUFeatures::kFP, CPUFeatures::kFrintToFixedSizedInt);
return;
default:
// No additional features.
return;
@ -628,6 +736,12 @@ void CPUFeaturesAuditor::VisitNEON3Different(const Instruction* instr) {
RecordInstructionFeaturesScope scope(this);
// All of these instructions require NEON.
scope.Record(CPUFeatures::kNEON);
if (form_hash_ == "pmull_asimddiff_l"_h) {
if (instr->GetNEONSize() == 3) {
// Source is 1D or 2D, destination is 1Q.
scope.Record(CPUFeatures::kPmull1Q);
}
}
USE(instr);
}
@ -638,6 +752,17 @@ void CPUFeaturesAuditor::VisitNEON3Same(const Instruction* instr) {
if (instr->Mask(NEON3SameFPFMask) == NEON3SameFPFixed) {
scope.Record(CPUFeatures::kFP);
}
switch (instr->Mask(NEON3SameFHMMask)) {
case NEON_FMLAL:
case NEON_FMLAL2:
case NEON_FMLSL:
case NEON_FMLSL2:
scope.Record(CPUFeatures::kFP, CPUFeatures::kNEONHalf, CPUFeatures::kFHM);
return;
default:
// No additional features.
return;
}
}
void CPUFeaturesAuditor::VisitNEON3SameExtra(const Instruction* instr) {
@ -699,7 +824,18 @@ void CPUFeaturesAuditor::VisitNEONByIndexedElement(const Instruction* instr) {
scope.Record(CPUFeatures::kRDM);
return;
default:
// Fall through to check other FP instructions.
// Fall through to check other instructions.
break;
}
switch (instr->Mask(NEONByIndexedElementFPLongMask)) {
case NEON_FMLAL_H_byelement:
case NEON_FMLAL2_H_byelement:
case NEON_FMLSL_H_byelement:
case NEON_FMLSL2_H_byelement:
scope.Record(CPUFeatures::kFP, CPUFeatures::kNEONHalf, CPUFeatures::kFHM);
return;
default:
// Fall through to check other instructions.
break;
}
switch (instr->Mask(NEONByIndexedElementFPMask)) {
@ -782,7 +918,6 @@ void CPUFeaturesAuditor::VisitNEONModifiedImmediate(const Instruction* instr) {
scope.Record(CPUFeatures::kFP);
if (instr->ExtractBit(11)) scope.Record(CPUFeatures::kNEONHalf);
}
USE(instr);
}
void CPUFeaturesAuditor::VisitNEONPerm(const Instruction* instr) {
@ -980,6 +1115,165 @@ void CPUFeaturesAuditor::VisitPCRelAddressing(const Instruction* instr) {
USE(instr);
}
// Most SVE visitors require only SVE.
// The X-macro below lists every instruction class whose visitor needs to
// record kSVE and nothing else; the definition that follows expands to one
// trivial visitor per entry.
#define VIXL_SIMPLE_SVE_VISITOR_LIST(V) \
V(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets) \
V(SVE32BitGatherLoad_VectorPlusImm) \
V(SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets) \
V(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets) \
V(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets) \
V(SVE32BitGatherPrefetch_VectorPlusImm) \
V(SVE32BitScatterStore_ScalarPlus32BitScaledOffsets) \
V(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets) \
V(SVE32BitScatterStore_VectorPlusImm) \
V(SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets) \
V(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsets) \
V(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets) \
V(SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets) \
V(SVE64BitGatherLoad_VectorPlusImm) \
V(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets) \
V(SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets) \
V(SVE64BitGatherPrefetch_VectorPlusImm) \
V(SVE64BitScatterStore_ScalarPlus64BitScaledOffsets) \
V(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets) \
V(SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets) \
V(SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets) \
V(SVE64BitScatterStore_VectorPlusImm) \
V(SVEAddressGeneration) \
V(SVEBitwiseLogicalUnpredicated) \
V(SVEBitwiseShiftUnpredicated) \
V(SVEFFRInitialise) \
V(SVEFFRWriteFromPredicate) \
V(SVEFPAccumulatingReduction) \
V(SVEFPArithmeticUnpredicated) \
V(SVEFPCompareVectors) \
V(SVEFPCompareWithZero) \
V(SVEFPComplexAddition) \
V(SVEFPComplexMulAdd) \
V(SVEFPComplexMulAddIndex) \
V(SVEFPFastReduction) \
V(SVEFPMulIndex) \
V(SVEFPMulAdd) \
V(SVEFPMulAddIndex) \
V(SVEFPUnaryOpUnpredicated) \
V(SVEIncDecByPredicateCount) \
V(SVEIndexGeneration) \
V(SVEIntArithmeticUnpredicated) \
V(SVEIntCompareSignedImm) \
V(SVEIntCompareUnsignedImm) \
V(SVEIntCompareVectors) \
V(SVEIntMulAddPredicated) \
V(SVEIntMulAddUnpredicated) \
V(SVEIntReduction) \
V(SVEIntUnaryArithmeticPredicated) \
V(SVEMovprfx) \
V(SVEMulIndex) \
V(SVEPermuteVectorExtract) \
V(SVEPermuteVectorInterleaving) \
V(SVEPredicateCount) \
V(SVEPredicateLogical) \
V(SVEPropagateBreak) \
V(SVEStackFrameAdjustment) \
V(SVEStackFrameSize) \
V(SVEVectorSelect) \
V(SVEBitwiseLogical_Predicated) \
V(SVEBitwiseLogicalWithImm_Unpredicated) \
V(SVEBitwiseShiftByImm_Predicated) \
V(SVEBitwiseShiftByVector_Predicated) \
V(SVEBitwiseShiftByWideElements_Predicated) \
V(SVEBroadcastBitmaskImm) \
V(SVEBroadcastFPImm_Unpredicated) \
V(SVEBroadcastGeneralRegister) \
V(SVEBroadcastIndexElement) \
V(SVEBroadcastIntImm_Unpredicated) \
V(SVECompressActiveElements) \
V(SVEConditionallyBroadcastElementToVector) \
V(SVEConditionallyExtractElementToSIMDFPScalar) \
V(SVEConditionallyExtractElementToGeneralRegister) \
V(SVEConditionallyTerminateScalars) \
V(SVEConstructivePrefix_Unpredicated) \
V(SVEContiguousFirstFaultLoad_ScalarPlusScalar) \
V(SVEContiguousLoad_ScalarPlusImm) \
V(SVEContiguousLoad_ScalarPlusScalar) \
V(SVEContiguousNonFaultLoad_ScalarPlusImm) \
V(SVEContiguousNonTemporalLoad_ScalarPlusImm) \
V(SVEContiguousNonTemporalLoad_ScalarPlusScalar) \
V(SVEContiguousNonTemporalStore_ScalarPlusImm) \
V(SVEContiguousNonTemporalStore_ScalarPlusScalar) \
V(SVEContiguousPrefetch_ScalarPlusImm) \
V(SVEContiguousPrefetch_ScalarPlusScalar) \
V(SVEContiguousStore_ScalarPlusImm) \
V(SVEContiguousStore_ScalarPlusScalar) \
V(SVECopySIMDFPScalarRegisterToVector_Predicated) \
V(SVECopyFPImm_Predicated) \
V(SVECopyGeneralRegisterToVector_Predicated) \
V(SVECopyIntImm_Predicated) \
V(SVEElementCount) \
V(SVEExtractElementToSIMDFPScalarRegister) \
V(SVEExtractElementToGeneralRegister) \
V(SVEFPArithmetic_Predicated) \
V(SVEFPArithmeticWithImm_Predicated) \
V(SVEFPConvertPrecision) \
V(SVEFPConvertToInt) \
V(SVEFPExponentialAccelerator) \
V(SVEFPRoundToIntegralValue) \
V(SVEFPTrigMulAddCoefficient) \
V(SVEFPTrigSelectCoefficient) \
V(SVEFPUnaryOp) \
V(SVEIncDecRegisterByElementCount) \
V(SVEIncDecVectorByElementCount) \
V(SVEInsertSIMDFPScalarRegister) \
V(SVEInsertGeneralRegister) \
V(SVEIntAddSubtractImm_Unpredicated) \
V(SVEIntAddSubtractVectors_Predicated) \
V(SVEIntCompareScalarCountAndLimit) \
V(SVEIntConvertToFP) \
V(SVEIntDivideVectors_Predicated) \
V(SVEIntMinMaxImm_Unpredicated) \
V(SVEIntMinMaxDifference_Predicated) \
V(SVEIntMulImm_Unpredicated) \
V(SVEIntMulVectors_Predicated) \
V(SVELoadAndBroadcastElement) \
V(SVELoadAndBroadcastQOWord_ScalarPlusImm) \
V(SVELoadAndBroadcastQOWord_ScalarPlusScalar) \
V(SVELoadMultipleStructures_ScalarPlusImm) \
V(SVELoadMultipleStructures_ScalarPlusScalar) \
V(SVELoadPredicateRegister) \
V(SVELoadVectorRegister) \
V(SVEPartitionBreakCondition) \
V(SVEPermutePredicateElements) \
V(SVEPredicateFirstActive) \
V(SVEPredicateInitialize) \
V(SVEPredicateNextActive) \
V(SVEPredicateReadFromFFR_Predicated) \
V(SVEPredicateReadFromFFR_Unpredicated) \
V(SVEPredicateTest) \
V(SVEPredicateZero) \
V(SVEPropagateBreakToNextPartition) \
V(SVEReversePredicateElements) \
V(SVEReverseVectorElements) \
V(SVEReverseWithinElements) \
V(SVESaturatingIncDecRegisterByElementCount) \
V(SVESaturatingIncDecVectorByElementCount) \
V(SVEStoreMultipleStructures_ScalarPlusImm) \
V(SVEStoreMultipleStructures_ScalarPlusScalar) \
V(SVEStorePredicateRegister) \
V(SVEStoreVectorRegister) \
V(SVETableLookup) \
V(SVEUnpackPredicateElements) \
V(SVEUnpackVectorElements) \
V(SVEVectorSplice)
// Each generated visitor records kSVE and nothing else.
#define VIXL_DEFINE_SIMPLE_SVE_VISITOR(NAME) \
void CPUFeaturesAuditor::Visit##NAME(const Instruction* instr) { \
RecordInstructionFeaturesScope scope(this); \
scope.Record(CPUFeatures::kSVE); \
USE(instr); \
}
VIXL_SIMPLE_SVE_VISITOR_LIST(VIXL_DEFINE_SIMPLE_SVE_VISITOR)
#undef VIXL_DEFINE_SIMPLE_SVE_VISITOR
#undef VIXL_SIMPLE_SVE_VISITOR_LIST
void CPUFeaturesAuditor::VisitSystem(const Instruction* instr) {
RecordInstructionFeaturesScope scope(this);
if (instr->Mask(SystemHintFMask) == SystemHintFixed) {
@ -1001,7 +1295,19 @@ void CPUFeaturesAuditor::VisitSystem(const Instruction* instr) {
required.Combine(CPUFeatures::kPAuth);
break;
default:
if (instr->GetImmHint() == ESB) required.Combine(CPUFeatures::kRAS);
switch (instr->GetImmHint()) {
case ESB:
required.Combine(CPUFeatures::kRAS);
break;
case BTI:
case BTI_j:
case BTI_c:
case BTI_jc:
required.Combine(CPUFeatures::kBTI);
break;
default:
break;
}
break;
}
@ -1009,6 +1315,52 @@ void CPUFeaturesAuditor::VisitSystem(const Instruction* instr) {
// features are not implemented, so we record the corresponding features
// only if they are available.
if (available_.Has(required)) scope.Record(required);
} else if (instr->Mask(SystemSysMask) == SYS) {
switch (instr->GetSysOp()) {
// DC instruction variants.
case CGVAC:
case CGDVAC:
case CGVAP:
case CGDVAP:
case CIGVAC:
case CIGDVAC:
case GVA:
case GZVA:
scope.Record(CPUFeatures::kMTE);
break;
case CVAP:
scope.Record(CPUFeatures::kDCPoP);
break;
case CVADP:
scope.Record(CPUFeatures::kDCCVADP);
break;
case IVAU:
case CVAC:
case CVAU:
case CIVAC:
case ZVA:
// No special CPU features.
break;
}
} else if (instr->Mask(SystemPStateFMask) == SystemPStateFixed) {
switch (instr->Mask(SystemPStateMask)) {
case CFINV:
scope.Record(CPUFeatures::kFlagM);
break;
case AXFLAG:
case XAFLAG:
scope.Record(CPUFeatures::kAXFlag);
break;
}
} else if (instr->Mask(SystemSysRegFMask) == SystemSysRegFixed) {
if (instr->Mask(SystemSysRegMask) == MRS) {
switch (instr->GetImmSystemRegister()) {
case RNDR:
case RNDRRS:
scope.Record(CPUFeatures::kRNG);
break;
}
}
}
}
@ -1049,11 +1401,447 @@ void CPUFeaturesAuditor::VisitUnconditionalBranchToRegister(
}
}
void CPUFeaturesAuditor::VisitReserved(const Instruction* instr) {
  USE(instr);
  // No features to record; the scope still updates the auditor's
  // per-instruction bookkeeping.
  RecordInstructionFeaturesScope scope(this);
}
void CPUFeaturesAuditor::VisitUnimplemented(const Instruction* instr) {
  USE(instr);
  // No features to record; the scope still updates the auditor's
  // per-instruction bookkeeping.
  RecordInstructionFeaturesScope scope(this);
}
// Record the CPU features required by the instruction described by `metadata`.
// Instructions with a dedicated visitor are dispatched through the
// form-to-visitor table; the remainder are looked up in a static
// form-hash -> feature table.
void CPUFeaturesAuditor::Visit(Metadata* metadata, const Instruction* instr) {
  VIXL_ASSERT(metadata->count("form") > 0);
  const std::string& form = (*metadata)["form"];
  form_hash_ = Hash(form.c_str());
  const FormToVisitorFnMap* fv = CPUFeaturesAuditor::GetFormToVisitorFnMap();
  FormToVisitorFnMap::const_iterator it = fv->find(form_hash_);
  if (it == fv->end()) {
    RecordInstructionFeaturesScope scope(this);
    // Build the feature table once only: this map has several hundred
    // entries, and rebuilding it for every visited instruction is wasteful.
    static const std::map<uint32_t, const CPUFeatures> kFeatures = {
        {"adclb_z_zzz"_h, CPUFeatures::kSVE2},
        {"adclt_z_zzz"_h, CPUFeatures::kSVE2},
        {"addhnb_z_zz"_h, CPUFeatures::kSVE2},
        {"addhnt_z_zz"_h, CPUFeatures::kSVE2},
        {"addp_z_p_zz"_h, CPUFeatures::kSVE2},
        {"bcax_z_zzz"_h, CPUFeatures::kSVE2},
        {"bdep_z_zz"_h,
         CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEBitPerm)},
        {"bext_z_zz"_h,
         CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEBitPerm)},
        {"bgrp_z_zz"_h,
         CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEBitPerm)},
        {"bsl1n_z_zzz"_h, CPUFeatures::kSVE2},
        {"bsl2n_z_zzz"_h, CPUFeatures::kSVE2},
        {"bsl_z_zzz"_h, CPUFeatures::kSVE2},
        {"cadd_z_zz"_h, CPUFeatures::kSVE2},
        {"cdot_z_zzz"_h, CPUFeatures::kSVE2},
        {"cdot_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"cdot_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"cmla_z_zzz"_h, CPUFeatures::kSVE2},
        {"cmla_z_zzzi_h"_h, CPUFeatures::kSVE2},
        {"cmla_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"eor3_z_zzz"_h, CPUFeatures::kSVE2},
        {"eorbt_z_zz"_h, CPUFeatures::kSVE2},
        {"eortb_z_zz"_h, CPUFeatures::kSVE2},
        {"ext_z_zi_con"_h, CPUFeatures::kSVE2},
        {"faddp_z_p_zz"_h, CPUFeatures::kSVE2},
        {"fcvtlt_z_p_z_h2s"_h, CPUFeatures::kSVE2},
        {"fcvtlt_z_p_z_s2d"_h, CPUFeatures::kSVE2},
        {"fcvtnt_z_p_z_d2s"_h, CPUFeatures::kSVE2},
        {"fcvtnt_z_p_z_s2h"_h, CPUFeatures::kSVE2},
        {"fcvtx_z_p_z_d2s"_h, CPUFeatures::kSVE2},
        {"fcvtxnt_z_p_z_d2s"_h, CPUFeatures::kSVE2},
        {"flogb_z_p_z"_h, CPUFeatures::kSVE2},
        {"fmaxnmp_z_p_zz"_h, CPUFeatures::kSVE2},
        {"fmaxp_z_p_zz"_h, CPUFeatures::kSVE2},
        {"fminnmp_z_p_zz"_h, CPUFeatures::kSVE2},
        {"fminp_z_p_zz"_h, CPUFeatures::kSVE2},
        {"fmlalb_z_zzz"_h, CPUFeatures::kSVE2},
        {"fmlalb_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"fmlalt_z_zzz"_h, CPUFeatures::kSVE2},
        {"fmlalt_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"fmlslb_z_zzz"_h, CPUFeatures::kSVE2},
        {"fmlslb_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"fmlslt_z_zzz"_h, CPUFeatures::kSVE2},
        {"fmlslt_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"histcnt_z_p_zz"_h, CPUFeatures::kSVE2},
        {"histseg_z_zz"_h, CPUFeatures::kSVE2},
        {"ldnt1b_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
        {"ldnt1b_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
        {"ldnt1d_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
        {"ldnt1h_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
        {"ldnt1h_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
        {"ldnt1sb_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
        {"ldnt1sb_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
        {"ldnt1sh_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
        {"ldnt1sh_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
        {"ldnt1sw_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
        {"ldnt1w_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
        {"ldnt1w_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
        {"match_p_p_zz"_h, CPUFeatures::kSVE2},
        {"mla_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"mla_z_zzzi_h"_h, CPUFeatures::kSVE2},
        {"mla_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"mls_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"mls_z_zzzi_h"_h, CPUFeatures::kSVE2},
        {"mls_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"mul_z_zz"_h, CPUFeatures::kSVE2},
        {"mul_z_zzi_d"_h, CPUFeatures::kSVE2},
        {"mul_z_zzi_h"_h, CPUFeatures::kSVE2},
        {"mul_z_zzi_s"_h, CPUFeatures::kSVE2},
        {"nbsl_z_zzz"_h, CPUFeatures::kSVE2},
        {"nmatch_p_p_zz"_h, CPUFeatures::kSVE2},
        {"pmul_z_zz"_h, CPUFeatures::kSVE2},
        {"pmullb_z_zz"_h, CPUFeatures::kSVE2},
        {"pmullt_z_zz"_h, CPUFeatures::kSVE2},
        {"raddhnb_z_zz"_h, CPUFeatures::kSVE2},
        {"raddhnt_z_zz"_h, CPUFeatures::kSVE2},
        {"rshrnb_z_zi"_h, CPUFeatures::kSVE2},
        {"rshrnt_z_zi"_h, CPUFeatures::kSVE2},
        {"rsubhnb_z_zz"_h, CPUFeatures::kSVE2},
        {"rsubhnt_z_zz"_h, CPUFeatures::kSVE2},
        {"saba_z_zzz"_h, CPUFeatures::kSVE2},
        {"sabalb_z_zzz"_h, CPUFeatures::kSVE2},
        {"sabalt_z_zzz"_h, CPUFeatures::kSVE2},
        {"sabdlb_z_zz"_h, CPUFeatures::kSVE2},
        {"sabdlt_z_zz"_h, CPUFeatures::kSVE2},
        {"sadalp_z_p_z"_h, CPUFeatures::kSVE2},
        {"saddlb_z_zz"_h, CPUFeatures::kSVE2},
        {"saddlbt_z_zz"_h, CPUFeatures::kSVE2},
        {"saddlt_z_zz"_h, CPUFeatures::kSVE2},
        {"saddwb_z_zz"_h, CPUFeatures::kSVE2},
        {"saddwt_z_zz"_h, CPUFeatures::kSVE2},
        {"sbclb_z_zzz"_h, CPUFeatures::kSVE2},
        {"sbclt_z_zzz"_h, CPUFeatures::kSVE2},
        {"shadd_z_p_zz"_h, CPUFeatures::kSVE2},
        {"shrnb_z_zi"_h, CPUFeatures::kSVE2},
        {"shrnt_z_zi"_h, CPUFeatures::kSVE2},
        {"shsub_z_p_zz"_h, CPUFeatures::kSVE2},
        {"shsubr_z_p_zz"_h, CPUFeatures::kSVE2},
        {"sli_z_zzi"_h, CPUFeatures::kSVE2},
        {"smaxp_z_p_zz"_h, CPUFeatures::kSVE2},
        {"sminp_z_p_zz"_h, CPUFeatures::kSVE2},
        {"smlalb_z_zzz"_h, CPUFeatures::kSVE2},
        {"smlalb_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"smlalb_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"smlalt_z_zzz"_h, CPUFeatures::kSVE2},
        {"smlalt_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"smlalt_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"smlslb_z_zzz"_h, CPUFeatures::kSVE2},
        {"smlslb_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"smlslb_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"smlslt_z_zzz"_h, CPUFeatures::kSVE2},
        {"smlslt_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"smlslt_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"smulh_z_zz"_h, CPUFeatures::kSVE2},
        {"smullb_z_zz"_h, CPUFeatures::kSVE2},
        {"smullb_z_zzi_d"_h, CPUFeatures::kSVE2},
        {"smullb_z_zzi_s"_h, CPUFeatures::kSVE2},
        {"smullt_z_zz"_h, CPUFeatures::kSVE2},
        {"smullt_z_zzi_d"_h, CPUFeatures::kSVE2},
        {"smullt_z_zzi_s"_h, CPUFeatures::kSVE2},
        {"splice_z_p_zz_con"_h, CPUFeatures::kSVE2},
        {"sqabs_z_p_z"_h, CPUFeatures::kSVE2},
        {"sqadd_z_p_zz"_h, CPUFeatures::kSVE2},
        {"sqcadd_z_zz"_h, CPUFeatures::kSVE2},
        {"sqdmlalb_z_zzz"_h, CPUFeatures::kSVE2},
        {"sqdmlalb_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"sqdmlalb_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"sqdmlalbt_z_zzz"_h, CPUFeatures::kSVE2},
        {"sqdmlalt_z_zzz"_h, CPUFeatures::kSVE2},
        {"sqdmlalt_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"sqdmlalt_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"sqdmlslb_z_zzz"_h, CPUFeatures::kSVE2},
        {"sqdmlslb_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"sqdmlslb_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"sqdmlslbt_z_zzz"_h, CPUFeatures::kSVE2},
        {"sqdmlslt_z_zzz"_h, CPUFeatures::kSVE2},
        {"sqdmlslt_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"sqdmlslt_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"sqdmulh_z_zz"_h, CPUFeatures::kSVE2},
        {"sqdmulh_z_zzi_d"_h, CPUFeatures::kSVE2},
        {"sqdmulh_z_zzi_h"_h, CPUFeatures::kSVE2},
        {"sqdmulh_z_zzi_s"_h, CPUFeatures::kSVE2},
        {"sqdmullb_z_zz"_h, CPUFeatures::kSVE2},
        {"sqdmullb_z_zzi_d"_h, CPUFeatures::kSVE2},
        {"sqdmullb_z_zzi_s"_h, CPUFeatures::kSVE2},
        {"sqdmullt_z_zz"_h, CPUFeatures::kSVE2},
        {"sqdmullt_z_zzi_d"_h, CPUFeatures::kSVE2},
        {"sqdmullt_z_zzi_s"_h, CPUFeatures::kSVE2},
        {"sqneg_z_p_z"_h, CPUFeatures::kSVE2},
        {"sqrdcmlah_z_zzz"_h, CPUFeatures::kSVE2},
        {"sqrdcmlah_z_zzzi_h"_h, CPUFeatures::kSVE2},
        {"sqrdcmlah_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"sqrdmlah_z_zzz"_h, CPUFeatures::kSVE2},
        {"sqrdmlah_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"sqrdmlah_z_zzzi_h"_h, CPUFeatures::kSVE2},
        {"sqrdmlah_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"sqrdmlsh_z_zzz"_h, CPUFeatures::kSVE2},
        {"sqrdmlsh_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"sqrdmlsh_z_zzzi_h"_h, CPUFeatures::kSVE2},
        {"sqrdmlsh_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"sqrdmulh_z_zz"_h, CPUFeatures::kSVE2},
        {"sqrdmulh_z_zzi_d"_h, CPUFeatures::kSVE2},
        {"sqrdmulh_z_zzi_h"_h, CPUFeatures::kSVE2},
        {"sqrdmulh_z_zzi_s"_h, CPUFeatures::kSVE2},
        {"sqrshl_z_p_zz"_h, CPUFeatures::kSVE2},
        {"sqrshlr_z_p_zz"_h, CPUFeatures::kSVE2},
        {"sqrshrnb_z_zi"_h, CPUFeatures::kSVE2},
        {"sqrshrnt_z_zi"_h, CPUFeatures::kSVE2},
        {"sqrshrunb_z_zi"_h, CPUFeatures::kSVE2},
        {"sqrshrunt_z_zi"_h, CPUFeatures::kSVE2},
        {"sqshl_z_p_zi"_h, CPUFeatures::kSVE2},
        {"sqshl_z_p_zz"_h, CPUFeatures::kSVE2},
        {"sqshlr_z_p_zz"_h, CPUFeatures::kSVE2},
        {"sqshlu_z_p_zi"_h, CPUFeatures::kSVE2},
        {"sqshrnb_z_zi"_h, CPUFeatures::kSVE2},
        {"sqshrnt_z_zi"_h, CPUFeatures::kSVE2},
        {"sqshrunb_z_zi"_h, CPUFeatures::kSVE2},
        {"sqshrunt_z_zi"_h, CPUFeatures::kSVE2},
        {"sqsub_z_p_zz"_h, CPUFeatures::kSVE2},
        {"sqsubr_z_p_zz"_h, CPUFeatures::kSVE2},
        {"sqxtnb_z_zz"_h, CPUFeatures::kSVE2},
        {"sqxtnt_z_zz"_h, CPUFeatures::kSVE2},
        {"sqxtunb_z_zz"_h, CPUFeatures::kSVE2},
        {"sqxtunt_z_zz"_h, CPUFeatures::kSVE2},
        {"srhadd_z_p_zz"_h, CPUFeatures::kSVE2},
        {"sri_z_zzi"_h, CPUFeatures::kSVE2},
        {"srshl_z_p_zz"_h, CPUFeatures::kSVE2},
        {"srshlr_z_p_zz"_h, CPUFeatures::kSVE2},
        {"srshr_z_p_zi"_h, CPUFeatures::kSVE2},
        {"srsra_z_zi"_h, CPUFeatures::kSVE2},
        {"sshllb_z_zi"_h, CPUFeatures::kSVE2},
        {"sshllt_z_zi"_h, CPUFeatures::kSVE2},
        {"ssra_z_zi"_h, CPUFeatures::kSVE2},
        {"ssublb_z_zz"_h, CPUFeatures::kSVE2},
        {"ssublbt_z_zz"_h, CPUFeatures::kSVE2},
        {"ssublt_z_zz"_h, CPUFeatures::kSVE2},
        {"ssubltb_z_zz"_h, CPUFeatures::kSVE2},
        {"ssubwb_z_zz"_h, CPUFeatures::kSVE2},
        {"ssubwt_z_zz"_h, CPUFeatures::kSVE2},
        {"stnt1b_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
        {"stnt1b_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
        {"stnt1d_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
        {"stnt1h_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
        {"stnt1h_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
        {"stnt1w_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
        {"stnt1w_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
        {"subhnb_z_zz"_h, CPUFeatures::kSVE2},
        {"subhnt_z_zz"_h, CPUFeatures::kSVE2},
        {"suqadd_z_p_zz"_h, CPUFeatures::kSVE2},
        {"tbl_z_zz_2"_h, CPUFeatures::kSVE2},
        {"tbx_z_zz"_h, CPUFeatures::kSVE2},
        {"uaba_z_zzz"_h, CPUFeatures::kSVE2},
        {"uabalb_z_zzz"_h, CPUFeatures::kSVE2},
        {"uabalt_z_zzz"_h, CPUFeatures::kSVE2},
        {"uabdlb_z_zz"_h, CPUFeatures::kSVE2},
        {"uabdlt_z_zz"_h, CPUFeatures::kSVE2},
        {"uadalp_z_p_z"_h, CPUFeatures::kSVE2},
        {"uaddlb_z_zz"_h, CPUFeatures::kSVE2},
        {"uaddlt_z_zz"_h, CPUFeatures::kSVE2},
        {"uaddwb_z_zz"_h, CPUFeatures::kSVE2},
        {"uaddwt_z_zz"_h, CPUFeatures::kSVE2},
        {"uhadd_z_p_zz"_h, CPUFeatures::kSVE2},
        {"uhsub_z_p_zz"_h, CPUFeatures::kSVE2},
        {"uhsubr_z_p_zz"_h, CPUFeatures::kSVE2},
        {"umaxp_z_p_zz"_h, CPUFeatures::kSVE2},
        {"uminp_z_p_zz"_h, CPUFeatures::kSVE2},
        {"umlalb_z_zzz"_h, CPUFeatures::kSVE2},
        {"umlalb_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"umlalb_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"umlalt_z_zzz"_h, CPUFeatures::kSVE2},
        {"umlalt_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"umlalt_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"umlslb_z_zzz"_h, CPUFeatures::kSVE2},
        {"umlslb_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"umlslb_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"umlslt_z_zzz"_h, CPUFeatures::kSVE2},
        {"umlslt_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"umlslt_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"umulh_z_zz"_h, CPUFeatures::kSVE2},
        {"umullb_z_zz"_h, CPUFeatures::kSVE2},
        {"umullb_z_zzi_d"_h, CPUFeatures::kSVE2},
        {"umullb_z_zzi_s"_h, CPUFeatures::kSVE2},
        {"umullt_z_zz"_h, CPUFeatures::kSVE2},
        {"umullt_z_zzi_d"_h, CPUFeatures::kSVE2},
        {"umullt_z_zzi_s"_h, CPUFeatures::kSVE2},
        {"uqadd_z_p_zz"_h, CPUFeatures::kSVE2},
        {"uqrshl_z_p_zz"_h, CPUFeatures::kSVE2},
        {"uqrshlr_z_p_zz"_h, CPUFeatures::kSVE2},
        {"uqrshrnb_z_zi"_h, CPUFeatures::kSVE2},
        {"uqrshrnt_z_zi"_h, CPUFeatures::kSVE2},
        {"uqshl_z_p_zi"_h, CPUFeatures::kSVE2},
        {"uqshl_z_p_zz"_h, CPUFeatures::kSVE2},
        {"uqshlr_z_p_zz"_h, CPUFeatures::kSVE2},
        {"uqshrnb_z_zi"_h, CPUFeatures::kSVE2},
        {"uqshrnt_z_zi"_h, CPUFeatures::kSVE2},
        {"uqsub_z_p_zz"_h, CPUFeatures::kSVE2},
        {"uqsubr_z_p_zz"_h, CPUFeatures::kSVE2},
        {"uqxtnb_z_zz"_h, CPUFeatures::kSVE2},
        {"uqxtnt_z_zz"_h, CPUFeatures::kSVE2},
        {"urecpe_z_p_z"_h, CPUFeatures::kSVE2},
        {"urhadd_z_p_zz"_h, CPUFeatures::kSVE2},
        {"urshl_z_p_zz"_h, CPUFeatures::kSVE2},
        {"urshlr_z_p_zz"_h, CPUFeatures::kSVE2},
        {"urshr_z_p_zi"_h, CPUFeatures::kSVE2},
        {"ursqrte_z_p_z"_h, CPUFeatures::kSVE2},
        {"ursra_z_zi"_h, CPUFeatures::kSVE2},
        {"ushllb_z_zi"_h, CPUFeatures::kSVE2},
        {"ushllt_z_zi"_h, CPUFeatures::kSVE2},
        {"usqadd_z_p_zz"_h, CPUFeatures::kSVE2},
        {"usra_z_zi"_h, CPUFeatures::kSVE2},
        {"usublb_z_zz"_h, CPUFeatures::kSVE2},
        {"usublt_z_zz"_h, CPUFeatures::kSVE2},
        {"usubwb_z_zz"_h, CPUFeatures::kSVE2},
        {"usubwt_z_zz"_h, CPUFeatures::kSVE2},
        {"whilege_p_p_rr"_h, CPUFeatures::kSVE2},
        {"whilegt_p_p_rr"_h, CPUFeatures::kSVE2},
        {"whilehi_p_p_rr"_h, CPUFeatures::kSVE2},
        {"whilehs_p_p_rr"_h, CPUFeatures::kSVE2},
        {"whilerw_p_rr"_h, CPUFeatures::kSVE2},
        {"whilewr_p_rr"_h, CPUFeatures::kSVE2},
        {"xar_z_zzi"_h, CPUFeatures::kSVE2},
        {"smmla_z_zzz"_h,
         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)},
        {"ummla_z_zzz"_h,
         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)},
        {"usmmla_z_zzz"_h,
         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)},
        {"fmmla_z_zzz_s"_h,
         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF32MM)},
        {"fmmla_z_zzz_d"_h,
         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
        {"smmla_asimdsame2_g"_h,
         CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)},
        {"ummla_asimdsame2_g"_h,
         CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)},
        {"usmmla_asimdsame2_g"_h,
         CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)},
        {"ld1row_z_p_bi_u32"_h,
         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
        {"ld1row_z_p_br_contiguous"_h,
         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
        {"ld1rod_z_p_bi_u64"_h,
         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
        {"ld1rod_z_p_br_contiguous"_h,
         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
        {"ld1rob_z_p_bi_u8"_h,
         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
        {"ld1rob_z_p_br_contiguous"_h,
         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
        {"ld1roh_z_p_bi_u16"_h,
         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
        {"ld1roh_z_p_br_contiguous"_h,
         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
        {"usdot_asimdsame2_d"_h,
         CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)},
        {"sudot_asimdelem_d"_h,
         CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)},
        {"usdot_asimdelem_d"_h,
         CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)},
        {"usdot_z_zzz_s"_h,
         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)},
        {"usdot_z_zzzi_s"_h,
         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)},
        {"sudot_z_zzzi_s"_h,
         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)},
        {"addg_64_addsub_immtags"_h, CPUFeatures::kMTE},
        {"gmi_64g_dp_2src"_h, CPUFeatures::kMTE},
        {"irg_64i_dp_2src"_h, CPUFeatures::kMTE},
        {"ldg_64loffset_ldsttags"_h, CPUFeatures::kMTE},
        {"st2g_64soffset_ldsttags"_h, CPUFeatures::kMTE},
        {"st2g_64spost_ldsttags"_h, CPUFeatures::kMTE},
        {"st2g_64spre_ldsttags"_h, CPUFeatures::kMTE},
        {"stgp_64_ldstpair_off"_h, CPUFeatures::kMTE},
        {"stgp_64_ldstpair_post"_h, CPUFeatures::kMTE},
        {"stgp_64_ldstpair_pre"_h, CPUFeatures::kMTE},
        {"stg_64soffset_ldsttags"_h, CPUFeatures::kMTE},
        {"stg_64spost_ldsttags"_h, CPUFeatures::kMTE},
        {"stg_64spre_ldsttags"_h, CPUFeatures::kMTE},
        {"stz2g_64soffset_ldsttags"_h, CPUFeatures::kMTE},
        {"stz2g_64spost_ldsttags"_h, CPUFeatures::kMTE},
        {"stz2g_64spre_ldsttags"_h, CPUFeatures::kMTE},
        {"stzg_64soffset_ldsttags"_h, CPUFeatures::kMTE},
        {"stzg_64spost_ldsttags"_h, CPUFeatures::kMTE},
        {"stzg_64spre_ldsttags"_h, CPUFeatures::kMTE},
        {"subg_64_addsub_immtags"_h, CPUFeatures::kMTE},
        {"subps_64s_dp_2src"_h, CPUFeatures::kMTE},
        {"subp_64s_dp_2src"_h, CPUFeatures::kMTE},
        {"cpyen_cpy_memcms"_h, CPUFeatures::kMOPS},
        {"cpyern_cpy_memcms"_h, CPUFeatures::kMOPS},
        {"cpyewn_cpy_memcms"_h, CPUFeatures::kMOPS},
        {"cpye_cpy_memcms"_h, CPUFeatures::kMOPS},
        {"cpyfen_cpy_memcms"_h, CPUFeatures::kMOPS},
        {"cpyfern_cpy_memcms"_h, CPUFeatures::kMOPS},
        {"cpyfewn_cpy_memcms"_h, CPUFeatures::kMOPS},
        {"cpyfe_cpy_memcms"_h, CPUFeatures::kMOPS},
        {"cpyfmn_cpy_memcms"_h, CPUFeatures::kMOPS},
        {"cpyfmrn_cpy_memcms"_h, CPUFeatures::kMOPS},
        {"cpyfmwn_cpy_memcms"_h, CPUFeatures::kMOPS},
        {"cpyfm_cpy_memcms"_h, CPUFeatures::kMOPS},
        {"cpyfpn_cpy_memcms"_h, CPUFeatures::kMOPS},
        {"cpyfprn_cpy_memcms"_h, CPUFeatures::kMOPS},
        {"cpyfpwn_cpy_memcms"_h, CPUFeatures::kMOPS},
        {"cpyfp_cpy_memcms"_h, CPUFeatures::kMOPS},
        {"cpymn_cpy_memcms"_h, CPUFeatures::kMOPS},
        {"cpymrn_cpy_memcms"_h, CPUFeatures::kMOPS},
        {"cpymwn_cpy_memcms"_h, CPUFeatures::kMOPS},
        {"cpym_cpy_memcms"_h, CPUFeatures::kMOPS},
        {"cpypn_cpy_memcms"_h, CPUFeatures::kMOPS},
        {"cpyprn_cpy_memcms"_h, CPUFeatures::kMOPS},
        {"cpypwn_cpy_memcms"_h, CPUFeatures::kMOPS},
        {"cpyp_cpy_memcms"_h, CPUFeatures::kMOPS},
        {"seten_set_memcms"_h, CPUFeatures::kMOPS},
        {"sete_set_memcms"_h, CPUFeatures::kMOPS},
        {"setgen_set_memcms"_h,
         CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)},
        {"setge_set_memcms"_h,
         CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)},
        {"setgmn_set_memcms"_h,
         CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)},
        {"setgm_set_memcms"_h,
         CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)},
        {"setgpn_set_memcms"_h,
         CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)},
        {"setgp_set_memcms"_h,
         CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)},
        {"setmn_set_memcms"_h, CPUFeatures::kMOPS},
        {"setm_set_memcms"_h, CPUFeatures::kMOPS},
        {"setpn_set_memcms"_h, CPUFeatures::kMOPS},
        {"setp_set_memcms"_h, CPUFeatures::kMOPS},
        {"abs_32_dp_1src"_h, CPUFeatures::kCSSC},
        {"abs_64_dp_1src"_h, CPUFeatures::kCSSC},
        {"cnt_32_dp_1src"_h, CPUFeatures::kCSSC},
        {"cnt_64_dp_1src"_h, CPUFeatures::kCSSC},
        {"ctz_32_dp_1src"_h, CPUFeatures::kCSSC},
        {"ctz_64_dp_1src"_h, CPUFeatures::kCSSC},
        {"smax_32_dp_2src"_h, CPUFeatures::kCSSC},
        {"smax_64_dp_2src"_h, CPUFeatures::kCSSC},
        {"smin_32_dp_2src"_h, CPUFeatures::kCSSC},
        {"smin_64_dp_2src"_h, CPUFeatures::kCSSC},
        {"umax_32_dp_2src"_h, CPUFeatures::kCSSC},
        {"umax_64_dp_2src"_h, CPUFeatures::kCSSC},
        {"umin_32_dp_2src"_h, CPUFeatures::kCSSC},
        {"umin_64_dp_2src"_h, CPUFeatures::kCSSC},
        {"smax_32_minmax_imm"_h, CPUFeatures::kCSSC},
        {"smax_64_minmax_imm"_h, CPUFeatures::kCSSC},
        {"smin_32_minmax_imm"_h, CPUFeatures::kCSSC},
        {"smin_64_minmax_imm"_h, CPUFeatures::kCSSC},
        {"umax_32u_minmax_imm"_h, CPUFeatures::kCSSC},
        {"umax_64u_minmax_imm"_h, CPUFeatures::kCSSC},
        {"umin_32u_minmax_imm"_h, CPUFeatures::kCSSC},
        {"umin_64u_minmax_imm"_h, CPUFeatures::kCSSC},
    };
    // Single lookup instead of count() followed by operator[]; operator[]
    // also cannot be used on a map with a const mapped type.
    std::map<uint32_t, const CPUFeatures>::const_iterator feature_it =
        kFeatures.find(form_hash_);
    if (feature_it != kFeatures.end()) {
      scope.Record(feature_it->second);
    }
  } else {
    (it->second)(this, instr);
  }
}
} // namespace aarch64
} // namespace vixl

View File

@ -0,0 +1,499 @@
// Copyright 2023, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
#include "debugger-aarch64.h"

#include <cerrno>
#include <cmath>
#include <cstring>
#include <errno.h>
#include <limits>
#include <unistd.h>
#include <utility>
namespace vixl {
namespace aarch64 {
// Construct a debugger attached to `sim`, reading commands from std::cin and
// writing to the simulator's output stream.
Debugger::Debugger(Simulator* sim)
    : sim_(sim), input_stream_(&std::cin), ostream_(sim->GetOutputStream()) {
  // Register all basic debugger commands.
  // NOTE(review): registration order is observable - it determines the order
  // commands appear in `help` output and the order duplicate checks run.
  RegisterCmd<HelpCmd>();
  RegisterCmd<BreakCmd>();
  RegisterCmd<StepCmd>();
  RegisterCmd<ContinueCmd>();
  RegisterCmd<PrintCmd>();
  RegisterCmd<TraceCmd>();
  RegisterCmd<GdbCmd>();
}
// Instantiate a command of type T and add it to the command list, aborting if
// its command word or alias collides with an already registered command.
template <class T>
void Debugger::RegisterCmd() {
  auto cmd_to_add = std::make_unique<T>(sim_);
  std::string_view word = cmd_to_add->GetCommandWord();
  std::string_view alias = cmd_to_add->GetCommandAlias();

  // Compare the new command's word and alias against every existing command.
  for (const auto& existing : debugger_cmds_) {
    std::string_view existing_word = existing->GetCommandWord();
    std::string_view existing_alias = existing->GetCommandAlias();
    if (word == existing_word) {
      VIXL_ABORT_WITH_MSG("Command word matches an existing command word.");
    } else if (word == existing_alias) {
      VIXL_ABORT_WITH_MSG("Command word matches an existing command alias.");
    }
    // An empty alias never collides.
    if (alias != "") {
      if (alias == existing_word) {
        VIXL_ABORT_WITH_MSG("Command alias matches an existing command word.");
      } else if (alias == existing_alias) {
        VIXL_ABORT_WITH_MSG("Command alias matches an existing command alias.");
      }
    }
  }
  debugger_cmds_.push_back(std::move(cmd_to_add));
}
// Return true if a breakpoint is registered at the simulator's current PC.
bool Debugger::IsAtBreakpoint() const {
  uint64_t current_pc = reinterpret_cast<uint64_t>(sim_->ReadPc());
  return IsBreakpoint(current_pc);
}
void Debugger::Debug() {
DebugReturn done = DebugContinue;
while (done == DebugContinue) {
// Disassemble the next instruction to execute.
PrintDisassembler print_disasm = PrintDisassembler(ostream_);
print_disasm.Disassemble(sim_->ReadPc());
// Read the command line.
fprintf(ostream_, "sim> ");
std::string line;
std::getline(*input_stream_, line);
// Remove all control characters from the command string.
line.erase(std::remove_if(line.begin(),
line.end(),
[](char c) { return std::iscntrl(c); }),
line.end());
// Assume input from std::cin has already been output (e.g: by a terminal)
// but input from elsewhere (e.g: from a testing input stream) has not.
if (input_stream_ != &std::cin) {
fprintf(ostream_, "%s\n", line.c_str());
}
// Parse the command into tokens.
std::vector<std::string> tokenized_cmd = Tokenize(line);
if (!tokenized_cmd.empty()) {
done = ExecDebugCommand(tokenized_cmd);
}
}
}
// Parse `uint64_str` as an unsigned 64-bit integer in the given base.
// Returns std::nullopt when the string is not a valid number or is out of
// range.
std::optional<uint64_t> Debugger::ParseUint64String(std::string_view uint64_str,
                                                    int base) {
  // Clear any previous errors.
  errno = 0;

  // strtoull uses 0 to indicate that no conversion was possible so first
  // check that the string isn't zero.
  if (IsZeroUint64String(uint64_str, base)) {
    return 0;
  }

  // A std::string_view is not guaranteed to be NUL-terminated, but strtoull
  // requires a C string (and the end-pointer check below must not read past
  // the view). Copy the characters into a std::string first.
  std::string str(uint64_str);

  // Cannot use stoi as it might not be possible to use exceptions.
  char* end;
  uint64_t value = std::strtoull(str.c_str(), &end, base);
  // Reject if nothing (or only a prefix) was consumed, or on overflow.
  if (value == 0 || end != str.c_str() + str.size() || errno == ERANGE) {
    return std::nullopt;
  }
  return value;
}
// Parse a register name (e.g: "X0", "v31", "lr", "sp") into its prefix
// character and register code. Returns std::nullopt on malformed input.
std::optional<Debugger::RegisterParsedFormat> Debugger::ParseRegString(
    std::string_view reg_str) {
  // A register should only have 2 (e.g: X0) or 3 (e.g: X31) characters.
  if (reg_str.size() < 2 || reg_str.size() > 3) {
    return std::nullopt;
  }

  // Check for aliases of registers.
  if (reg_str == "lr") {
    return {{'X', kLinkRegCode}};
  } else if (reg_str == "sp") {
    return {{'X', kSpRegCode}};
  }

  unsigned max_reg_num;
  // Cast through unsigned char before calling std::toupper: passing a plain
  // (possibly negative) char value is undefined behaviour.
  char reg_prefix = static_cast<char>(
      std::toupper(static_cast<unsigned char>(reg_str.front())));
  switch (reg_prefix) {
    case 'W':
      VIXL_FALLTHROUGH();
    case 'X':
      max_reg_num = kNumberOfRegisters - 1;
      break;
    case 'V':
      max_reg_num = kNumberOfVRegisters - 1;
      break;
    case 'Z':
      max_reg_num = kNumberOfZRegisters - 1;
      break;
    case 'P':
      max_reg_num = kNumberOfPRegisters - 1;
      break;
    default:
      return std::nullopt;
  }

  // Parse the numeric part: everything after the prefix letter.
  std::string_view str_code = reg_str.substr(1);
  auto reg_code = ParseUint64String(str_code, 10);
  if (!reg_code) {
    return std::nullopt;
  }
  if (*reg_code > max_reg_num) {
    return std::nullopt;
  }
  return {{reg_prefix, *reg_code}};
}
// Print the usage summary for every registered command.
void Debugger::PrintUsage() {
  for (const auto& cmd : debugger_cmds_) {
    // Print commands in the following format:
    //  foo / f
    //  foo <arg>
    //  A description of the foo command.
    //
    std::string_view cmd_word = cmd->GetCommandWord();
    std::string_view cmd_alias = cmd->GetCommandAlias();
    // A std::string_view is not guaranteed to be NUL-terminated, so print
    // views with an explicit length ("%.*s") rather than "%s" on data().
    if (cmd_alias != "") {
      fprintf(ostream_,
              "%.*s / %.*s\n",
              static_cast<int>(cmd_word.size()),
              cmd_word.data(),
              static_cast<int>(cmd_alias.size()),
              cmd_alias.data());
    } else {
      fprintf(ostream_,
              "%.*s\n",
              static_cast<int>(cmd_word.size()),
              cmd_word.data());
    }
    std::string_view args_str = cmd->GetArgsString();
    if (args_str != "") {
      fprintf(ostream_,
              "\t%.*s %.*s\n",
              static_cast<int>(cmd_word.size()),
              cmd_word.data(),
              static_cast<int>(args_str.size()),
              args_str.data());
    }
    std::string_view description = cmd->GetDescription();
    if (description != "") {
      fprintf(ostream_,
              "\t%.*s\n",
              static_cast<int>(description.size()),
              description.data());
    }
  }
}
// Split `input_line` into words on `separator`, discarding empty tokens that
// would arise from repeated, leading or trailing separators.
std::vector<std::string> Debugger::Tokenize(std::string_view input_line,
                                            char separator) {
  std::vector<std::string> words;

  if (input_line.empty()) {
    return words;
  }

  for (auto separator_pos = input_line.find(separator);
       separator_pos != input_line.npos;
       separator_pos = input_line.find(separator)) {
    // Skip consecutive, repeated separators.
    if (separator_pos != 0) {
      words.push_back(std::string{input_line.substr(0, separator_pos)});
    }
    // Remove characters up to and including the separator.
    input_line.remove_prefix(separator_pos + 1);
  }

  // Add the rest of the string to the vector, unless it is empty: an input
  // ending in a separator (e.g: "step ") must not yield a bogus empty token.
  if (!input_line.empty()) {
    words.push_back(std::string{input_line});
  }
  return words;
}
// Find the command matching the first token and run its action with the
// remaining tokens as arguments.
DebugReturn Debugger::ExecDebugCommand(
    const std::vector<std::string>& tokenized_cmd) {
  const std::string& cmd_word = tokenized_cmd.front();
  for (const auto& cmd : debugger_cmds_) {
    bool matches = (cmd_word == cmd->GetCommandWord()) ||
                   (cmd_word == cmd->GetCommandAlias());
    if (matches) {
      // Everything after the command word is forwarded as arguments.
      const std::vector<std::string> args(tokenized_cmd.begin() + 1,
                                          tokenized_cmd.end());
      return cmd->Action(args);
    }
  }

  fprintf(ostream_, "Error: command '%s' not found\n", cmd_word.c_str());
  return DebugContinue;
}
// Return true if `uint64_str` spells the value zero (e.g: "0", "000", "0x0")
// in the given base; an empty string is not considered zero.
bool Debugger::IsZeroUint64String(std::string_view uint64_str, int base) {
  // Remove any hex prefixes.
  if (base == 0 || base == 16) {
    std::string_view prefix = uint64_str.substr(0, 2);
    if ((prefix == "0x") || (prefix == "0X")) {
      uint64_str.remove_prefix(2);
    }
  }

  if (uint64_str.empty()) {
    return false;
  }

  // The string is zero iff every remaining character is '0'.
  return uint64_str.find_first_not_of('0') == std::string_view::npos;
}
// Base-class constructor: store the command's word, alias, argument summary
// and description. The string parameters are sinks taken by value, so move
// them into the members rather than copying a second time.
DebuggerCmd::DebuggerCmd(Simulator* sim,
                         std::string cmd_word,
                         std::string cmd_alias,
                         std::string args_str,
                         std::string description)
    : sim_(sim),
      ostream_(sim->GetOutputStream()),
      command_word_(std::move(cmd_word)),
      command_alias_(std::move(cmd_alias)),
      args_str_(std::move(args_str)),
      description_(std::move(description)) {}
// `help` takes no arguments: print the usage of every registered command.
DebugReturn HelpCmd::Action(const std::vector<std::string>& args) {
  USE(args);
  Debugger* debugger = sim_->GetDebugger();
  debugger->PrintUsage();
  return DebugContinue;
}
// `break <address>`: toggle a breakpoint at the given address - remove it if
// one is already registered there, otherwise add one.
DebugReturn BreakCmd::Action(const std::vector<std::string>& args) {
  if (args.size() != 1) {
    fprintf(ostream_, "Error: Use `break <address>` to set a breakpoint\n");
    return DebugContinue;
  }

  auto break_addr = Debugger::ParseUint64String(args.front());
  if (!break_addr) {
    fprintf(ostream_, "Error: Use `break <address>` to set a breakpoint\n");
    return DebugContinue;
  }

  Debugger* debugger = sim_->GetDebugger();
  if (debugger->IsBreakpoint(*break_addr)) {
    debugger->RemoveBreakpoint(*break_addr);
    fprintf(ostream_,
            "Breakpoint successfully removed at: 0x%" PRIx64 "\n",
            *break_addr);
  } else {
    debugger->RegisterBreakpoint(*break_addr);
    fprintf(ostream_,
            "Breakpoint successfully added at: 0x%" PRIx64 "\n",
            *break_addr);
  }
  return DebugContinue;
}
// `step [number]`: execute one instruction (or `number` instructions),
// tracing the disassembly of each one after the first.
DebugReturn StepCmd::Action(const std::vector<std::string>& args) {
  // At most one argument: the instruction count.
  if (args.size() > 1) {
    fprintf(ostream_,
            "Error: use `step [number]` to step an optional number of"
            " instructions\n");
    return DebugContinue;
  }

  // Step 1 instruction by default.
  std::optional<uint64_t> number_of_instructions_to_execute{1};
  if (args.size() == 1) {
    // Parse the argument to step that number of instructions.
    std::string arg = args.front();
    number_of_instructions_to_execute = Debugger::ParseUint64String(arg);
    if (!number_of_instructions_to_execute) {
      fprintf(ostream_,
              "Error: use `step [number]` to step an optional number of"
              " instructions\n");
      return DebugContinue;
    }
  }

  while (!sim_->IsSimulationFinished() &&
         *number_of_instructions_to_execute > 0) {
    sim_->ExecuteInstruction();
    (*number_of_instructions_to_execute)--;
    // The first instruction has already been printed by Debug() so only
    // enable instruction tracing after the first instruction has been
    // executed.
    sim_->SetTraceParameters(sim_->GetTraceParameters() | LOG_DISASM);
  }
  // Disable instruction tracing after all instructions have been executed.
  sim_->SetTraceParameters(sim_->GetTraceParameters() & ~LOG_DISASM);

  // If execution ran off the end of the program, leave the debugger too.
  if (sim_->IsSimulationFinished()) {
    fprintf(ostream_,
            "Debugger at the end of simulation, leaving simulator...\n");
    return DebugExit;
  }
  return DebugContinue;
}
// `continue`: hand control back to the simulator.
DebugReturn ContinueCmd::Action(const std::vector<std::string>& args) {
  USE(args);
  fprintf(ostream_, "Continuing...\n");
  // If stopped on a breakpoint that has already been reported, execute that
  // instruction before resuming so we don't immediately re-trigger it.
  if (sim_->GetDebugger()->IsAtBreakpoint()) {
    sim_->ExecuteInstruction();
  }
  return DebugExit;
}
// `print <register|all|system|ffr>`: print a specific register or a whole
// register group.
DebugReturn PrintCmd::Action(const std::vector<std::string>& args) {
  if (args.size() != 1) {
    fprintf(ostream_,
            "Error: use `print <register|all>` to print the contents of a"
            " specific register or all registers.\n");
    return DebugContinue;
  }

  const std::string& what = args.front();
  // Handle the register-group keywords first.
  if (what == "all") {
    sim_->PrintRegisters();
    sim_->PrintZRegisters();
    return DebugContinue;
  }
  if (what == "system") {
    sim_->PrintSystemRegisters();
    return DebugContinue;
  }
  if (what == "ffr") {
    sim_->PrintFFR();
    return DebugContinue;
  }

  auto reg = Debugger::ParseRegString(what);
  if (!reg) {
    fprintf(ostream_,
            "Error: incorrect register format, use e.g: X0, x0, etc...\n");
    return DebugContinue;
  }
  auto& [reg_prefix, reg_code] = *reg;
  // Ensure the stack pointer is printed instead of the zero register.
  if (reg_code == kSpRegCode) {
    reg_code = kSPRegInternalCode;
  }
  // Registers are printed in different ways depending on their type.
  switch (reg_prefix) {
    case 'W':
      sim_->PrintRegister(
          reg_code,
          static_cast<Simulator::PrintRegisterFormat>(
              Simulator::PrintRegisterFormat::kPrintWReg |
              Simulator::PrintRegisterFormat::kPrintRegPartial));
      break;
    case 'X':
      sim_->PrintRegister(reg_code,
                          Simulator::PrintRegisterFormat::kPrintXReg);
      break;
    case 'V':
      sim_->PrintVRegister(reg_code);
      break;
    case 'Z':
      sim_->PrintZRegister(reg_code);
      break;
    case 'P':
      sim_->PrintPRegister(reg_code);
      break;
    default:
      // ParseRegString should only allow valid register characters.
      VIXL_UNREACHABLE();
  }
  return DebugContinue;
}
// `trace`: toggle full tracing (disassembly, registers and memory writes).
DebugReturn TraceCmd::Action(const std::vector<std::string>& args) {
  if (!args.empty()) {
    fprintf(ostream_, "Error: use `trace` to toggle tracing of registers.\n");
    return DebugContinue;
  }

  const int trace_params = sim_->GetTraceParameters();
  const bool fully_enabled = (trace_params & LOG_ALL) == LOG_ALL;
  if (fully_enabled) {
    fprintf(ostream_,
            "Disabling disassembly, registers and memory write tracing\n");
    sim_->SetTraceParameters(trace_params & ~LOG_ALL);
  } else {
    fprintf(ostream_,
            "Enabling disassembly, registers and memory write tracing\n");
    sim_->SetTraceParameters(trace_params | LOG_ALL);
  }
  return DebugContinue;
}
// `gdb`: trap into a host debugger attached to the simulator process.
DebugReturn GdbCmd::Action(const std::vector<std::string>& args) {
  if (!args.empty()) {
    fprintf(ostream_,
            "Error: use `gdb` to enter GDB from the simulator debugger.\n");
    return DebugContinue;
  }
  HostBreakpoint();
  return DebugContinue;
}
} // namespace aarch64
} // namespace vixl
#endif // VIXL_INCLUDE_SIMULATOR_AARCH64

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -25,6 +25,7 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "instructions-aarch64.h"
#include "assembler-aarch64.h"
namespace vixl {
@ -35,7 +36,8 @@ static uint64_t RepeatBitsAcrossReg(unsigned reg_size,
unsigned width) {
VIXL_ASSERT((width == 2) || (width == 4) || (width == 8) || (width == 16) ||
(width == 32));
VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
VIXL_ASSERT((reg_size == kBRegSize) || (reg_size == kHRegSize) ||
(reg_size == kSRegSize) || (reg_size == kDRegSize));
uint64_t result = value & ((UINT64_C(1) << width) - 1);
for (unsigned i = width; i < reg_size; i *= 2) {
result |= (result << i);
@ -43,6 +45,442 @@ static uint64_t RepeatBitsAcrossReg(unsigned reg_size,
return result;
}
bool Instruction::CanTakeSVEMovprfx(const char* form,
const Instruction* movprfx) const {
return CanTakeSVEMovprfx(Hash(form), movprfx);
}
bool Instruction::CanTakeSVEMovprfx(uint32_t form_hash,
const Instruction* movprfx) const {
bool movprfx_is_predicated = movprfx->Mask(SVEMovprfxMask) == MOVPRFX_z_p_z;
bool movprfx_is_unpredicated =
movprfx->Mask(SVEConstructivePrefix_UnpredicatedMask) == MOVPRFX_z_z;
VIXL_ASSERT(movprfx_is_predicated != movprfx_is_unpredicated);
int movprfx_zd = movprfx->GetRd();
int movprfx_pg = movprfx_is_predicated ? movprfx->GetPgLow8() : -1;
VectorFormat movprfx_vform =
movprfx_is_predicated ? movprfx->GetSVEVectorFormat() : kFormatUndefined;
bool pg_matches_low8 = movprfx_pg == GetPgLow8();
bool vform_matches = movprfx_vform == GetSVEVectorFormat();
bool zd_matches = movprfx_zd == GetRd();
bool zd_isnt_zn = movprfx_zd != GetRn();
bool zd_isnt_zm = movprfx_zd != GetRm();
switch (form_hash) {
case "cdot_z_zzzi_s"_h:
case "sdot_z_zzzi_s"_h:
case "sudot_z_zzzi_s"_h:
case "udot_z_zzzi_s"_h:
case "usdot_z_zzzi_s"_h:
return (GetRd() != static_cast<int>(ExtractBits(18, 16))) &&
movprfx_is_unpredicated && zd_isnt_zn && zd_matches;
case "cdot_z_zzzi_d"_h:
case "sdot_z_zzzi_d"_h:
case "udot_z_zzzi_d"_h:
return (GetRd() != static_cast<int>(ExtractBits(19, 16))) &&
movprfx_is_unpredicated && zd_isnt_zn && zd_matches;
case "fmlalb_z_zzzi_s"_h:
case "fmlalt_z_zzzi_s"_h:
case "fmlslb_z_zzzi_s"_h:
case "fmlslt_z_zzzi_s"_h:
case "smlalb_z_zzzi_d"_h:
case "smlalb_z_zzzi_s"_h:
case "smlalt_z_zzzi_d"_h:
case "smlalt_z_zzzi_s"_h:
case "smlslb_z_zzzi_d"_h:
case "smlslb_z_zzzi_s"_h:
case "smlslt_z_zzzi_d"_h:
case "smlslt_z_zzzi_s"_h:
case "sqdmlalb_z_zzzi_d"_h:
case "sqdmlalb_z_zzzi_s"_h:
case "sqdmlalt_z_zzzi_d"_h:
case "sqdmlalt_z_zzzi_s"_h:
case "sqdmlslb_z_zzzi_d"_h:
case "sqdmlslb_z_zzzi_s"_h:
case "sqdmlslt_z_zzzi_d"_h:
case "sqdmlslt_z_zzzi_s"_h:
case "umlalb_z_zzzi_d"_h:
case "umlalb_z_zzzi_s"_h:
case "umlalt_z_zzzi_d"_h:
case "umlalt_z_zzzi_s"_h:
case "umlslb_z_zzzi_d"_h:
case "umlslb_z_zzzi_s"_h:
case "umlslt_z_zzzi_d"_h:
case "umlslt_z_zzzi_s"_h:
return (GetRd() != GetSVEMulLongZmAndIndex().first) &&
movprfx_is_unpredicated && zd_isnt_zn && zd_matches;
case "cmla_z_zzzi_h"_h:
case "cmla_z_zzzi_s"_h:
case "fcmla_z_zzzi_h"_h:
case "fcmla_z_zzzi_s"_h:
case "fmla_z_zzzi_d"_h:
case "fmla_z_zzzi_h"_h:
case "fmla_z_zzzi_s"_h:
case "fmls_z_zzzi_d"_h:
case "fmls_z_zzzi_h"_h:
case "fmls_z_zzzi_s"_h:
case "mla_z_zzzi_d"_h:
case "mla_z_zzzi_h"_h:
case "mla_z_zzzi_s"_h:
case "mls_z_zzzi_d"_h:
case "mls_z_zzzi_h"_h:
case "mls_z_zzzi_s"_h:
case "sqrdcmlah_z_zzzi_h"_h:
case "sqrdcmlah_z_zzzi_s"_h:
case "sqrdmlah_z_zzzi_d"_h:
case "sqrdmlah_z_zzzi_h"_h:
case "sqrdmlah_z_zzzi_s"_h:
case "sqrdmlsh_z_zzzi_d"_h:
case "sqrdmlsh_z_zzzi_h"_h:
case "sqrdmlsh_z_zzzi_s"_h:
return (GetRd() != GetSVEMulZmAndIndex().first) &&
movprfx_is_unpredicated && zd_isnt_zn && zd_matches;
case "adclb_z_zzz"_h:
case "adclt_z_zzz"_h:
case "bcax_z_zzz"_h:
case "bsl1n_z_zzz"_h:
case "bsl2n_z_zzz"_h:
case "bsl_z_zzz"_h:
case "cdot_z_zzz"_h:
case "cmla_z_zzz"_h:
case "eor3_z_zzz"_h:
case "eorbt_z_zz"_h:
case "eortb_z_zz"_h:
case "fmlalb_z_zzz"_h:
case "fmlalt_z_zzz"_h:
case "fmlslb_z_zzz"_h:
case "fmlslt_z_zzz"_h:
case "nbsl_z_zzz"_h:
case "saba_z_zzz"_h:
case "sabalb_z_zzz"_h:
case "sabalt_z_zzz"_h:
case "sbclb_z_zzz"_h:
case "sbclt_z_zzz"_h:
case "sdot_z_zzz"_h:
case "smlalb_z_zzz"_h:
case "smlalt_z_zzz"_h:
case "smlslb_z_zzz"_h:
case "smlslt_z_zzz"_h:
case "sqdmlalb_z_zzz"_h:
case "sqdmlalbt_z_zzz"_h:
case "sqdmlalt_z_zzz"_h:
case "sqdmlslb_z_zzz"_h:
case "sqdmlslbt_z_zzz"_h:
case "sqdmlslt_z_zzz"_h:
case "sqrdcmlah_z_zzz"_h:
case "sqrdmlah_z_zzz"_h:
case "sqrdmlsh_z_zzz"_h:
case "uaba_z_zzz"_h:
case "uabalb_z_zzz"_h:
case "uabalt_z_zzz"_h:
case "udot_z_zzz"_h:
case "umlalb_z_zzz"_h:
case "umlalt_z_zzz"_h:
case "umlslb_z_zzz"_h:
case "umlslt_z_zzz"_h:
case "usdot_z_zzz_s"_h:
case "fmmla_z_zzz_s"_h:
case "fmmla_z_zzz_d"_h:
case "smmla_z_zzz"_h:
case "ummla_z_zzz"_h:
case "usmmla_z_zzz"_h:
return movprfx_is_unpredicated && zd_isnt_zm && zd_isnt_zn && zd_matches;
case "addp_z_p_zz"_h:
case "cadd_z_zz"_h:
case "clasta_z_p_zz"_h:
case "clastb_z_p_zz"_h:
case "decd_z_zs"_h:
case "dech_z_zs"_h:
case "decw_z_zs"_h:
case "ext_z_zi_des"_h:
case "faddp_z_p_zz"_h:
case "fmaxnmp_z_p_zz"_h:
case "fmaxp_z_p_zz"_h:
case "fminnmp_z_p_zz"_h:
case "fminp_z_p_zz"_h:
case "ftmad_z_zzi"_h:
case "incd_z_zs"_h:
case "inch_z_zs"_h:
case "incw_z_zs"_h:
case "insr_z_v"_h:
case "smaxp_z_p_zz"_h:
case "sminp_z_p_zz"_h:
case "splice_z_p_zz_des"_h:
case "sqcadd_z_zz"_h:
case "sqdecd_z_zs"_h:
case "sqdech_z_zs"_h:
case "sqdecw_z_zs"_h:
case "sqincd_z_zs"_h:
case "sqinch_z_zs"_h:
case "sqincw_z_zs"_h:
case "srsra_z_zi"_h:
case "ssra_z_zi"_h:
case "umaxp_z_p_zz"_h:
case "uminp_z_p_zz"_h:
case "uqdecd_z_zs"_h:
case "uqdech_z_zs"_h:
case "uqdecw_z_zs"_h:
case "uqincd_z_zs"_h:
case "uqinch_z_zs"_h:
case "uqincw_z_zs"_h:
case "ursra_z_zi"_h:
case "usra_z_zi"_h:
case "xar_z_zzi"_h:
return movprfx_is_unpredicated && zd_isnt_zn && zd_matches;
case "add_z_zi"_h:
case "and_z_zi"_h:
case "decp_z_p_z"_h:
case "eor_z_zi"_h:
case "incp_z_p_z"_h:
case "insr_z_r"_h:
case "mul_z_zi"_h:
case "orr_z_zi"_h:
case "smax_z_zi"_h:
case "smin_z_zi"_h:
case "sqadd_z_zi"_h:
case "sqdecp_z_p_z"_h:
case "sqincp_z_p_z"_h:
case "sqsub_z_zi"_h:
case "sub_z_zi"_h:
case "subr_z_zi"_h:
case "umax_z_zi"_h:
case "umin_z_zi"_h:
case "uqadd_z_zi"_h:
case "uqdecp_z_p_z"_h:
case "uqincp_z_p_z"_h:
case "uqsub_z_zi"_h:
return movprfx_is_unpredicated && zd_matches;
case "cpy_z_p_i"_h:
if (movprfx_is_predicated) {
if (!vform_matches) return false;
if (movprfx_pg != GetRx<19, 16>()) return false;
}
// Only the merging form can take movprfx.
if (ExtractBit(14) == 0) return false;
return zd_matches;
case "fcpy_z_p_i"_h:
return (movprfx_is_unpredicated ||
((movprfx_pg == GetRx<19, 16>()) && vform_matches)) &&
zd_matches;
case "flogb_z_p_z"_h:
return (movprfx_is_unpredicated ||
((movprfx_vform == GetSVEVectorFormat(17)) && pg_matches_low8)) &&
zd_isnt_zn && zd_matches;
case "asr_z_p_zi"_h:
case "asrd_z_p_zi"_h:
case "lsl_z_p_zi"_h:
case "lsr_z_p_zi"_h:
case "sqshl_z_p_zi"_h:
case "sqshlu_z_p_zi"_h:
case "srshr_z_p_zi"_h:
case "uqshl_z_p_zi"_h:
case "urshr_z_p_zi"_h:
return (movprfx_is_unpredicated ||
((movprfx_vform ==
SVEFormatFromLaneSizeInBytesLog2(
GetSVEImmShiftAndLaneSizeLog2(true).second)) &&
pg_matches_low8)) &&
zd_matches;
case "fcvt_z_p_z_d2h"_h:
case "fcvt_z_p_z_d2s"_h:
case "fcvt_z_p_z_h2d"_h:
case "fcvt_z_p_z_s2d"_h:
case "fcvtx_z_p_z_d2s"_h:
case "fcvtzs_z_p_z_d2w"_h:
case "fcvtzs_z_p_z_d2x"_h:
case "fcvtzs_z_p_z_fp162x"_h:
case "fcvtzs_z_p_z_s2x"_h:
case "fcvtzu_z_p_z_d2w"_h:
case "fcvtzu_z_p_z_d2x"_h:
case "fcvtzu_z_p_z_fp162x"_h:
case "fcvtzu_z_p_z_s2x"_h:
case "scvtf_z_p_z_w2d"_h:
case "scvtf_z_p_z_x2d"_h:
case "scvtf_z_p_z_x2fp16"_h:
case "scvtf_z_p_z_x2s"_h:
case "ucvtf_z_p_z_w2d"_h:
case "ucvtf_z_p_z_x2d"_h:
case "ucvtf_z_p_z_x2fp16"_h:
case "ucvtf_z_p_z_x2s"_h:
return (movprfx_is_unpredicated ||
((movprfx_vform == kFormatVnD) && pg_matches_low8)) &&
zd_isnt_zn && zd_matches;
case "fcvtzs_z_p_z_fp162h"_h:
case "fcvtzu_z_p_z_fp162h"_h:
case "scvtf_z_p_z_h2fp16"_h:
case "ucvtf_z_p_z_h2fp16"_h:
return (movprfx_is_unpredicated ||
((movprfx_vform == kFormatVnH) && pg_matches_low8)) &&
zd_isnt_zn && zd_matches;
case "fcvt_z_p_z_h2s"_h:
case "fcvt_z_p_z_s2h"_h:
case "fcvtzs_z_p_z_fp162w"_h:
case "fcvtzs_z_p_z_s2w"_h:
case "fcvtzu_z_p_z_fp162w"_h:
case "fcvtzu_z_p_z_s2w"_h:
case "scvtf_z_p_z_w2fp16"_h:
case "scvtf_z_p_z_w2s"_h:
case "ucvtf_z_p_z_w2fp16"_h:
case "ucvtf_z_p_z_w2s"_h:
return (movprfx_is_unpredicated ||
((movprfx_vform == kFormatVnS) && pg_matches_low8)) &&
zd_isnt_zn && zd_matches;
case "fcmla_z_p_zzz"_h:
case "fmad_z_p_zzz"_h:
case "fmla_z_p_zzz"_h:
case "fmls_z_p_zzz"_h:
case "fmsb_z_p_zzz"_h:
case "fnmad_z_p_zzz"_h:
case "fnmla_z_p_zzz"_h:
case "fnmls_z_p_zzz"_h:
case "fnmsb_z_p_zzz"_h:
case "mad_z_p_zzz"_h:
case "mla_z_p_zzz"_h:
case "mls_z_p_zzz"_h:
case "msb_z_p_zzz"_h:
return (movprfx_is_unpredicated || (pg_matches_low8 && vform_matches)) &&
zd_isnt_zm && zd_isnt_zn && zd_matches;
case "abs_z_p_z"_h:
case "add_z_p_zz"_h:
case "and_z_p_zz"_h:
case "asr_z_p_zw"_h:
case "asr_z_p_zz"_h:
case "asrr_z_p_zz"_h:
case "bic_z_p_zz"_h:
case "cls_z_p_z"_h:
case "clz_z_p_z"_h:
case "cnot_z_p_z"_h:
case "cnt_z_p_z"_h:
case "cpy_z_p_v"_h:
case "eor_z_p_zz"_h:
case "fabd_z_p_zz"_h:
case "fabs_z_p_z"_h:
case "fadd_z_p_zz"_h:
case "fcadd_z_p_zz"_h:
case "fdiv_z_p_zz"_h:
case "fdivr_z_p_zz"_h:
case "fmax_z_p_zz"_h:
case "fmaxnm_z_p_zz"_h:
case "fmin_z_p_zz"_h:
case "fminnm_z_p_zz"_h:
case "fmul_z_p_zz"_h:
case "fmulx_z_p_zz"_h:
case "fneg_z_p_z"_h:
case "frecpx_z_p_z"_h:
case "frinta_z_p_z"_h:
case "frinti_z_p_z"_h:
case "frintm_z_p_z"_h:
case "frintn_z_p_z"_h:
case "frintp_z_p_z"_h:
case "frintx_z_p_z"_h:
case "frintz_z_p_z"_h:
case "fscale_z_p_zz"_h:
case "fsqrt_z_p_z"_h:
case "fsub_z_p_zz"_h:
case "fsubr_z_p_zz"_h:
case "lsl_z_p_zw"_h:
case "lsl_z_p_zz"_h:
case "lslr_z_p_zz"_h:
case "lsr_z_p_zw"_h:
case "lsr_z_p_zz"_h:
case "lsrr_z_p_zz"_h:
case "mul_z_p_zz"_h:
case "neg_z_p_z"_h:
case "not_z_p_z"_h:
case "orr_z_p_zz"_h:
case "rbit_z_p_z"_h:
case "revb_z_z"_h:
case "revh_z_z"_h:
case "revw_z_z"_h:
case "sabd_z_p_zz"_h:
case "sadalp_z_p_z"_h:
case "sdiv_z_p_zz"_h:
case "sdivr_z_p_zz"_h:
case "shadd_z_p_zz"_h:
case "shsub_z_p_zz"_h:
case "shsubr_z_p_zz"_h:
case "smax_z_p_zz"_h:
case "smin_z_p_zz"_h:
case "smulh_z_p_zz"_h:
case "sqabs_z_p_z"_h:
case "sqadd_z_p_zz"_h:
case "sqneg_z_p_z"_h:
case "sqrshl_z_p_zz"_h:
case "sqrshlr_z_p_zz"_h:
case "sqshl_z_p_zz"_h:
case "sqshlr_z_p_zz"_h:
case "sqsub_z_p_zz"_h:
case "sqsubr_z_p_zz"_h:
case "srhadd_z_p_zz"_h:
case "srshl_z_p_zz"_h:
case "srshlr_z_p_zz"_h:
case "sub_z_p_zz"_h:
case "subr_z_p_zz"_h:
case "suqadd_z_p_zz"_h:
case "sxtb_z_p_z"_h:
case "sxth_z_p_z"_h:
case "sxtw_z_p_z"_h:
case "uabd_z_p_zz"_h:
case "uadalp_z_p_z"_h:
case "udiv_z_p_zz"_h:
case "udivr_z_p_zz"_h:
case "uhadd_z_p_zz"_h:
case "uhsub_z_p_zz"_h:
case "uhsubr_z_p_zz"_h:
case "umax_z_p_zz"_h:
case "umin_z_p_zz"_h:
case "umulh_z_p_zz"_h:
case "uqadd_z_p_zz"_h:
case "uqrshl_z_p_zz"_h:
case "uqrshlr_z_p_zz"_h:
case "uqshl_z_p_zz"_h:
case "uqshlr_z_p_zz"_h:
case "uqsub_z_p_zz"_h:
case "uqsubr_z_p_zz"_h:
case "urecpe_z_p_z"_h:
case "urhadd_z_p_zz"_h:
case "urshl_z_p_zz"_h:
case "urshlr_z_p_zz"_h:
case "ursqrte_z_p_z"_h:
case "usqadd_z_p_zz"_h:
case "uxtb_z_p_z"_h:
case "uxth_z_p_z"_h:
case "uxtw_z_p_z"_h:
return (movprfx_is_unpredicated || (pg_matches_low8 && vform_matches)) &&
zd_isnt_zn && zd_matches;
case "cpy_z_p_r"_h:
case "fadd_z_p_zs"_h:
case "fmax_z_p_zs"_h:
case "fmaxnm_z_p_zs"_h:
case "fmin_z_p_zs"_h:
case "fminnm_z_p_zs"_h:
case "fmul_z_p_zs"_h:
case "fsub_z_p_zs"_h:
case "fsubr_z_p_zs"_h:
return (movprfx_is_unpredicated || (pg_matches_low8 && vform_matches)) &&
zd_matches;
default:
return false;
}
} // NOLINT(readability/fn_size)
bool Instruction::IsLoad() const {
if (Mask(LoadStoreAnyFMask) != LoadStoreAnyFixed) {
@ -103,6 +541,68 @@ bool Instruction::IsStore() const {
}
// Decode an element index and a lane size (log2, in bytes) from the combined
// imm2 (<23:22>) and tsz (<20:16>) fields of an SVE permute encoding.
std::pair<int, int> Instruction::GetSVEPermuteIndexAndLaneSizeLog2() const {
  uint32_t imm_2 = ExtractBits<0x00C00000>();
  uint32_t tsz_5 = ExtractBits<0x001F0000>();
  uint32_t imm_7 = (imm_2 << 5) | tsz_5;
  // The position of the lowest set bit of tsz encodes the lane size, capped
  // at 5 (tsz == 0 would otherwise give 32 trailing zeros).
  int lane_size_in_byte_log_2 = std::min(CountTrailingZeros(tsz_5), 5);
  // The bits of imm7 above the lane-size marker bit hold the element index.
  int index = ExtractUnsignedBitfield32(6, lane_size_in_byte_log_2 + 1, imm_7);
  return std::make_pair(index, lane_size_in_byte_log_2);
}
// Get the register and index for SVE indexed multiplies encoded in the forms:
//  .h : Zm = <18:16>, index = <22><20:19>
//  .s : Zm = <18:16>, index = <20:19>
//  .d : Zm = <19:16>, index = <20>
// Returns {Zm register code, lane index}.
std::pair<int, int> Instruction::GetSVEMulZmAndIndex() const {
  int reg_code = GetRmLow16();
  int index = ExtractBits(20, 19);

  // For .h, index uses bit zero of the size field, so kFormatVnB below implies
  // half-word lane, with most-significant bit of the index zero.
  switch (GetSVEVectorFormat()) {
    case kFormatVnD:
      index >>= 1;  // Only bit 20 in the index for D lanes.
      break;
    case kFormatVnH:
      index += 4;  // Bit 22 is the top bit of index.
      VIXL_FALLTHROUGH();
    case kFormatVnB:
    case kFormatVnS:
      reg_code &= 7;  // Three bits used for the register.
      break;
    default:
      VIXL_UNIMPLEMENTED();
      break;
  }
  return std::make_pair(reg_code, index);
}
// Get the register and index for SVE indexed long multiplies encoded in the
// forms:
//  .h : Zm = <18:16>, index = <20:19><11>
//  .s : Zm = <19:16>, index = <20><11>
// Returns {Zm register code, lane index}.
std::pair<int, int> Instruction::GetSVEMulLongZmAndIndex() const {
  int reg_code = GetRmLow16();
  int index = ExtractBit(11);

  // For long multiplies, the SVE size field <23:22> encodes the destination
  // element size. The source element size is half the width.
  switch (GetSVEVectorFormat()) {
    case kFormatVnS:
      reg_code &= 7;
      index |= ExtractBits(20, 19) << 1;
      break;
    case kFormatVnD:
      index |= ExtractBit(20) << 1;
      break;
    default:
      VIXL_UNIMPLEMENTED();
      break;
  }
  return std::make_pair(reg_code, index);
}
// Logical immediates can't encode zero, so a return value of zero is used to
// indicate a failure case. Specifically, where the constraints on imm_s are
// not met.
@ -111,7 +611,114 @@ uint64_t Instruction::GetImmLogical() const {
int32_t n = GetBitN();
int32_t imm_s = GetImmSetBits();
int32_t imm_r = GetImmRotate();
return DecodeImmBitMask(n, imm_s, imm_r, reg_size);
}
// Logical immediates can't encode zero, so a return value of zero is used to
// indicate a failure case. Specifically, where the constraints on imm_s are
// not met, or the lane-size encoding is RESERVED.
uint64_t Instruction::GetSVEImmLogical() const {
  int n = GetSVEBitN();
  int imm_s = GetSVEImmSetBits();
  int imm_r = GetSVEImmRotate();
  int lane_size_in_bytes_log2 = GetSVEBitwiseImmLaneSizeInBytesLog2();

  // Reject anything other than the four valid lane sizes (B, H, S, D).
  bool lane_size_ok = (lane_size_in_bytes_log2 == kBRegSizeInBytesLog2) ||
                      (lane_size_in_bytes_log2 == kHRegSizeInBytesLog2) ||
                      (lane_size_in_bytes_log2 == kSRegSizeInBytesLog2) ||
                      (lane_size_in_bytes_log2 == kDRegSizeInBytesLog2);
  if (!lane_size_ok) return 0;

  int lane_size_in_bits = 1 << (lane_size_in_bytes_log2 + 3);
  return DecodeImmBitMask(n, imm_s, imm_r, lane_size_in_bits);
}
// Decode the shift amount and lane size (log2, in bytes) from the tsize:imm3
// immediate of SVE shift-by-immediate encodings. The field positions differ
// between the predicated and unpredicated forms, selected by `is_predicated`.
std::pair<int, int> Instruction::GetSVEImmShiftAndLaneSizeLog2(
    bool is_predicated) const {
  Instr tsize =
      is_predicated ? ExtractBits<0x00C00300>() : ExtractBits<0x00D80000>();
  Instr imm_3 =
      is_predicated ? ExtractBits<0x000000E0>() : ExtractBits<0x00070000>();
  if (tsize == 0) {
    // The bit field `tsize` means undefined if it is zero, so return a
    // convenience value kWMinInt to indicate a failure case.
    return std::make_pair(kWMinInt, kWMinInt);
  }

  // The position of the highest set bit of tsize encodes the lane size.
  int lane_size_in_bytes_log_2 = 32 - CountLeadingZeros(tsize, 32) - 1;
  int esize = (1 << lane_size_in_bytes_log_2) * kBitsPerByte;
  // The shift is stored biased: tsize:imm3 == 2 * esize - shift.
  int shift = (2 * esize) - ((tsize << 3) | imm_3);
  return std::make_pair(shift, lane_size_in_bytes_log_2);
}
int Instruction::GetSVEMsizeFromDtype(bool is_signed, int dtype_h_lsb) const {
Instr dtype_h = ExtractBits(dtype_h_lsb + 1, dtype_h_lsb);
if (is_signed) {
dtype_h = dtype_h ^ 0x3;
}
return dtype_h;
}
int Instruction::GetSVEEsizeFromDtype(bool is_signed, int dtype_l_lsb) const {
Instr dtype_l = ExtractBits(dtype_l_lsb + 1, dtype_l_lsb);
if (is_signed) {
dtype_l = dtype_l ^ 0x3;
}
return dtype_l;
}
// Decode the lane size (log2, in bytes) implied by the N:imms fields of an
// SVE bitwise immediate. Returns -1 for RESERVED encodings.
int Instruction::GetSVEBitwiseImmLaneSizeInBytesLog2() const {
  int n = GetSVEBitN();
  int imm_s = GetSVEImmSetBits();
  unsigned type_bitset =
      (n << SVEImmSetBits_width) | (~imm_s & GetUintMask(SVEImmSetBits_width));

  // A lane size is constructed from the n and imm_s bits according to
  // the following table:
  //
  //  N   imms   size
  //  0  0xxxxx   32
  //  0  10xxxx   16
  //  0  110xxx    8
  //  0  1110xx    8
  //  0  11110x    8
  //  1  xxxxxx   64

  if (type_bitset == 0) {
    // Bail out early since `HighestSetBitPosition` doesn't accept zero
    // value input.
    return -1;
  }

  // The position of the highest set bit in n:~imms selects the row above.
  switch (HighestSetBitPosition(type_bitset)) {
    case 6:
      return kDRegSizeInBytesLog2;
    case 5:
      return kSRegSizeInBytesLog2;
    case 4:
      return kHRegSizeInBytesLog2;
    case 3:
    case 2:
    case 1:
      return kBRegSizeInBytesLog2;
    default:
      // RESERVED encoding.
      return -1;
  }
}
// Return the EXT-style immediate, assembled from imm8h (<20:16>) and
// imm8l (<12:10>).
int Instruction::GetSVEExtractImmediate() const {
  constexpr int kImm8Mask = 0x001F0000 | 0x00001C00;  // imm8h | imm8l
  return ExtractBits<kImm8Mask>();
}
uint64_t Instruction::DecodeImmBitMask(int32_t n,
int32_t imm_s,
int32_t imm_r,
int32_t size) const {
// An integer is constructed from the n, imm_s and imm_r bits according to
// the following table:
//
@ -146,7 +753,7 @@ uint64_t Instruction::GetImmLogical() const {
return 0;
}
uint64_t bits = (UINT64_C(1) << ((imm_s & mask) + 1)) - 1;
return RepeatBitsAcrossReg(reg_size,
return RepeatBitsAcrossReg(size,
RotateRight(bits, imm_r & mask, width),
width);
}
@ -397,8 +1004,6 @@ void Instruction::SetImmLLiteral(const Instruction* source) {
VectorFormat VectorFormatHalfWidth(VectorFormat vform) {
VIXL_ASSERT(vform == kFormat8H || vform == kFormat4S || vform == kFormat2D ||
vform == kFormatH || vform == kFormatS || vform == kFormatD);
switch (vform) {
case kFormat8H:
return kFormat8B;
@ -406,12 +1011,20 @@ VectorFormat VectorFormatHalfWidth(VectorFormat vform) {
return kFormat4H;
case kFormat2D:
return kFormat2S;
case kFormat1Q:
return kFormat1D;
case kFormatH:
return kFormatB;
case kFormatS:
return kFormatH;
case kFormatD:
return kFormatS;
case kFormatVnH:
return kFormatVnB;
case kFormatVnS:
return kFormatVnH;
case kFormatVnD:
return kFormatVnS;
default:
VIXL_UNREACHABLE();
return kFormatUndefined;
@ -420,8 +1033,6 @@ VectorFormat VectorFormatHalfWidth(VectorFormat vform) {
VectorFormat VectorFormatDoubleWidth(VectorFormat vform) {
VIXL_ASSERT(vform == kFormat8B || vform == kFormat4H || vform == kFormat2S ||
vform == kFormatB || vform == kFormatH || vform == kFormatS);
switch (vform) {
case kFormat8B:
return kFormat8H;
@ -435,6 +1046,12 @@ VectorFormat VectorFormatDoubleWidth(VectorFormat vform) {
return kFormatS;
case kFormatS:
return kFormatD;
case kFormatVnB:
return kFormatVnH;
case kFormatVnH:
return kFormatVnS;
case kFormatVnS:
return kFormatVnD;
default:
VIXL_UNREACHABLE();
return kFormatUndefined;
@ -480,6 +1097,14 @@ VectorFormat VectorFormatHalfWidthDoubleLanes(VectorFormat vform) {
return kFormat2S;
case kFormat2D:
return kFormat4S;
case kFormat1Q:
return kFormat2D;
case kFormatVnH:
return kFormatVnB;
case kFormatVnS:
return kFormatVnH;
case kFormatVnD:
return kFormatVnS;
default:
VIXL_UNREACHABLE();
return kFormatUndefined;
@ -518,8 +1143,8 @@ VectorFormat VectorFormatHalfLanes(VectorFormat vform) {
}
VectorFormat ScalarFormatFromLaneSize(int laneSize) {
switch (laneSize) {
VectorFormat ScalarFormatFromLaneSize(int lane_size_in_bits) {
switch (lane_size_in_bits) {
case 8:
return kFormatB;
case 16:
@ -535,6 +1160,70 @@ VectorFormat ScalarFormatFromLaneSize(int laneSize) {
}
// Return true for the scalable (SVE) vector formats, false for NEON and
// scalar formats.
bool IsSVEFormat(VectorFormat vform) {
  return (vform == kFormatVnB) || (vform == kFormatVnH) ||
         (vform == kFormatVnS) || (vform == kFormatVnD) ||
         (vform == kFormatVnQ) || (vform == kFormatVnO);
}
// Map a lane size in bytes to the corresponding SVE vector format.
VectorFormat SVEFormatFromLaneSizeInBytes(int lane_size_in_bytes) {
  if (lane_size_in_bytes == 1) return kFormatVnB;
  if (lane_size_in_bytes == 2) return kFormatVnH;
  if (lane_size_in_bytes == 4) return kFormatVnS;
  if (lane_size_in_bytes == 8) return kFormatVnD;
  if (lane_size_in_bytes == 16) return kFormatVnQ;
  VIXL_UNREACHABLE();
  return kFormatUndefined;
}
// Map a lane size in bits to the corresponding SVE vector format. Only
// power-of-two sizes from 8 to 128 bits are defined.
VectorFormat SVEFormatFromLaneSizeInBits(int lane_size_in_bits) {
  bool valid = (lane_size_in_bits == 8) || (lane_size_in_bits == 16) ||
               (lane_size_in_bits == 32) || (lane_size_in_bits == 64) ||
               (lane_size_in_bits == 128);
  if (!valid) {
    VIXL_UNREACHABLE();
    return kFormatUndefined;
  }
  return SVEFormatFromLaneSizeInBytes(lane_size_in_bits / kBitsPerByte);
}
// Map a log2 lane size in bytes to the corresponding SVE vector format.
// Valid values span B lanes (0) through Q lanes (4).
VectorFormat SVEFormatFromLaneSizeInBytesLog2(int lane_size_in_bytes_log2) {
  if ((lane_size_in_bytes_log2 < 0) || (lane_size_in_bytes_log2 > 4)) {
    VIXL_UNREACHABLE();
    return kFormatUndefined;
  }
  return SVEFormatFromLaneSizeInBytes(1 << lane_size_in_bytes_log2);
}
// Reduce a vector format to the scalar format with the same lane size.
VectorFormat ScalarFormatFromFormat(VectorFormat vform) {
  unsigned lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
  return ScalarFormatFromLaneSize(lane_size_in_bits);
}
@ -542,6 +1231,7 @@ VectorFormat ScalarFormatFromFormat(VectorFormat vform) {
unsigned RegisterSizeInBitsFromFormat(VectorFormat vform) {
VIXL_ASSERT(vform != kFormatUndefined);
VIXL_ASSERT(!IsSVEFormat(vform));
switch (vform) {
case kFormatB:
return kBRegSize;
@ -551,14 +1241,20 @@ unsigned RegisterSizeInBitsFromFormat(VectorFormat vform) {
case kFormat2H:
return kSRegSize;
case kFormatD:
return kDRegSize;
case kFormat8B:
case kFormat4H:
case kFormat2S:
case kFormat1D:
return kDRegSize;
default:
case kFormat16B:
case kFormat8H:
case kFormat4S:
case kFormat2D:
case kFormat1Q:
return kQRegSize;
default:
VIXL_UNREACHABLE();
return 0;
}
}
@ -574,20 +1270,29 @@ unsigned LaneSizeInBitsFromFormat(VectorFormat vform) {
case kFormatB:
case kFormat8B:
case kFormat16B:
case kFormatVnB:
return 8;
case kFormatH:
case kFormat2H:
case kFormat4H:
case kFormat8H:
case kFormatVnH:
return 16;
case kFormatS:
case kFormat2S:
case kFormat4S:
case kFormatVnS:
return 32;
case kFormatD:
case kFormat1D:
case kFormat2D:
case kFormatVnD:
return 64;
case kFormat1Q:
case kFormatVnQ:
return 128;
case kFormatVnO:
return 256;
default:
VIXL_UNREACHABLE();
return 0;
@ -606,20 +1311,26 @@ int LaneSizeInBytesLog2FromFormat(VectorFormat vform) {
case kFormatB:
case kFormat8B:
case kFormat16B:
case kFormatVnB:
return 0;
case kFormatH:
case kFormat2H:
case kFormat4H:
case kFormat8H:
case kFormatVnH:
return 1;
case kFormatS:
case kFormat2S:
case kFormat4S:
case kFormatVnS:
return 2;
case kFormatD:
case kFormat1D:
case kFormat2D:
case kFormatVnD:
return 3;
case kFormatVnQ:
return 4;
default:
VIXL_UNREACHABLE();
return 0;
@ -643,6 +1354,7 @@ int LaneCountFromFormat(VectorFormat vform) {
case kFormat2D:
return 2;
case kFormat1D:
case kFormat1Q:
case kFormatB:
case kFormatH:
case kFormatS:
@ -697,17 +1409,19 @@ bool IsVectorFormat(VectorFormat vform) {
int64_t MaxIntFromFormat(VectorFormat vform) {
return INT64_MAX >> (64 - LaneSizeInBitsFromFormat(vform));
int lane_size = LaneSizeInBitsFromFormat(vform);
return static_cast<int64_t>(GetUintMask(lane_size) >> 1);
}
int64_t MinIntFromFormat(VectorFormat vform) {
return INT64_MIN >> (64 - LaneSizeInBitsFromFormat(vform));
return -MaxIntFromFormat(vform) - 1;
}
uint64_t MaxUintFromFormat(VectorFormat vform) {
return UINT64_MAX >> (64 - LaneSizeInBitsFromFormat(vform));
return GetUintMask(LaneSizeInBitsFromFormat(vform));
}
} // namespace aarch64
} // namespace vixl

View File

@ -1,916 +0,0 @@
// Copyright 2014, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "instrument-aarch64.h"
namespace vixl {
namespace aarch64 {
// Build a named counter of the given type. Counters start disabled with a
// zero count; names longer than the buffer are truncated.
Counter::Counter(const char* name, CounterType type)
    : count_(0), enabled_(false), type_(type) {
  VIXL_ASSERT(name != NULL);
  strncpy(name_, name, kCounterNameMaxLength);
  // Make sure `name_` is always NULL-terminated, even if the source's length is
  // higher.
  name_[kCounterNameMaxLength - 1] = '\0';
}
void Counter::Enable() { enabled_ = true; }
void Counter::Disable() { enabled_ = false; }
bool Counter::IsEnabled() { return enabled_; }
// Advance the count by one; disabled counters ignore increments.
void Counter::Increment() {
  if (!enabled_) return;
  count_++;
}
// Return the current count. Reading a Gauge counter also resets it to zero;
// Cumulative counters are left unchanged.
uint64_t Counter::GetCount() {
  uint64_t result = count_;
  if (type_ == Gauge) {
    // If the counter is a Gauge, reset the count after reading.
    count_ = 0;
  }
  return result;
}
const char* Counter::GetName() { return name_; }
CounterType Counter::GetType() { return type_; }
struct CounterDescriptor {
const char* name;
CounterType type;
};
static const CounterDescriptor kCounterList[] =
{{"Instruction", Cumulative},
{"Move Immediate", Gauge},
{"Add/Sub DP", Gauge},
{"Logical DP", Gauge},
{"Other Int DP", Gauge},
{"FP DP", Gauge},
{"Conditional Select", Gauge},
{"Conditional Compare", Gauge},
{"Unconditional Branch", Gauge},
{"Compare and Branch", Gauge},
{"Test and Branch", Gauge},
{"Conditional Branch", Gauge},
{"Load Integer", Gauge},
{"Load FP", Gauge},
{"Load Pair", Gauge},
{"Load Literal", Gauge},
{"Store Integer", Gauge},
{"Store FP", Gauge},
{"Store Pair", Gauge},
{"PC Addressing", Gauge},
{"Other", Gauge},
{"NEON", Gauge},
{"Crypto", Gauge}};
// Construct the instrumentation profiler. Counter samples are appended to
// `datafile` (falling back to stdout) once per `sample_period` instructions.
Instrument::Instrument(const char* datafile, uint64_t sample_period)
    : output_stream_(stdout), sample_period_(sample_period) {
  // Set up the output stream. If datafile is non-NULL, use that file. If it
  // can't be opened, or datafile is NULL, use stdout.
  if (datafile != NULL) {
    output_stream_ = fopen(datafile, "w");
    if (output_stream_ == NULL) {
      printf("Can't open output file %s. Using stdout.\n", datafile);
      output_stream_ = stdout;
    }
  }

  static const int num_counters =
      sizeof(kCounterList) / sizeof(CounterDescriptor);

  // Dump an instrumentation description comment at the top of the file.
  fprintf(output_stream_, "# counters=%d\n", num_counters);
  fprintf(output_stream_, "# sample_period=%" PRIu64 "\n", sample_period_);

  // Construct Counter objects from counter description array.
  for (int i = 0; i < num_counters; i++) {
    Counter* counter = new Counter(kCounterList[i].name, kCounterList[i].type);
    counters_.push_back(counter);
  }

  // Emit the CSV header row of counter names.
  DumpCounterNames();
}
// Flush the remaining counter data, free all Counter objects, and close the
// output stream unless it is stdout.
Instrument::~Instrument() {
  // Dump any remaining instruction data to the output file.
  DumpCounters();

  // Free all the counter objects.
  std::list<Counter*>::iterator it;
  for (it = counters_.begin(); it != counters_.end(); it++) {
    delete *it;
  }

  if (output_stream_ != stdout) {
    fclose(output_stream_);
  }
}
// Record one executed instruction, and dump every counter whenever a full
// sample period has elapsed.
void Instrument::Update() {
  // Increment the instruction counter, and dump all counters if a sample period
  // has elapsed.
  static Counter* counter = GetCounter("Instruction");
  VIXL_ASSERT(counter->GetType() == Cumulative);
  counter->Increment();

  if ((sample_period_ != 0) && counter->IsEnabled() &&
      (counter->GetCount() % sample_period_) == 0) {
    DumpCounters();
  }
}
void Instrument::DumpCounters() {
// Iterate through the counter objects, dumping their values to the output
// stream.
std::list<Counter*>::const_iterator it;
for (it = counters_.begin(); it != counters_.end(); it++) {
fprintf(output_stream_, "%" PRIu64 ",", (*it)->GetCount());
}
fprintf(output_stream_, "\n");
fflush(output_stream_);
}
void Instrument::DumpCounterNames() {
// Iterate through the counter objects, dumping the counter names to the
// output stream.
std::list<Counter*>::const_iterator it;
for (it = counters_.begin(); it != counters_.end(); it++) {
fprintf(output_stream_, "%s,", (*it)->GetName());
}
fprintf(output_stream_, "\n");
fflush(output_stream_);
}
// Dispatch an instrumentation event (delivered via a MOVN to the zero
// register — see VisitMoveWideImmediate): enable or disable counting, or
// emit a marker comment for any other value.
void Instrument::HandleInstrumentationEvent(unsigned event) {
  switch (event) {
    case InstrumentStateEnable:
      Enable();
      break;
    case InstrumentStateDisable:
      Disable();
      break;
    default:
      DumpEventMarker(event);
  }
}
// Dump an event marker to the output stream as a specially formatted comment
// line. The marker encodes two ASCII characters in its low 16 bits, logged
// alongside the current instruction count.
void Instrument::DumpEventMarker(unsigned marker) {
  static Counter* counter = GetCounter("Instruction");

  // Counter::GetCount() returns uint64_t, so use the unsigned conversion
  // macro (the previous PRId64 mismatched the argument's signedness).
  fprintf(output_stream_,
          "# %c%c @ %" PRIu64 "\n",
          marker & 0xff,
          (marker >> 8) & 0xff,
          counter->GetCount());
}
// Find a Counter by name with a linear scan of the counter list. An unknown
// name is a programming error: it is reported to both stderr and the output
// file, and the process exits.
Counter* Instrument::GetCounter(const char* name) {
  // Get a Counter object by name from the counter list.
  std::list<Counter*>::const_iterator it;
  for (it = counters_.begin(); it != counters_.end(); it++) {
    if (strcmp((*it)->GetName(), name) == 0) {
      return *it;
    }
  }

  // A Counter by that name does not exist: print an error message to stderr
  // and the output file, and exit.
  static const char* error_message =
      "# Error: Unknown counter \"%s\". Exiting.\n";
  fprintf(stderr, error_message, name);
  fprintf(output_stream_, error_message, name);
  exit(1);
}
void Instrument::Enable() {
std::list<Counter*>::iterator it;
for (it = counters_.begin(); it != counters_.end(); it++) {
(*it)->Enable();
}
}
void Instrument::Disable() {
std::list<Counter*>::iterator it;
for (it = counters_.begin(); it != counters_.end(); it++) {
(*it)->Disable();
}
}
void Instrument::VisitPCRelAddressing(const Instruction* instr) {
USE(instr);
Update();
static Counter* counter = GetCounter("PC Addressing");
counter->Increment();
}
void Instrument::VisitAddSubImmediate(const Instruction* instr) {
USE(instr);
Update();
static Counter* counter = GetCounter("Add/Sub DP");
counter->Increment();
}
void Instrument::VisitLogicalImmediate(const Instruction* instr) {
USE(instr);
Update();
static Counter* counter = GetCounter("Logical DP");
counter->Increment();
}
void Instrument::VisitMoveWideImmediate(const Instruction* instr) {
Update();
static Counter* counter = GetCounter("Move Immediate");
if (instr->IsMovn() && (instr->GetRd() == kZeroRegCode)) {
unsigned imm = instr->GetImmMoveWide();
HandleInstrumentationEvent(imm);
} else {
counter->Increment();
}
}
void Instrument::VisitBitfield(const Instruction* instr) {
USE(instr);
Update();
static Counter* counter = GetCounter("Other Int DP");
counter->Increment();
}
void Instrument::VisitExtract(const Instruction* instr) {
USE(instr);
Update();
static Counter* counter = GetCounter("Other Int DP");
counter->Increment();
}
void Instrument::VisitUnconditionalBranch(const Instruction* instr) {
USE(instr);
Update();
static Counter* counter = GetCounter("Unconditional Branch");
counter->Increment();
}
void Instrument::VisitUnconditionalBranchToRegister(const Instruction* instr) {
USE(instr);
Update();
static Counter* counter = GetCounter("Unconditional Branch");
counter->Increment();
}
void Instrument::VisitCompareBranch(const Instruction* instr) {
USE(instr);
Update();
static Counter* counter = GetCounter("Compare and Branch");
counter->Increment();
}
void Instrument::VisitTestBranch(const Instruction* instr) {
USE(instr);
Update();
static Counter* counter = GetCounter("Test and Branch");
counter->Increment();
}
void Instrument::VisitConditionalBranch(const Instruction* instr) {
USE(instr);
Update();
static Counter* counter = GetCounter("Conditional Branch");
counter->Increment();
}
void Instrument::VisitSystem(const Instruction* instr) {
USE(instr);
Update();
static Counter* counter = GetCounter("Other");
counter->Increment();
}
void Instrument::VisitException(const Instruction* instr) {
USE(instr);
Update();
static Counter* counter = GetCounter("Other");
counter->Increment();
}
void Instrument::InstrumentLoadStorePair(const Instruction* instr) {
static Counter* load_pair_counter = GetCounter("Load Pair");
static Counter* store_pair_counter = GetCounter("Store Pair");
if (instr->Mask(LoadStorePairLBit) != 0) {
load_pair_counter->Increment();
} else {
store_pair_counter->Increment();
}
}
void Instrument::VisitLoadStorePairPostIndex(const Instruction* instr) {
Update();
InstrumentLoadStorePair(instr);
}
void Instrument::VisitLoadStorePairOffset(const Instruction* instr) {
Update();
InstrumentLoadStorePair(instr);
}
void Instrument::VisitLoadStorePairPreIndex(const Instruction* instr) {
Update();
InstrumentLoadStorePair(instr);
}
void Instrument::VisitLoadStorePairNonTemporal(const Instruction* instr) {
Update();
InstrumentLoadStorePair(instr);
}
// Exclusive and atomic memory operations are not classified as plain
// loads/stores; they land in the "Other" bucket.
void Instrument::VisitLoadStoreExclusive(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Other");
  counter->Increment();
}

void Instrument::VisitAtomicMemory(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Other");
  counter->Increment();
}

// PC-relative literal loads get their own dedicated counter.
void Instrument::VisitLoadLiteral(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Load Literal");
  counter->Increment();
}
// Shared helper for single-register load/store visitors. Classifies the
// instruction into one of four buckets (integer/FP x load/store) based on
// its opcode. Encodings not listed below (e.g. prefetch forms) are counted
// by the caller's Update() only, not by any of these buckets.
void Instrument::InstrumentLoadStore(const Instruction* instr) {
  static Counter* load_int_counter = GetCounter("Load Integer");
  static Counter* store_int_counter = GetCounter("Store Integer");
  static Counter* load_fp_counter = GetCounter("Load FP");
  static Counter* store_fp_counter = GetCounter("Store FP");

  switch (instr->Mask(LoadStoreMask)) {
    // Integer stores of all widths share one bucket.
    case STRB_w:
    case STRH_w:
    case STR_w:
      VIXL_FALLTHROUGH();
    case STR_x:
      store_int_counter->Increment();
      break;
    // FP stores (single and double precision).
    case STR_s:
      VIXL_FALLTHROUGH();
    case STR_d:
      store_fp_counter->Increment();
      break;
    // Integer loads, including the sign-extending variants.
    case LDRB_w:
    case LDRH_w:
    case LDR_w:
    case LDR_x:
    case LDRSB_x:
    case LDRSH_x:
    case LDRSW_x:
    case LDRSB_w:
      VIXL_FALLTHROUGH();
    case LDRSH_w:
      load_int_counter->Increment();
      break;
    // FP loads (single and double precision).
    case LDR_s:
      VIXL_FALLTHROUGH();
    case LDR_d:
      load_fp_counter->Increment();
      break;
  }
}
// Unscaled-offset loads/stores are classified by the shared helper.
void Instrument::VisitLoadStoreUnscaledOffset(const Instruction* instr) {
  Update();
  InstrumentLoadStore(instr);
}
// Post-indexed loads/stores are classified by the shared helper.
// Fix: the original also called USE(instr) here even though instr is
// actually consumed by InstrumentLoadStore; the redundant USE is removed
// for consistency with the other LoadStore* visitors.
void Instrument::VisitLoadStorePostIndex(const Instruction* instr) {
  Update();
  InstrumentLoadStore(instr);
}
// Remaining single-register load/store addressing modes; all delegate to
// the shared InstrumentLoadStore classifier.
void Instrument::VisitLoadStorePreIndex(const Instruction* instr) {
  Update();
  InstrumentLoadStore(instr);
}

void Instrument::VisitLoadStoreRegisterOffset(const Instruction* instr) {
  Update();
  InstrumentLoadStore(instr);
}

void Instrument::VisitLoadStoreUnsignedOffset(const Instruction* instr) {
  Update();
  InstrumentLoadStore(instr);
}
// Integer data-processing visitors. All three add/sub forms (shifted,
// extended, with-carry) share the "Add/Sub DP" counter; both conditional
// compare forms share "Conditional Compare".
void Instrument::VisitLogicalShifted(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Logical DP");
  counter->Increment();
}

void Instrument::VisitAddSubShifted(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Add/Sub DP");
  counter->Increment();
}

void Instrument::VisitAddSubExtended(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Add/Sub DP");
  counter->Increment();
}

void Instrument::VisitAddSubWithCarry(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Add/Sub DP");
  counter->Increment();
}

void Instrument::VisitConditionalCompareRegister(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Conditional Compare");
  counter->Increment();
}

void Instrument::VisitConditionalCompareImmediate(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Conditional Compare");
  counter->Increment();
}

void Instrument::VisitConditionalSelect(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Conditional Select");
  counter->Increment();
}
// 1-, 2- and 3-source integer data-processing instructions are lumped
// together under "Other Int DP".
void Instrument::VisitDataProcessing1Source(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Other Int DP");
  counter->Increment();
}

void Instrument::VisitDataProcessing2Source(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Other Int DP");
  counter->Increment();
}

void Instrument::VisitDataProcessing3Source(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Other Int DP");
  counter->Increment();
}
// Floating-point visitors. Most FP classes are aggregated into "FP DP";
// FP conditional compare/select reuse the shared conditional counters so
// they are grouped with their integer counterparts.
void Instrument::VisitFPCompare(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("FP DP");
  counter->Increment();
}

void Instrument::VisitFPConditionalCompare(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Conditional Compare");
  counter->Increment();
}

void Instrument::VisitFPConditionalSelect(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Conditional Select");
  counter->Increment();
}

void Instrument::VisitFPImmediate(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("FP DP");
  counter->Increment();
}

void Instrument::VisitFPDataProcessing1Source(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("FP DP");
  counter->Increment();
}

void Instrument::VisitFPDataProcessing2Source(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("FP DP");
  counter->Increment();
}

void Instrument::VisitFPDataProcessing3Source(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("FP DP");
  counter->Increment();
}

void Instrument::VisitFPIntegerConvert(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("FP DP");
  counter->Increment();
}

void Instrument::VisitFPFixedPointConvert(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("FP DP");
  counter->Increment();
}
// All cryptographic extension classes share the single "Crypto" counter.
void Instrument::VisitCrypto2RegSHA(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Crypto");
  counter->Increment();
}

void Instrument::VisitCrypto3RegSHA(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Crypto");
  counter->Increment();
}

void Instrument::VisitCryptoAES(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Crypto");
  counter->Increment();
}
// Every NEON decoder class is accumulated into a single "NEON" counter;
// the visitors below are identical apart from the class they attach to.
void Instrument::VisitNEON2RegMisc(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}

void Instrument::VisitNEON2RegMiscFP16(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}

void Instrument::VisitNEON3Same(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}

void Instrument::VisitNEON3SameFP16(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}

void Instrument::VisitNEON3SameExtra(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}

void Instrument::VisitNEON3Different(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}

void Instrument::VisitNEONAcrossLanes(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}

void Instrument::VisitNEONByIndexedElement(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}

void Instrument::VisitNEONCopy(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}

void Instrument::VisitNEONExtract(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}

void Instrument::VisitNEONLoadStoreMultiStruct(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}

void Instrument::VisitNEONLoadStoreMultiStructPostIndex(
    const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}

void Instrument::VisitNEONLoadStoreSingleStruct(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}

void Instrument::VisitNEONLoadStoreSingleStructPostIndex(
    const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}

void Instrument::VisitNEONModifiedImmediate(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}

void Instrument::VisitNEONScalar2RegMisc(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}

void Instrument::VisitNEONScalar2RegMiscFP16(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}

void Instrument::VisitNEONScalar3Diff(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}

void Instrument::VisitNEONScalar3Same(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}

void Instrument::VisitNEONScalar3SameFP16(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}

void Instrument::VisitNEONScalar3SameExtra(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}

void Instrument::VisitNEONScalarByIndexedElement(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}

void Instrument::VisitNEONScalarCopy(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}

void Instrument::VisitNEONScalarPairwise(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}

void Instrument::VisitNEONScalarShiftImmediate(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}

void Instrument::VisitNEONShiftImmediate(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}

void Instrument::VisitNEONTable(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}

void Instrument::VisitNEONPerm(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}
// Unallocated and unimplemented encodings are still counted, in the
// catch-all "Other" bucket.
void Instrument::VisitUnallocated(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Other");
  counter->Increment();
}

void Instrument::VisitUnimplemented(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Other");
  counter->Increment();
}
} // namespace aarch64
} // namespace vixl

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -30,32 +30,32 @@ namespace vixl {
namespace aarch64 {
// CPURegList utilities.
CPURegister CPURegList::PopLowestIndex() {
if (IsEmpty()) {
return NoCPUReg;
}
int index = CountTrailingZeros(list_);
VIXL_ASSERT((1 << index) & list_);
CPURegister CPURegList::PopLowestIndex(RegList mask) {
RegList list = list_ & mask;
if (list == 0) return NoCPUReg;
int index = CountTrailingZeros(list);
VIXL_ASSERT(((static_cast<RegList>(1) << index) & list) != 0);
Remove(index);
return CPURegister(index, size_, type_);
}
CPURegister CPURegList::PopHighestIndex() {
VIXL_ASSERT(IsValid());
if (IsEmpty()) {
return NoCPUReg;
}
int index = CountLeadingZeros(list_);
CPURegister CPURegList::PopHighestIndex(RegList mask) {
RegList list = list_ & mask;
if (list == 0) return NoCPUReg;
int index = CountLeadingZeros(list);
index = kRegListSizeInBits - 1 - index;
VIXL_ASSERT((1 << index) & list_);
VIXL_ASSERT(((static_cast<RegList>(1) << index) & list) != 0);
Remove(index);
return CPURegister(index, size_, type_);
}
bool CPURegList::IsValid() const {
if ((type_ == CPURegister::kRegister) || (type_ == CPURegister::kVRegister)) {
if (type_ == CPURegister::kNoRegister) {
// We can't use IsEmpty here because that asserts IsValid().
return list_ == 0;
} else {
bool is_valid = true;
// Try to create a CPURegister for each element in the list.
for (int i = 0; i < kRegListSizeInBits; i++) {
@ -64,11 +64,6 @@ bool CPURegList::IsValid() const {
}
}
return is_valid;
} else if (type_ == CPURegister::kNoRegister) {
// We can't use IsEmpty here because that asserts IsValid().
return list_ == 0;
} else {
return false;
}
}
@ -149,145 +144,6 @@ const CPURegList kCalleeSavedV = CPURegList::GetCalleeSavedV();
const CPURegList kCallerSaved = CPURegList::GetCallerSaved();
const CPURegList kCallerSavedV = CPURegList::GetCallerSavedV();
// Registers.
#define WREG(n) w##n,
const Register Register::wregisters[] = {AARCH64_REGISTER_CODE_LIST(WREG)};
#undef WREG
#define XREG(n) x##n,
const Register Register::xregisters[] = {AARCH64_REGISTER_CODE_LIST(XREG)};
#undef XREG
#define BREG(n) b##n,
const VRegister VRegister::bregisters[] = {AARCH64_REGISTER_CODE_LIST(BREG)};
#undef BREG
#define HREG(n) h##n,
const VRegister VRegister::hregisters[] = {AARCH64_REGISTER_CODE_LIST(HREG)};
#undef HREG
#define SREG(n) s##n,
const VRegister VRegister::sregisters[] = {AARCH64_REGISTER_CODE_LIST(SREG)};
#undef SREG
#define DREG(n) d##n,
const VRegister VRegister::dregisters[] = {AARCH64_REGISTER_CODE_LIST(DREG)};
#undef DREG
#define QREG(n) q##n,
const VRegister VRegister::qregisters[] = {AARCH64_REGISTER_CODE_LIST(QREG)};
#undef QREG
#define VREG(n) v##n,
const VRegister VRegister::vregisters[] = {AARCH64_REGISTER_CODE_LIST(VREG)};
#undef VREG
const Register& Register::GetWRegFromCode(unsigned code) {
if (code == kSPRegInternalCode) {
return wsp;
} else {
VIXL_ASSERT(code < kNumberOfRegisters);
return wregisters[code];
}
}
const Register& Register::GetXRegFromCode(unsigned code) {
if (code == kSPRegInternalCode) {
return sp;
} else {
VIXL_ASSERT(code < kNumberOfRegisters);
return xregisters[code];
}
}
const VRegister& VRegister::GetBRegFromCode(unsigned code) {
VIXL_ASSERT(code < kNumberOfVRegisters);
return bregisters[code];
}
const VRegister& VRegister::GetHRegFromCode(unsigned code) {
VIXL_ASSERT(code < kNumberOfVRegisters);
return hregisters[code];
}
const VRegister& VRegister::GetSRegFromCode(unsigned code) {
VIXL_ASSERT(code < kNumberOfVRegisters);
return sregisters[code];
}
const VRegister& VRegister::GetDRegFromCode(unsigned code) {
VIXL_ASSERT(code < kNumberOfVRegisters);
return dregisters[code];
}
const VRegister& VRegister::GetQRegFromCode(unsigned code) {
VIXL_ASSERT(code < kNumberOfVRegisters);
return qregisters[code];
}
const VRegister& VRegister::GetVRegFromCode(unsigned code) {
VIXL_ASSERT(code < kNumberOfVRegisters);
return vregisters[code];
}
const Register& CPURegister::W() const {
VIXL_ASSERT(IsValidRegister());
return Register::GetWRegFromCode(code_);
}
const Register& CPURegister::X() const {
VIXL_ASSERT(IsValidRegister());
return Register::GetXRegFromCode(code_);
}
const VRegister& CPURegister::B() const {
VIXL_ASSERT(IsValidVRegister());
return VRegister::GetBRegFromCode(code_);
}
const VRegister& CPURegister::H() const {
VIXL_ASSERT(IsValidVRegister());
return VRegister::GetHRegFromCode(code_);
}
const VRegister& CPURegister::S() const {
VIXL_ASSERT(IsValidVRegister());
return VRegister::GetSRegFromCode(code_);
}
const VRegister& CPURegister::D() const {
VIXL_ASSERT(IsValidVRegister());
return VRegister::GetDRegFromCode(code_);
}
const VRegister& CPURegister::Q() const {
VIXL_ASSERT(IsValidVRegister());
return VRegister::GetQRegFromCode(code_);
}
const VRegister& CPURegister::V() const {
VIXL_ASSERT(IsValidVRegister());
return VRegister::GetVRegFromCode(code_);
}
// Operand.
Operand::Operand(int64_t immediate)
: immediate_(immediate),
@ -296,6 +152,12 @@ Operand::Operand(int64_t immediate)
extend_(NO_EXTEND),
shift_amount_(0) {}
Operand::Operand(IntegerOperand immediate)
: immediate_(immediate.AsIntN(64)),
reg_(NoReg),
shift_(NO_SHIFT),
extend_(NO_EXTEND),
shift_amount_(0) {}
Operand::Operand(Register reg, Shift shift, unsigned shift_amount)
: reg_(reg),
@ -471,6 +333,24 @@ MemOperand::MemOperand(Register base, const Operand& offset, AddrMode addrmode)
}
bool MemOperand::IsPlainRegister() const {
return IsImmediateOffset() && (GetOffset() == 0);
}
bool MemOperand::IsEquivalentToPlainRegister() const {
if (regoffset_.Is(NoReg)) {
// Immediate offset, pre-index or post-index.
return GetOffset() == 0;
} else if (GetRegisterOffset().IsZero()) {
// Zero register offset, pre-index or post-index.
// We can ignore shift and extend options because they all result in zero.
return true;
}
return false;
}
bool MemOperand::IsImmediateOffset() const {
return (addrmode_ == Offset) && regoffset_.Is(NoReg);
}
@ -480,12 +360,16 @@ bool MemOperand::IsRegisterOffset() const {
return (addrmode_ == Offset) && !regoffset_.Is(NoReg);
}
bool MemOperand::IsPreIndex() const { return addrmode_ == PreIndex; }
bool MemOperand::IsPostIndex() const { return addrmode_ == PostIndex; }
bool MemOperand::IsImmediatePreIndex() const {
return IsPreIndex() && regoffset_.Is(NoReg);
}
bool MemOperand::IsImmediatePostIndex() const {
return IsPostIndex() && regoffset_.Is(NoReg);
}
void MemOperand::AddOffset(int64_t offset) {
VIXL_ASSERT(IsImmediateOffset());
@ -493,6 +377,63 @@ void MemOperand::AddOffset(int64_t offset) {
}
bool SVEMemOperand::IsValid() const {
#ifdef VIXL_DEBUG
{
// It should not be possible for an SVEMemOperand to match multiple types.
int count = 0;
if (IsScalarPlusImmediate()) count++;
if (IsScalarPlusScalar()) count++;
if (IsScalarPlusVector()) count++;
if (IsVectorPlusImmediate()) count++;
if (IsVectorPlusScalar()) count++;
if (IsVectorPlusVector()) count++;
VIXL_ASSERT(count <= 1);
}
#endif
// We can't have a register _and_ an immediate offset.
if ((offset_ != 0) && (!regoffset_.IsNone())) return false;
if (shift_amount_ != 0) {
// Only shift and extend modifiers can take a shift amount.
switch (mod_) {
case NO_SVE_OFFSET_MODIFIER:
case SVE_MUL_VL:
return false;
case SVE_LSL:
case SVE_UXTW:
case SVE_SXTW:
// Fall through.
break;
}
}
return IsScalarPlusImmediate() || IsScalarPlusScalar() ||
IsScalarPlusVector() || IsVectorPlusImmediate() ||
IsVectorPlusScalar() || IsVectorPlusVector();
}
bool SVEMemOperand::IsEquivalentToScalar() const {
if (IsScalarPlusImmediate()) {
return GetImmediateOffset() == 0;
}
if (IsScalarPlusScalar()) {
// We can ignore the shift because it will still result in zero.
return GetScalarOffset().IsZero();
}
// Forms involving vectors are never equivalent to a single scalar.
return false;
}
bool SVEMemOperand::IsPlainRegister() const {
if (IsScalarPlusImmediate()) {
return GetImmediateOffset() == 0;
}
return false;
}
GenericOperand::GenericOperand(const CPURegister& reg)
: cpu_register_(reg), mem_op_size_(0) {
if (reg.IsQ()) {
@ -524,5 +465,5 @@ bool GenericOperand::Equals(const GenericOperand& other) const {
}
return false;
}
}
} // namespace vixl::aarch64
} // namespace aarch64
} // namespace vixl

View File

@ -26,10 +26,10 @@
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
#include "simulator-aarch64.h"
#include "utils-vixl.h"
#include "simulator-aarch64.h"
namespace vixl {
namespace aarch64 {

View File

@ -0,0 +1,322 @@
// Copyright 2019, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "registers-aarch64.h"
#include <sstream>
#include <string>
namespace vixl {
namespace aarch64 {
// Builds the architectural assembly name for SVE registers, e.g. "z3.s" or
// "p2/m". Only Z and P registers are handled here; any other register type
// hits VIXL_UNIMPLEMENTED().
std::string CPURegister::GetArchitecturalName() const {
  std::ostringstream name;
  if (IsZRegister()) {
    name << 'z' << GetCode();
    // Append the lane-size suffix (".b"/".h"/".s"/".d") if one is set.
    if (HasLaneSize()) {
      name << '.' << GetLaneSizeSymbol();
    }
  } else if (IsPRegister()) {
    name << 'p' << GetCode();
    if (HasLaneSize()) {
      name << '.' << GetLaneSizeSymbol();
    }
    // Predicate registers may carry a governing qualifier: "/m" (merging)
    // or "/z" (zeroing).
    switch (qualifiers_) {
      case kNoQualifiers:
        break;
      case kMerging:
        name << "/m";
        break;
      case kZeroing:
        name << "/z";
        break;
    }
  } else {
    VIXL_UNIMPLEMENTED();
  }
  return name.str();
}
// Returns the highest register code usable in the given bank (0 for the
// "no bank" case). V and Z registers share the V bank, so their maximum
// codes are asserted equal before returning the V value.
unsigned CPURegister::GetMaxCodeFor(CPURegister::RegisterBank bank) {
  switch (bank) {
    case kNoRegisterBank:
      return 0;
    case kRRegisterBank:
      return Register::GetMaxCode();
    case kVRegisterBank:
// Prefer the compile-time check when constexpr support is available.
#ifdef VIXL_HAS_CONSTEXPR
      VIXL_STATIC_ASSERT(VRegister::GetMaxCode() == ZRegister::GetMaxCode());
#else
      VIXL_ASSERT(VRegister::GetMaxCode() == ZRegister::GetMaxCode());
#endif
      return VRegister::GetMaxCode();
    case kPRegisterBank:
      return PRegister::GetMaxCode();
  }
  VIXL_UNREACHABLE();
  return 0;
}
// Validity predicates: each checks that the code, bank, encoded size, lane
// size and qualifiers are mutually consistent for one register kind.

// General-purpose (R-bank) register: W or X sized, with lane size equal to
// the register size. kSPRegInternalCode is accepted alongside 0..30.
bool CPURegister::IsValidRegister() const {
  return ((code_ < kNumberOfRegisters) || (code_ == kSPRegInternalCode)) &&
         (bank_ == kRRegisterBank) &&
         ((size_ == kEncodedWRegSize) || (size_ == kEncodedXRegSize)) &&
         (qualifiers_ == kNoQualifiers) && (lane_size_ == size_);
}

// NEON/FP (V-bank) register: B through Q sized, with a known lane size no
// larger than the register itself.
bool CPURegister::IsValidVRegister() const {
  VIXL_STATIC_ASSERT(kEncodedBRegSize < kEncodedQRegSize);
  return (code_ < kNumberOfVRegisters) && (bank_ == kVRegisterBank) &&
         ((size_ >= kEncodedBRegSize) && (size_ <= kEncodedQRegSize)) &&
         (qualifiers_ == kNoQualifiers) &&
         (lane_size_ != kEncodedUnknownSize) && (lane_size_ <= size_);
}

// A scalar FP register is a V register whose format is FP (H/S/D).
bool CPURegister::IsValidFPRegister() const {
  return IsValidVRegister() && IsFPRegister();
}

bool CPURegister::IsValidZRegister() const {
  VIXL_STATIC_ASSERT(kEncodedBRegSize < kEncodedQRegSize);
  // Z registers are valid with or without a lane size, so we don't need to
  // check lane_size_.
  return (code_ < kNumberOfZRegisters) && (bank_ == kVRegisterBank) &&
         (size_ == kEncodedUnknownSize) && (qualifiers_ == kNoQualifiers);
}

bool CPURegister::IsValidPRegister() const {
  VIXL_STATIC_ASSERT(kEncodedBRegSize < kEncodedQRegSize);
  // P registers are valid with or without a lane size, so we don't need to
  // check lane_size_.
  return (code_ < kNumberOfPRegisters) && (bank_ == kPRegisterBank) &&
         (size_ == kEncodedUnknownSize) &&
         ((qualifiers_ == kNoQualifiers) || (qualifiers_ == kMerging) ||
          (qualifiers_ == kZeroing));
}

// A CPURegister is valid if it is valid as any one of the specific kinds.
bool CPURegister::IsValid() const {
  return IsValidRegister() || IsValidVRegister() || IsValidZRegister() ||
         IsValidPRegister();
}
// Most coercions simply invoke the necessary constructor.
// The X-macro list below pairs each coercion method (W, X, B, ... P) with
// the register type it returns and the bank it requires.
#define VIXL_CPUREG_COERCION_LIST(U) \
  U(Register, W, R)                  \
  U(Register, X, R)                  \
  U(VRegister, B, V)                 \
  U(VRegister, H, V)                 \
  U(VRegister, S, V)                 \
  U(VRegister, D, V)                 \
  U(VRegister, Q, V)                 \
  U(VRegister, V, V)                 \
  U(ZRegister, Z, V)                 \
  U(PRegister, P, P)
// Each generated method asserts the register lives in the expected bank,
// then re-wraps the code in the target register type.
#define VIXL_DEFINE_CPUREG_COERCION(RET_TYPE, CTOR_TYPE, BANK) \
  RET_TYPE CPURegister::CTOR_TYPE() const {                    \
    VIXL_ASSERT(GetBank() == k##BANK##RegisterBank);           \
    return CTOR_TYPE##Register(GetCode());                     \
  }
VIXL_CPUREG_COERCION_LIST(VIXL_DEFINE_CPUREG_COERCION)
#undef VIXL_CPUREG_COERCION_LIST
#undef VIXL_DEFINE_CPUREG_COERCION

// NEON lane-format coercions always return VRegisters.
// Each entry (LANES, LANE_TYPE) generates V8B(), V16B(), ... V1Q().
#define VIXL_CPUREG_NEON_COERCION_LIST(V) \
  V(8, B)                                 \
  V(16, B)                                \
  V(2, H)                                 \
  V(4, H)                                 \
  V(8, H)                                 \
  V(2, S)                                 \
  V(4, S)                                 \
  V(1, D)                                 \
  V(2, D)                                 \
  V(1, Q)
#define VIXL_DEFINE_CPUREG_NEON_COERCION(LANES, LANE_TYPE)             \
  VRegister VRegister::V##LANES##LANE_TYPE() const {                   \
    VIXL_ASSERT(IsVRegister());                                        \
    return VRegister(GetCode(), LANES * k##LANE_TYPE##RegSize, LANES); \
  }
VIXL_CPUREG_NEON_COERCION_LIST(VIXL_DEFINE_CPUREG_NEON_COERCION)
#undef VIXL_CPUREG_NEON_COERCION_LIST
#undef VIXL_DEFINE_CPUREG_NEON_COERCION

// Semantic type coercion for sdot and udot.
// TODO: Use the qualifiers_ field to distinguish this from ::S().
VRegister VRegister::S4B() const {
  VIXL_ASSERT(IsVRegister());
  return SRegister(GetCode());
}
// Returns true if any two of the given registers alias (same code in the
// same bank). Invalid (NoReg) arguments are ignored. The comparison is done
// per bank: count the valid registers and the distinct register bits; any
// mismatch means a code appeared twice.
bool AreAliased(const CPURegister& reg1,
                const CPURegister& reg2,
                const CPURegister& reg3,
                const CPURegister& reg4,
                const CPURegister& reg5,
                const CPURegister& reg6,
                const CPURegister& reg7,
                const CPURegister& reg8) {
  int number_of_valid_regs = 0;
  int number_of_valid_vregs = 0;
  int number_of_valid_pregs = 0;

  RegList unique_regs = 0;
  RegList unique_vregs = 0;
  RegList unique_pregs = 0;

  const CPURegister regs[] = {reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8};

  for (size_t i = 0; i < ArrayLength(regs); i++) {
    switch (regs[i].GetBank()) {
      case CPURegister::kRRegisterBank:
        number_of_valid_regs++;
        unique_regs |= regs[i].GetBit();
        break;
      case CPURegister::kVRegisterBank:
        number_of_valid_vregs++;
        unique_vregs |= regs[i].GetBit();
        break;
      case CPURegister::kPRegisterBank:
        number_of_valid_pregs++;
        unique_pregs |= regs[i].GetBit();
        break;
      case CPURegister::kNoRegisterBank:
        // Only a default-constructed NoReg may lack a bank.
        VIXL_ASSERT(regs[i].IsNone());
        break;
    }
  }

  int number_of_unique_regs = CountSetBits(unique_regs);
  int number_of_unique_vregs = CountSetBits(unique_vregs);
  int number_of_unique_pregs = CountSetBits(unique_pregs);

  VIXL_ASSERT(number_of_valid_regs >= number_of_unique_regs);
  VIXL_ASSERT(number_of_valid_vregs >= number_of_unique_vregs);
  VIXL_ASSERT(number_of_valid_pregs >= number_of_unique_pregs);

  // More valid registers than unique bits in some bank implies a duplicate.
  return (number_of_valid_regs != number_of_unique_regs) ||
         (number_of_valid_vregs != number_of_unique_vregs) ||
         (number_of_valid_pregs != number_of_unique_pregs);
}
bool AreSameSizeAndType(const CPURegister& reg1,
const CPURegister& reg2,
const CPURegister& reg3,
const CPURegister& reg4,
const CPURegister& reg5,
const CPURegister& reg6,
const CPURegister& reg7,
const CPURegister& reg8) {
VIXL_ASSERT(reg1.IsValid());
bool match = true;
match &= !reg2.IsValid() || reg2.IsSameSizeAndType(reg1);
match &= !reg3.IsValid() || reg3.IsSameSizeAndType(reg1);
match &= !reg4.IsValid() || reg4.IsSameSizeAndType(reg1);
match &= !reg5.IsValid() || reg5.IsSameSizeAndType(reg1);
match &= !reg6.IsValid() || reg6.IsSameSizeAndType(reg1);
match &= !reg7.IsValid() || reg7.IsSameSizeAndType(reg1);
match &= !reg8.IsValid() || reg8.IsSameSizeAndType(reg1);
return match;
}
bool AreEven(const CPURegister& reg1,
const CPURegister& reg2,
const CPURegister& reg3,
const CPURegister& reg4,
const CPURegister& reg5,
const CPURegister& reg6,
const CPURegister& reg7,
const CPURegister& reg8) {
VIXL_ASSERT(reg1.IsValid());
bool even = (reg1.GetCode() % 2) == 0;
even &= !reg2.IsValid() || ((reg2.GetCode() % 2) == 0);
even &= !reg3.IsValid() || ((reg3.GetCode() % 2) == 0);
even &= !reg4.IsValid() || ((reg4.GetCode() % 2) == 0);
even &= !reg5.IsValid() || ((reg5.GetCode() % 2) == 0);
even &= !reg6.IsValid() || ((reg6.GetCode() % 2) == 0);
even &= !reg7.IsValid() || ((reg7.GetCode() % 2) == 0);
even &= !reg8.IsValid() || ((reg8.GetCode() % 2) == 0);
return even;
}
bool AreConsecutive(const CPURegister& reg1,
const CPURegister& reg2,
const CPURegister& reg3,
const CPURegister& reg4) {
VIXL_ASSERT(reg1.IsValid());
if (!reg2.IsValid()) {
return true;
} else if (reg2.GetCode() !=
((reg1.GetCode() + 1) % (reg1.GetMaxCode() + 1))) {
return false;
}
if (!reg3.IsValid()) {
return true;
} else if (reg3.GetCode() !=
((reg2.GetCode() + 1) % (reg1.GetMaxCode() + 1))) {
return false;
}
if (!reg4.IsValid()) {
return true;
} else if (reg4.GetCode() !=
((reg3.GetCode() + 1) % (reg1.GetMaxCode() + 1))) {
return false;
}
return true;
}
bool AreSameFormat(const CPURegister& reg1,
const CPURegister& reg2,
const CPURegister& reg3,
const CPURegister& reg4) {
VIXL_ASSERT(reg1.IsValid());
bool match = true;
match &= !reg2.IsValid() || reg2.IsSameFormat(reg1);
match &= !reg3.IsValid() || reg3.IsSameFormat(reg1);
match &= !reg4.IsValid() || reg4.IsSameFormat(reg1);
return match;
}
bool AreSameLaneSize(const CPURegister& reg1,
const CPURegister& reg2,
const CPURegister& reg3,
const CPURegister& reg4) {
VIXL_ASSERT(reg1.IsValid());
bool match = true;
match &=
!reg2.IsValid() || (reg2.GetLaneSizeInBits() == reg1.GetLaneSizeInBits());
match &=
!reg3.IsValid() || (reg3.GetLaneSizeInBits() == reg1.GetLaneSizeInBits());
match &=
!reg4.IsValid() || (reg4.GetLaneSizeInBits() == reg1.GetLaneSizeInBits());
return match;
}
} // namespace aarch64
} // namespace vixl

File diff suppressed because it is too large Load Diff