diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h
index e5dc2fe9d8fd06..bd7ee59e0a1017 100644
--- a/src/coreclr/inc/clrconfigvalues.h
+++ b/src/coreclr/inc/clrconfigvalues.h
@@ -349,6 +349,10 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_MultiCoreJitNoProfileGather, W("MultiCoreJitNo
 #endif
 
+#ifdef TARGET_ARM64
+RETAIL_CONFIG_DWORD_INFO(EXTERNAL_JitUseScalableVectorT, W("JitUseScalableVectorT"), 0, "Accelerate Vector<T> with SVE if available.")
+#endif
+
 ///
 /// Loader heap
 ///
diff --git a/src/coreclr/inc/corhdr.h b/src/coreclr/inc/corhdr.h
index 291cadbe2c18a6..3546c88c5884e3 100644
--- a/src/coreclr/inc/corhdr.h
+++ b/src/coreclr/inc/corhdr.h
@@ -1761,6 +1761,7 @@ typedef enum CorInfoHFAElemType : unsigned {
     CORINFO_HFA_ELEM_DOUBLE,
     CORINFO_HFA_ELEM_VECTOR64,
     CORINFO_HFA_ELEM_VECTOR128,
+    CORINFO_HFA_ELEM_VECTORT,
 } CorInfoHFAElemType;
 
 //
diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp
index 4c3bf607017696..e091061c1fcbd3 100644
--- a/src/coreclr/jit/codegenarm64.cpp
+++ b/src/coreclr/jit/codegenarm64.cpp
@@ -362,6 +362,15 @@ bool CodeGen::genInstrWithConstant(instruction ins,
             immFitsInIns = emitter::emitIns_valid_imm_for_ldst_offset(imm, size);
             break;
 
+        case INS_sve_ldr:
+        case INS_sve_str:
+        {
+            assert(size == EA_SCALABLE);
+            ssize_t count = imm / genTypeSize(TYP_SIMDSV);
+            immFitsInIns  = (-256 <= count && count < 256);
+        }
+        break;
+
         default:
             assert(!"Unexpected instruction in genInstrWithConstant");
             break;
@@ -2075,10 +2084,14 @@ void CodeGen::instGen_Set_Reg_To_Base_Plus_Imm(emitAttr size,
     // If the imm values < 12 bits, we can use a single "add rsvd, reg2, #imm".
     // Otherwise, use "mov rsvd, #imm", followed up "add rsvd, reg2, rsvd".
-    if (imm < 4096)
+    if (0 <= imm && imm < 4096)
     {
         GetEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, dstReg, baseReg, imm);
     }
+    else if (-4095 <= imm && imm < 0)
+    {
+        GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, dstReg, baseReg, -imm);
+    }
     else
     {
         instGen_Set_Reg_To_Imm(size, dstReg, imm);
@@ -2274,6 +2287,9 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
     switch (tree->TypeGet())
     {
+        case TYP_SIMDSV:
+            attr = EA_16BYTE; // TODO-SVE: Implement scalable vector constant
+            FALLTHROUGH;
         case TYP_SIMD8:
         case TYP_SIMD12:
         case TYP_SIMD16:
@@ -2999,7 +3015,7 @@ void CodeGen::genSimpleReturn(GenTree* treeNode)
         }
     }
     emitAttr attr = emitActualTypeSize(targetType);
-    GetEmitter()->emitIns_Mov(INS_mov, attr, retReg, op1->GetRegNum(), /* canSkip */ !movRequired);
+    inst_Mov(targetType, retReg, op1->GetRegNum(), !movRequired, attr);
 }
 
 /***********************************************************************************************
@@ -5306,7 +5322,7 @@ void CodeGen::genSimdUpperSave(GenTreeIntrinsic* node)
     GenTreeLclVar* lclNode = op1->AsLclVar();
     LclVarDsc*     varDsc  = compiler->lvaGetDesc(lclNode);
-    assert(emitTypeSize(varDsc->GetRegisterType(lclNode)) == 16);
+    assert(varDsc->TypeIs(TYP_STRUCT, TYP_SIMD12, TYP_SIMD16, TYP_SIMDSV)); // TODO-SVE: Handle AAPCS for Z registers
 
     regNumber tgtReg = node->GetRegNum();
     assert(tgtReg != REG_NA);
@@ -5362,7 +5378,7 @@ void CodeGen::genSimdUpperRestore(GenTreeIntrinsic* node)
     GenTreeLclVar* lclNode = op1->AsLclVar();
     LclVarDsc*     varDsc  = compiler->lvaGetDesc(lclNode);
-    assert(emitTypeSize(varDsc->GetRegisterType(lclNode)) == 16);
+    assert(varDsc->TypeIs(TYP_STRUCT, TYP_SIMD12, TYP_SIMD16, TYP_SIMDSV)); // TODO-SVE: Handle AAPCS for Z registers
 
     regNumber srcReg = node->GetRegNum();
     assert(srcReg != REG_NA);
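A note on the INS_sve_ldr/INS_sve_str case added above: the SVE LDR/STR (vector) forms take a signed 9-bit immediate that counts whole vectors ("MUL VL"), not bytes, which is why the hunk divides the byte offset by genTypeSize(TYP_SIMDSV) before range-checking. A minimal standalone sketch of the same check; the divisibility guard is an extra illustration here, since the JIT caller is assumed to pass VL-aligned offsets:

    #include <cassert>
    #include <cstddef>

    // Returns true if byteOffset is encodable as the VL-scaled imm9 of SVE LDR/STR (vector).
    static bool sveLdrStrImmFits(ptrdiff_t byteOffset, ptrdiff_t vectorByteLength)
    {
        assert(vectorByteLength >= 16);           // SVE vector length is at least 128 bits
        if ((byteOffset % vectorByteLength) != 0) // must be a whole number of vectors
            return false;
        ptrdiff_t count = byteOffset / vectorByteLength;
        return (-256 <= count) && (count < 256);  // signed imm9 range is [-256, 255]
    }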
diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp
index 8d90b0d3766f10..3c70b65f7a8b69 100644
--- a/src/coreclr/jit/codegenarmarch.cpp
+++ b/src/coreclr/jit/codegenarmarch.cpp
@@ -809,8 +809,13 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
 #endif // TARGET_ARM64
             {
                 emitAttr storeAttr = emitTypeSize(source->TypeGet());
-                emit->emitIns_S_R(INS_str, storeAttr, srcReg, varNumOut, argOffsetOut);
+                emit->emitIns_S_R(ins_Store(source->TypeGet()), storeAttr, srcReg, varNumOut, argOffsetOut);
+#ifdef TARGET_ARM64
+                argOffsetOut +=
+                    storeAttr == EA_SCALABLE ? compiler->getVectorTByteLength() : EA_SIZE_IN_BYTES(storeAttr);
+#else
                 argOffsetOut += EA_SIZE_IN_BYTES(storeAttr);
+#endif
             }
             assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
             return;
diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp
index d673a0678ceee5..a7117310ef79cf 100644
--- a/src/coreclr/jit/codegencommon.cpp
+++ b/src/coreclr/jit/codegencommon.cpp
@@ -3266,6 +3266,7 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed)
 #if defined(TARGET_ARM64)
                 // On arm64 SIMD parameters are HFAs and passed in multiple float
                 // registers while we can enregister them as single registers.
+                // TODO-SVE: Ensure this works for Z registers as well.
                 GetEmitter()->emitIns_R_R_I_I(INS_mov, emitTypeSize(edge->type), node->reg, sourceReg,
                                               edge->destOffset / genTypeSize(edge->type), 0);
 #elif defined(UNIX_AMD64_ABI)
@@ -5906,7 +5907,7 @@ unsigned Compiler::GetHfaCount(CORINFO_CLASS_HANDLE hClass)
     var_types hfaType   = GetHfaType(hClass);
     unsigned  classSize = info.compCompHnd->getClassSize(hClass);
     // Note that the retail build issues a warning about a potential division by zero without the Max function
-    unsigned elemSize = Max((unsigned)1, EA_SIZE_IN_BYTES(emitActualTypeSize(hfaType)));
+    unsigned elemSize = Max((unsigned)1, genTypeSize(genActualType(hfaType)));
     return classSize / elemSize;
 #endif // TARGET_ARM64
 }
diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp
index 339ac1eab1610d..c33beea3ea46be 100644
--- a/src/coreclr/jit/codegenlinear.cpp
+++ b/src/coreclr/jit/codegenlinear.cpp
@@ -2269,8 +2269,19 @@ void CodeGen::genCodeForCast(GenTreeOp* tree)
         genLongToIntCast(tree);
     }
 #endif // !TARGET_64BIT
+#ifdef TARGET_ARM64
+    else if (targetType == TYP_SIMDSV || tree->gtOp1->TypeGet() == TYP_SIMDSV)
+    {
+        // TODO-SVE: Can we avoid generating these casts altogether?
+        assert(genTypeSize(tree->CastToType()) == genTypeSize(tree->CastFromType()));
+        genConsumeOperands(tree);
+        inst_Mov(tree->CastToType(), tree->GetRegNum(), tree->gtOp1->GetRegNum(), true);
+        genProduceReg(tree);
+    }
+#endif
     else
     {
+        assert(varTypeIsIntegral(targetType) && varTypeIsIntegral(tree->gtOp1));
         // Casts int <--> int
         genIntToIntCast(tree->AsCast());
     }
diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index f5c618fccea289..35503b8f4185fe 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -107,11 +107,12 @@ inline bool _our_GetThreadCycles(uint64_t* cycleOut)
 #endif // which host OS
 
-const BYTE genTypeSizes[] = {
+BYTE _initGenTypeSizes[] = {
 #define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) sz,
 #include "typelist.h"
 #undef DEF_TP
 };
+const BYTE (&genTypeSizes)[TYP_COUNT] = _initGenTypeSizes;
 
 const BYTE genTypeAlignments[] = {
 #define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) al,
@@ -609,13 +610,18 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS
     // Start by determining if we have an HFA/HVA with a single element.
     if (GlobalJitOptions::compFeatureHfa)
     {
-        switch (structSize)
-        {
-            case 4:
-            case 8:
+        if (structSize == 4 ||
+            structSize == 8
 #ifdef TARGET_ARM64
-            case 16:
-#endif // TARGET_ARM64
+            // Can pass in V register if structSize == 16, and Z registers for structs with sizes in
+            // multiples of 16-bytes, depending on hardware availability.
+            || structSize == 16 || ((structSize % 16 == 0) && (structSize == genTypeSize(TYP_SIMDSV)))
+#endif
+            )
+        {
+            var_types hfaType = GetHfaType(clsHnd);
+            // We're only interested in the case where the struct size is equal to the size of the hfaType.
+            if (varTypeIsValidHfaType(hfaType))
             {
                 var_types hfaType = GetHfaType(clsHnd);
                 // We're only interested in the case where the struct size is equal to the size of the hfaType.
@@ -861,7 +867,15 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
     // The largest "primitive type" is MAX_PASS_SINGLEREG_BYTES
     // so we can skip calling getPrimitiveTypeForStruct when we
     // have a struct that is larger than that.
-    if (canReturnInRegister && (useType == TYP_UNKNOWN) && (structSize <= MAX_PASS_SINGLEREG_BYTES))
+    //
+    // On ARM64 we can pass structures in scalable vector registers
+    // which may allow larger structures on some hardware.
+#ifdef TARGET_ARM64
+    unsigned maxStructSize = max((unsigned)MAX_PASS_SINGLEREG_BYTES, getVectorTByteLength());
+#else
+    unsigned maxStructSize = MAX_PASS_SINGLEREG_BYTES;
+#endif
+    if (canReturnInRegister && (useType == TYP_UNKNOWN) && (structSize <= maxStructSize))
     {
         // We set the "primitive" useType based upon the structSize
        // and also examine the clsHnd to see if it is an HFA of count one
@@ -6795,6 +6809,14 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
             break;
     }
 
+#if defined(FEATURE_SIMD) && defined(TARGET_ARM64)
+    // Initialize the size of Vector<T> from the EE.
+    _initGenTypeSizes[TYP_SIMDSV] = (BYTE)getVectorTByteLength();
+    _initGenTypeSizes[TYP_MASK]   = (BYTE)getMaskByteLength();
+    assert(genTypeSize(TYP_SIMDSV) >= 16);
+    assert(genTypeSize(TYP_MASK) >= 2);
+#endif
+
     info.compRetType = JITtype2varType(methodInfo->args.retType);
     if (info.compRetType == TYP_STRUCT)
     {
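A note on the genTypeSizes change above: the table is now mutable storage (_initGenTypeSizes) exposed through a const reference, so compCompileHelper can patch the TYP_SIMDSV and TYP_MASK entries once the EE reports the actual vector length, while every other consumer keeps a read-only view. A hedged sketch of the same pattern; the names here are illustrative, not the JIT's:

    #include <cstdint>

    static uint8_t sizeStorage[4] = {1, 2, 4, 8};  // mutable backing store
    const uint8_t (&typeSizes)[4] = sizeStorage;   // read-only alias for consumers

    // Called once at startup, before any consumer reads typeSizes.
    void initScalableEntry(uint8_t vectorByteLength)
    {
        sizeStorage[3] = vectorByteLength;         // e.g. the scalable-vector slot
    }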
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index e7f3dae76f4102..79c36f24c4746d 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -157,6 +157,10 @@ inline var_types HfaTypeFromElemKind(CorInfoHFAElemType kind)
             return TYP_SIMD8;
         case CORINFO_HFA_ELEM_VECTOR128:
             return TYP_SIMD16;
+#ifdef TARGET_ARM64
+        case CORINFO_HFA_ELEM_VECTORT:
+            return TYP_SIMDSV;
+#endif
 #endif
         case CORINFO_HFA_ELEM_NONE:
             return TYP_UNDEF;
@@ -178,6 +182,10 @@ inline CorInfoHFAElemType HfaElemKindFromType(var_types type)
             return CORINFO_HFA_ELEM_VECTOR64;
         case TYP_SIMD16:
             return CORINFO_HFA_ELEM_VECTOR128;
+#ifdef TARGET_ARM64
+        case TYP_SIMDSV:
+            return CORINFO_HFA_ELEM_VECTORT;
+#endif
 #endif
         case TYP_UNDEF:
             return CORINFO_HFA_ELEM_NONE;
@@ -8212,7 +8220,7 @@ class Compiler
         assert(type != TYP_STRUCT);
         // ARM64 ABI FP Callee save registers only require Callee to save lower 8 Bytes
         // For SIMD types longer than 8 bytes Caller is responsible for saving and restoring Upper bytes.
-        return ((type == TYP_SIMD16) || (type == TYP_SIMD12));
+        return ((type == TYP_SIMDSV) || (type == TYP_SIMD16) || (type == TYP_SIMD12));
     }
 #else // !defined(TARGET_AMD64) && !defined(TARGET_ARM64)
 #error("Unknown target architecture for FEATURE_PARTIAL_SIMD_CALLEE_SAVE")
@@ -9079,6 +9087,8 @@ class Compiler
         return isSIMDClass(clsHnd) || isHWSIMDClass(clsHnd);
     }
 
+    var_types getSIMDType(CORINFO_CLASS_HANDLE typeHnd, CorInfoType* baseType = nullptr);
+
     // Get the base (element) type and size in bytes for a SIMD type. Returns CORINFO_TYPE_UNDEF
     // if it is not a SIMD type or is an unsupported base JIT type.
     CorInfoType getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, unsigned* sizeBytes = nullptr);
@@ -9164,6 +9174,16 @@ class Compiler
 #endif
     }
 
+#ifdef TARGET_ARM64
+    uint32_t getMaskByteLength()
+    {
+        // Predicate registers have 1 bit for each byte in the vector register.
+        // We round up to an int as the CLR prefers to work in integers.
+        assert((getVectorTByteLength() % 8) == 0);
+        return (uint32_t)roundUp((size_t)getVectorTByteLength() / 8, sizeof(int));
+    }
+#endif
+
     // The minimum and maximum possible number of bytes in a SIMD vector.
 
     // getMaxVectorByteLength
@@ -12407,7 +12427,7 @@ const instruction INS_BREAKPOINT = INS_ebreak;
 
 /*****************************************************************************/
 
-extern const BYTE genTypeSizes[];
+extern const BYTE (&genTypeSizes)[TYP_COUNT];
 extern const BYTE genTypeAlignments[];
 extern const BYTE genTypeStSzs[];
 extern const BYTE genActualTypes[];
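The getMaskByteLength helper above encodes the SVE predicate layout: a predicate register holds one bit per vector byte, so its raw size is VL/8 bytes, which is then rounded up to a whole int for the VM's bookkeeping. Worked through for the two common cases (a sketch, assuming roundUp rounds to a multiple of sizeof(int) == 4): VL = 16 bytes gives 16/8 = 2, rounded up to 4; VL = 32 bytes gives 32/8 = 4, already a multiple of 4.

    #include <cstdint>

    // One predicate bit per vector byte, rounded up to a whole 4-byte int.
    static uint32_t maskByteLength(uint32_t vectorByteLength)
    {
        uint32_t raw = vectorByteLength / 8; // predicate bits -> bytes
        return (raw + 3u) & ~3u;             // round up to sizeof(int)
    }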
diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp
index 12280cde617228..5c9c38c7be13a6 100644
--- a/src/coreclr/jit/compiler.hpp
+++ b/src/coreclr/jit/compiler.hpp
@@ -1137,13 +1137,12 @@ inline regNumber genFirstRegNumFromMaskAndToggle(SingleTypeRegSet& mask)
  *  Return the size in bytes of the given type.
  */
 
-extern const BYTE genTypeSizes[TYP_COUNT];
+extern const BYTE (&genTypeSizes)[TYP_COUNT];
 
 template <typename T>
 inline unsigned genTypeSize(T value)
 {
     assert((unsigned)TypeGet(value) < ArrLen(genTypeSizes));
-
     return genTypeSizes[TypeGet(value)];
 }
 
@@ -1158,6 +1157,11 @@ extern const BYTE genTypeStSzs[TYP_COUNT];
 template <typename T>
 inline unsigned genTypeStSz(T value)
 {
+#ifdef TARGET_ARM64
+    // The size of these types cannot be evaluated in static contexts.
+    noway_assert(TypeGet(value) != TYP_SIMDSV);
+    noway_assert(TypeGet(value) != TYP_MASK);
+#endif
     assert((unsigned)TypeGet(value) < ArrLen(genTypeStSzs));
 
     return genTypeStSzs[TypeGet(value)];
diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp
index e88e10568712d3..c326462538f2cb 100644
--- a/src/coreclr/jit/emitarm64.cpp
+++ b/src/coreclr/jit/emitarm64.cpp
@@ -4266,6 +4266,13 @@ void emitter::emitIns_Mov(
         {
             assert(insOptsNone(opt));
 
+            if (attr == EA_SCALABLE)
+            {
+                // NEON mov is acceptable for scalable vectors when the vector length is 128 bits.
+                // TODO-SVE: This should not be permitted once Vector<T> has been migrated to SVE.
+                assert(codeGen->compiler->getVectorTByteLength() == 16);
+            }
+
             if (IsRedundantMov(ins, size, dstReg, srcReg, canSkip))
             {
                 // These instructions have no side effect and can be skipped
@@ -4340,6 +4347,7 @@ void emitter::emitIns_Mov(
         case INS_fmov:
         {
             assert(isValidVectorElemsizeFloat(size));
+            assert(attr != EA_SCALABLE);
 
             if (canSkip && (dstReg == srcReg))
             {
@@ -4387,35 +4395,22 @@ void emitter::emitIns_Mov(
 
         case INS_sve_mov:
         {
+            assert(attr == EA_SCALABLE);
             if (isPredicateRegister(dstReg) && isPredicateRegister(srcReg))
             {
                 assert((opt == INS_OPTS_SCALABLE_B) || insOptsNone(opt));
-                opt  = INS_OPTS_SCALABLE_B;
-                attr = EA_SCALABLE;
-
-                if (IsRedundantMov(ins, size, dstReg, srcReg, canSkip))
-                {
-                    return;
-                }
+                opt = INS_OPTS_SCALABLE_B;
                 fmt = IF_SVE_CZ_4A_L;
             }
             else if (isVectorRegister(dstReg) && isVectorRegister(srcReg))
             {
-                assert(insOptsScalable(opt));
-
-                if (IsRedundantMov(ins, size, dstReg, srcReg, canSkip))
-                {
-                    return;
-                }
+                assert(insOptsScalable(opt) || insOptsNone(opt));
+                opt = INS_OPTS_SCALABLE_D;
                 fmt = IF_SVE_AU_3A;
             }
             else if (isVectorRegister(dstReg) && isGeneralRegisterOrSP(srcReg))
             {
                 assert(insOptsScalable(opt));
-                if (IsRedundantMov(ins, size, dstReg, srcReg, canSkip))
-                {
-                    return;
-                }
                 srcReg = encodingSPtoZR(srcReg);
                 fmt    = IF_SVE_CB_2A;
             }
@@ -4424,6 +4419,11 @@ void emitter::emitIns_Mov(
                 unreached();
             }
 
+            if (IsRedundantMov(ins, size, dstReg, srcReg, canSkip))
+            {
+                return;
+            }
+
             break;
         }
 
         default:
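The long run of assert rewrites in gentree.cpp below (getSIMDTypeForSize(simdSize) == type becoming genTypeSize(type) == simdSize) follows from one fact: once a scalable type exists, byte size no longer identifies a SIMD type uniquely. On 128-bit SVE hardware TYP_SIMD16 and TYP_SIMDSV are both 16 bytes, so mapping size -> type is ambiguous, while checking type -> size stays well defined. A toy illustration; the enum and helper are hypothetical:

    enum VType { V_SIMD8, V_SIMD16, V_SIMDSV };

    static unsigned vTypeSize(VType t, unsigned vectorByteLength)
    {
        switch (t)
        {
            case V_SIMD8:  return 8;
            case V_SIMD16: return 16;
            case V_SIMDSV: return vectorByteLength; // 16 on VL=128 hardware: collides with V_SIMD16
        }
        return 0;
    }
    // Checking vTypeSize(type, vl) == simdSize is sound; recovering the type
    // from simdSize alone is not, since two types can share a size.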
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 0178826d918017..ca2ed9f3f86d56 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -3276,6 +3276,9 @@ unsigned Compiler::gtHashValue(GenTree* tree)
 #endif // TARGET_XARCH
 
                 case TYP_SIMD16:
+#ifdef TARGET_ARM64
+                case TYP_SIMDSV: // TODO-SVE: Implement scalable vector constant
+#endif
                 {
                     add = genTreeHashAdd(ulo32(add), vecCon->gtSimdVal.u32[3]);
                     FALLTHROUGH;
@@ -4532,6 +4535,12 @@ bool Compiler::gtMarkAddrMode(GenTree* addr, int* pCostEx, int* pCostSz, var_typ
             unsigned naturalMul = 0;
 #ifdef TARGET_ARM64
+            if (type == TYP_SIMDSV)
+            {
+                // TODO-SVE: Investigate separately if it's worth using addressing modes
+                // for scalable types.
+                return false;
+            }
+
             // Multiplier should be a "natural-scale" power of two number which is equal to target's width.
             //
             // *(ulong*)(data + index * 8); - can be optimized
@@ -7986,6 +7995,7 @@ GenTreeVecCon* Compiler::gtNewVconNode(var_types type)
 GenTreeVecCon* Compiler::gtNewVconNode(var_types type, void* data)
 {
     GenTreeVecCon* vecCon = new (this, GT_CNS_VEC) GenTreeVecCon(type);
+    // TODO-SVE: Implement scalable vector constant
     memcpy(&vecCon->gtSimdVal, data, genTypeSize(type));
     return vecCon;
 }
@@ -8089,7 +8099,7 @@ GenTree* Compiler::gtNewOneConNode(var_types type, var_types simdBaseType /* = T
 {
     GenTreeVecCon* one = gtNewVconNode(type);
 
-    unsigned simdSize   = genTypeSize(type);
+    unsigned simdSize   = max(genTypeSize(type), (unsigned)sizeof(simd_t));
     uint32_t simdLength = getSIMDVectorLength(simdSize, simdBaseType);
 
     switch (simdBaseType)
@@ -8326,6 +8336,8 @@ GenTree* Compiler::gtNewConWithPattern(var_types type, uint8_t pattern)
 #if defined(TARGET_XARCH)
         case TYP_SIMD32:
         case TYP_SIMD64:
+#elif defined(TARGET_ARM64)
+        case TYP_SIMDSV: // TODO-SVE: Implement scalable vector constant
 #endif // TARGET_XARCH
         {
             GenTreeVecCon* node = gtNewVconNode(type);
@@ -12276,6 +12288,9 @@ void Compiler::gtDispConst(GenTree* tree)
                 }
 
                 case TYP_SIMD16:
+#ifdef TARGET_ARM64
+                case TYP_SIMDSV: // TODO-SVE: Implement scalable vector constant
+#endif
                 {
                     printf("<0x%08x, 0x%08x, 0x%08x, 0x%08x>", vecCon->gtSimdVal.u32[0], vecCon->gtSimdVal.u32[1],
                            vecCon->gtSimdVal.u32[2], vecCon->gtSimdVal.u32[3]);
@@ -18389,6 +18404,9 @@ void GenTreeVecCon::EvaluateUnaryInPlace(genTreeOps oper, bool scalar, var_types
         }
 
         case TYP_SIMD16:
+#ifdef TARGET_ARM64
+        case TYP_SIMDSV:
+#endif
         {
             simd16_t result = {};
             EvaluateUnarySimd(oper, scalar, baseType, &result, gtSimd16Val);
@@ -18451,6 +18469,9 @@ void GenTreeVecCon::EvaluateBinaryInPlace(genTreeOps oper, bool scalar, var_type
         }
 
         case TYP_SIMD16:
+#ifdef TARGET_ARM64
+        case TYP_SIMDSV: // TODO-SVE: Implement scalable vector constant
+#endif
         {
             simd16_t result = {};
             EvaluateBinarySimd(oper, scalar, baseType, &result, gtSimd16Val, other->gtSimd16Val);
@@ -20925,7 +20946,7 @@ GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types type,
 GenTree* Compiler::gtNewSimdAbsNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeGet() == type);
@@ -21018,7 +21039,7 @@ GenTree* Compiler::gtNewSimdBinOpNode(
     genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
     assert(varTypeIsArithmetic(simdBaseType));
@@ -21732,7 +21753,7 @@ GenTree* Compiler::gtNewSimdBinOpNode(
 GenTree* Compiler::gtNewSimdCeilNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -21812,7 +21833,7 @@ GenTree* Compiler::gtNewSimdCvtNode(var_types type,
                                     unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -21948,7 +21969,7 @@ GenTree* Compiler::gtNewSimdCvtNativeNode(var_types type,
                                           unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -22200,7 +22221,7 @@ GenTree* Compiler::gtNewSimdCmpOpNode(
     genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -22489,12 +22510,11 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(
     genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
 {
     assert(type == TYP_INT);
+    assert(op1 != nullptr);
 
-    var_types simdType = getSIMDTypeForSize(simdSize);
+    var_types simdType = op1->TypeGet();
     assert(varTypeIsSIMD(simdType));
-
-    assert(op1 != nullptr);
-    assert(op1->TypeIs(simdType));
+    assert(genTypeSize(simdType) == simdSize);
 
     assert(op2 != nullptr);
     assert(op2->TypeIs(simdType));
@@ -22624,11 +22644,10 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode(
 {
     assert(type == TYP_INT);
 
-    var_types simdType = getSIMDTypeForSize(simdSize);
-    assert(varTypeIsSIMD(simdType));
-
     assert(op1 != nullptr);
-    assert(op1->TypeIs(simdType));
+    var_types simdType = op1->TypeGet();
+    assert(varTypeIsSIMD(simdType));
+    assert(genTypeSize(simdType) == simdSize);
 
     assert(op2 != nullptr);
     assert(op2->TypeIs(simdType));
@@ -22754,7 +22773,7 @@ GenTree* Compiler::gtNewSimdCndSelNode(
     var_types type, GenTree* op1, GenTree* op2, GenTree* op3, CorInfoType simdBaseJitType, unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -23179,7 +23198,7 @@ GenTree* Compiler::gtNewSimdCreateSequenceNode(
     // is constant than there isn't any real optimization we can do and we need the full computation.
 
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
     assert(varTypeIsArithmetic(simdBaseType));
@@ -23350,14 +23369,14 @@ GenTree* Compiler::gtNewSimdCreateSequenceNode(
 GenTree* Compiler::gtNewSimdDotProdNode(
     var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    var_types simdType = getSIMDTypeForSize(simdSize);
-    assert(varTypeIsSIMD(simdType));
+    assert(varTypeIsSIMD(type));
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
-    assert(op1->TypeIs(simdType));
+    assert(op1->TypeIs(type));
 
     assert(op2 != nullptr);
-    assert(op2->TypeIs(simdType));
+    assert(op2->TypeIs(type));
 
     var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
     assert(varTypeIsSIMD(type));
@@ -23391,7 +23410,7 @@ GenTree* Compiler::gtNewSimdDotProdNode(
 GenTree* Compiler::gtNewSimdFloorNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -23436,7 +23455,7 @@ GenTree* Compiler::gtNewSimdFmaNode(
     var_types type, GenTree* op1, GenTree* op2, GenTree* op3, CorInfoType simdBaseJitType, unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -23551,7 +23570,7 @@ GenTree* Compiler::gtNewSimdGetElementNode(
 GenTree* Compiler::gtNewSimdGetIndicesNode(var_types type, CorInfoType simdBaseJitType, unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
     assert(varTypeIsArithmetic(simdBaseType));
@@ -23704,7 +23723,7 @@ GenTree* Compiler::gtNewSimdIsEvenIntegerNode(var_types type,
                                               unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -23731,7 +23750,7 @@ GenTree* Compiler::gtNewSimdIsEvenIntegerNode(var_types type,
 GenTree* Compiler::gtNewSimdIsFiniteNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -23782,7 +23801,7 @@ GenTree* Compiler::gtNewSimdIsFiniteNode(var_types type, GenTree* op1, CorInfoTy
 GenTree* Compiler::gtNewSimdIsInfinityNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -23813,7 +23832,7 @@ GenTree* Compiler::gtNewSimdIsInfinityNode(var_types type, GenTree* op1, CorInfo
 GenTree* Compiler::gtNewSimdIsIntegerNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -23853,7 +23872,7 @@ GenTree* Compiler::gtNewSimdIsNaNNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -23884,7 +23903,7 @@ GenTree* Compiler::gtNewSimdIsNaNNode(var_types type, GenTree* op1, CorInfoType
 GenTree* Compiler::gtNewSimdIsNegativeNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -23926,7 +23945,7 @@ GenTree* Compiler::gtNewSimdIsNegativeInfinityNode(var_types type,
                                                    unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -23974,7 +23993,7 @@ GenTree* Compiler::gtNewSimdIsNegativeInfinityNode(var_types type,
 GenTree* Compiler::gtNewSimdIsNormalNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -24037,7 +24056,7 @@ GenTree* Compiler::gtNewSimdIsOddIntegerNode(var_types type,
                                              unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -24064,7 +24083,7 @@ GenTree* Compiler::gtNewSimdIsOddIntegerNode(var_types type,
 GenTree* Compiler::gtNewSimdIsPositiveNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -24106,7 +24125,7 @@ GenTree* Compiler::gtNewSimdIsPositiveInfinityNode(var_types type,
                                                    unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -24157,7 +24176,7 @@ GenTree* Compiler::gtNewSimdIsSubnormalNode(var_types type,
                                             unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -24214,7 +24233,7 @@ GenTree* Compiler::gtNewSimdIsSubnormalNode(var_types type,
 GenTree* Compiler::gtNewSimdIsZeroNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -24240,7 +24259,7 @@ GenTree* Compiler::gtNewSimdIsZeroNode(var_types type, GenTree* op1, CorInfoType
 GenTree* Compiler::gtNewSimdLoadNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
 
@@ -24435,7 +24454,7 @@ GenTree* Compiler::gtNewSimdMinMaxNode(var_types type,
     else if (!varTypeIsLong(simdBaseType))
     {
         assert(varTypeIsSIMD(type));
-        assert(getSIMDTypeForSize(simdSize) == type);
+        assert(genTypeSize(type) == simdSize);
     }
 
     NamedIntrinsic intrinsic = NI_Illegal;
@@ -25077,7 +25096,7 @@ GenTree* Compiler::gtNewSimdMinMaxNativeNode(
     else
     {
         assert(varTypeIsSIMD(type));
-        assert(getSIMDTypeForSize(simdSize) == type);
+        assert(genTypeSize(type) == simdSize);
     }
 
     NamedIntrinsic intrinsic = NI_Illegal;
@@ -25190,7 +25209,7 @@ GenTree* Compiler::gtNewSimdNarrowNode(
     var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -25662,7 +25681,7 @@ GenTree* Compiler::gtNewSimdNarrowNode(
 GenTree* Compiler::gtNewSimdRoundNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -25723,7 +25742,7 @@ GenTree* Compiler::gtNewSimdShuffleVariableNode(
     var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize, bool isShuffleNative)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -26326,7 +26345,7 @@ GenTree* Compiler::gtNewSimdShuffleNode(
     var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize, bool isShuffleNative)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -26909,7 +26928,7 @@ GenTree* Compiler::gtNewSimdShuffleNode(
 GenTree* Compiler::gtNewSimdSqrtNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -26967,7 +26986,7 @@ GenTree* Compiler::gtNewSimdStoreNode(GenTree* op1, GenTree* op2, CorInfoType si
 
     assert(op2 != nullptr);
     assert(varTypeIsSIMD(op2));
-    assert(getSIMDTypeForSize(simdSize) == op2->TypeGet());
+    assert(genTypeSize(op2->TypeGet()) == simdSize);
 
     var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
     assert(varTypeIsArithmetic(simdBaseType));
@@ -27084,11 +27103,10 @@ GenTree* Compiler::gtNewSimdStoreNonTemporalNode(GenTree* op1,
 GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    var_types simdType = getSIMDTypeForSize(simdSize);
-    assert(varTypeIsSIMD(simdType));
-
     assert(op1 != nullptr);
-    assert(op1->TypeIs(simdType));
+    var_types simdType = op1->TypeGet();
+    assert(varTypeIsSIMD(simdType));
+    assert(genTypeSize(simdType) == simdSize);
 
     var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
     assert(varTypeIsArithmetic(simdBaseType));
@@ -27416,7 +27434,7 @@ GenTree* Compiler::gtNewSimdToScalarNode(var_types type, GenTree* op1, CorInfoTy
 GenTree* Compiler::gtNewSimdTruncNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -27461,7 +27479,7 @@ GenTree* Compiler::gtNewSimdUnOpNode(
     genTreeOps op, var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -27556,7 +27574,7 @@ GenTree* Compiler::gtNewSimdUnOpNode(
 GenTree* Compiler::gtNewSimdWidenLowerNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -27743,6 +27761,12 @@ GenTree* Compiler::gtNewSimdWidenLowerNode(var_types type, GenTree* op1, CorInfo
         tmp1 = gtNewSimdGetLowerNode(TYP_SIMD8, tmp1, simdBaseJitType, 16);
     }
 
+    if (type == TYP_SIMDSV)
+    {
+        // TODO-SVE: Implement SVE widen for Vector<T>
+        tmp1 = gtNewCastNode(type, tmp1, false, tmp1->TypeGet());
+    }
+
     return tmp1;
 #else
 #error Unsupported platform
@@ -27752,7 +27776,7 @@ GenTree* Compiler::gtNewSimdWidenLowerNode(var_types type, GenTree* op1, CorInfo
 GenTree* Compiler::gtNewSimdWidenUpperNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
     assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(genTypeSize(type) == simdSize);
 
     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -28078,15 +28102,15 @@ GenTreeFieldList* Compiler::gtConvertParamOpToFieldList(GenTree* op, unsigned fi
     unsigned          fieldSize = opVarDsc->lvExactSize() / fieldCount;
     GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList();
     int               offset    = 0;
-    unsigned          sizeBytes = 0;
 
     CORINFO_CLASS_HANDLE structType;
     for (unsigned fieldId = 0; fieldId < fieldCount; fieldId++)
     {
         CORINFO_FIELD_HANDLE fieldHandle = info.compCompHnd->getFieldInClass(clsHnd, fieldId);
         JitType2PreciseVarType(info.compCompHnd->getFieldType(fieldHandle, &structType));
-        getBaseJitTypeAndSizeOfSIMDType(structType, &sizeBytes);
-        var_types simdType = getSIMDTypeForSize(sizeBytes);
+
+        unsigned int size     = info.compCompHnd->getClassSize(structType);
+        var_types    simdType = getSIMDTypeForSize(size);
 
         GenTreeLclFld* fldNode = gtNewLclFldNode(lclNum, simdType, offset);
         fieldList->AddField(this, fldNode, offset, simdType);
@@ -29626,10 +29650,8 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty
 NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForUnOp(
     Compiler* comp, genTreeOps oper, GenTree* op1, var_types simdBaseType, unsigned simdSize, bool isScalar)
 {
-    var_types simdType = comp->getSIMDTypeForSize(simdSize);
-
     assert(varTypeIsArithmetic(simdBaseType));
-    assert(varTypeIsSIMD(simdType));
 
 #if defined(TARGET_XARCH)
     if ((simdSize == 64) || (simdSize == 32))
@@ -29647,7 +29669,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForUnOp(
     }
 
     assert(op1 != nullptr);
-    assert(op1->TypeIs(simdType));
+    var_types simdType = op1->TypeGet();
+    assert(varTypeIsSIMD(simdType));
+    assert(genTypeSize(simdType) == simdSize);
 
     NamedIntrinsic id = NI_Illegal;
 
@@ -29720,13 +29744,12 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp,
                                                             unsigned   simdSize,
                                                             bool       isScalar)
 {
-    var_types simdType = comp->getSIMDTypeForSize(simdSize);
-
     assert(varTypeIsArithmetic(simdBaseType));
-    assert(varTypeIsSIMD(simdType));
 
     assert(op1 != nullptr);
-    assert(op1->TypeIs(simdType));
+    var_types simdType = op1->TypeGet();
+    assert(varTypeIsSIMD(simdType));
+    assert(genTypeSize(simdType) == simdSize);
 
     assert(op2 != nullptr);
 
 #if defined(TARGET_XARCH)
@@ -29997,6 +30020,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp,
         id = NI_X86Base_MultiplyLow;
     }
 #elif defined(TARGET_ARM64)
+    // TODO-SVE: When SVE is enabled for Vector<T>, use SVE intrinsic instead.
     if ((simdSize == 8) && (isScalar || (simdBaseType == TYP_DOUBLE)))
     {
         id = NI_AdvSimd_MultiplyScalar;
@@ -30293,16 +30317,14 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
                                                             bool       isScalar,
                                                             bool       reverseCond)
 {
-    var_types simdType = comp->getSIMDTypeForSize(simdSize);
+    assert(op1 != nullptr);
+    assert(op2 != nullptr);
 
+    var_types simdType = op1->TypeGet();
+    assert(genTypeSize(simdType) == simdSize);
     assert(varTypeIsMask(type) || (type == simdType));
-
     assert(varTypeIsArithmetic(simdBaseType));
     assert(varTypeIsSIMD(simdType));
 
-    assert(op1 != nullptr);
-    assert(op1->TypeIs(simdType));
-
-    assert(op2 != nullptr);
-
 #if defined(TARGET_XARCH)
     if (varTypeIsMask(type))
     {
@@ -30634,11 +30656,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
 var_types GenTreeHWIntrinsic::GetLookupTypeForCmpOp(
     Compiler* comp, genTreeOps oper, var_types type, var_types simdBaseType, unsigned simdSize, bool reverseCond)
 {
-    var_types simdType = comp->getSIMDTypeForSize(simdSize);
-    assert(varTypeIsMask(type) || (type == simdType));
-
+    assert(varTypeIsMask(type) || varTypeIsSIMD(type));
     assert(varTypeIsArithmetic(simdBaseType));
-    assert(varTypeIsSIMD(simdType));
+    assert(genTypeSize(type) == simdSize);
 
     var_types lookupType = type;
 
@@ -32479,8 +32499,6 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
         case NI_Vector512_ToScalar:
 #endif
         {
-            var_types simdType = getSIMDTypeForSize(simdSize);
-
             if (varTypeIsFloating(retType))
             {
                 double result = cnsNode->AsVecCon()->ToScalarFloating(simdBaseType);
@@ -32740,8 +32758,6 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
                 break;
             }
 
-            var_types simdType = getSIMDTypeForSize(simdSize);
-
             if (varTypeIsFloating(retType))
            {
                 double result = cnsNode->AsVecCon()->GetElementFloating(simdBaseType, index);
@@ -33612,8 +33628,9 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
             if (op2->IsCnsVec() && op3->IsCnsVec())
             {
                 assert(ni == NI_Sve_ConditionalSelect);
-                assert(op2->gtType == TYP_SIMD16);
-                assert(op3->gtType == TYP_SIMD16);
+                // TODO-SVE: Implement scalable vector constant
+                assert(op2->gtType == TYP_SIMD16 || op2->gtType == TYP_SIMDSV);
+                assert(op3->gtType == TYP_SIMD16 || op3->gtType == TYP_SIMDSV);
 
                 simd16_t op1SimdVal = {};
                 EvaluateSimdCvtMaskToVector(simdBaseType, &op1SimdVal, op1->AsMskCon()->gtSimdMaskVal);
@@ -33684,8 +33701,6 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
                 break;
             }
 
-            var_types simdType = getSIMDTypeForSize(simdSize);
-
             if (varTypeIsFloating(simdBaseType))
             {
                 double value = op3->AsDblCon()->DconValue();
@@ -33820,6 +33835,9 @@ GenTreeMskCon* Compiler::gtFoldExprConvertVecCnsToMask(GenTreeHWIntrinsic* tree,
         }
 
         case TYP_SIMD16:
+#ifdef TARGET_ARM64
+        case TYP_SIMDSV: // TODO-SVE: Implement scalable vector constant
+#endif
         {
             EvaluateSimdCvtVectorToMask(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd16Val);
             break;
diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h
index 100f17301ea267..6e8737792916dc 100644
--- a/src/coreclr/jit/gentree.h
+++ b/src/coreclr/jit/gentree.h
@@ -7020,7 +7020,11 @@ struct GenTreeVecCon : public GenTree
             }
 
             case TYP_SIMD16:
+#ifdef TARGET_ARM64
+            case TYP_SIMDSV:
+#endif
             {
+                // TODO-SVE: Implement scalable vector constant
                 simd16_t result = {};
                 BroadcastConstantToSimd(&result, scalar);
                 gtSimd16Val = result;
@@ -7076,6 +7080,9 @@ struct GenTreeVecCon : public GenTree
             }
 
             case TYP_SIMD16:
+#ifdef TARGET_ARM64
+            case TYP_SIMDSV:
+#endif
             {
                 simd16_t result = {};
                 EvaluateWithElementFloating(simdBaseType, &result, gtSimd16Val, index, value);
@@ -7129,6 +7136,9 @@ struct GenTreeVecCon : public GenTree
             }
 
             case TYP_SIMD16:
+#ifdef TARGET_ARM64
+            case TYP_SIMDSV:
+#endif
             {
                 simd16_t result = {};
                 EvaluateWithElementIntegral(simdBaseType, &result, gtSimd16Val, index, value);
@@ -7176,6 +7186,9 @@ struct GenTreeVecCon : public GenTree
             }
 
             case TYP_SIMD16:
+#ifdef TARGET_ARM64
+            case TYP_SIMDSV:
+#endif
             {
                 return gtSimd16Val.IsAllBitsSet();
             }
@@ -7224,6 +7237,9 @@ struct GenTreeVecCon : public GenTree
             }
 
             case TYP_SIMD16:
+#ifdef TARGET_ARM64
+            case TYP_SIMDSV: // TODO-SVE: Implement scalable vector constant
+#endif
             {
                 return left->gtSimd16Val == right->gtSimd16Val;
             }
@@ -7267,6 +7283,9 @@ struct GenTreeVecCon : public GenTree
             }
 
             case TYP_SIMD16:
+#ifdef TARGET_ARM64
+            case TYP_SIMDSV:
+#endif
             {
                 return gtSimd16Val.IsZero();
             }
@@ -7306,6 +7325,9 @@ struct GenTreeVecCon : public GenTree
             }
 
             case TYP_SIMD16:
+#ifdef TARGET_ARM64
+            case TYP_SIMDSV:
+#endif
             {
                 return EvaluateGetElementFloating(simdBaseType, gtSimd16Val, index);
             }
@@ -7344,6 +7366,9 @@ struct GenTreeVecCon : public GenTree
             }
 
             case TYP_SIMD16:
+#ifdef TARGET_ARM64
+            case TYP_SIMDSV:
+#endif
             {
                 return EvaluateGetElementIntegral(simdBaseType, gtSimd16Val, index);
             }
diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp
index 83909542992db3..5a4ed811abdcc1 100644
--- a/src/coreclr/jit/hwintrinsic.cpp
+++ b/src/coreclr/jit/hwintrinsic.cpp
@@ -1502,9 +1502,7 @@ GenTree* Compiler::getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE
     {
         if (!varTypeIsSIMD(argType))
         {
-            unsigned int argSizeBytes;
-            (void)getBaseJitTypeAndSizeOfSIMDType(argClass, &argSizeBytes);
-            argType = getSIMDTypeForSize(argSizeBytes);
+            argType = getSIMDType(argClass);
         }
         assert(varTypeIsSIMD(argType));
 
@@ -1889,29 +1887,20 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
     CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF;
     GenTree*    retNode         = nullptr;
 
-    if (retType == TYP_STRUCT)
+    if (retType == TYP_STRUCT && !HWIntrinsicInfo::IsMultiReg(intrinsic))
     {
-        unsigned int sizeBytes;
-        simdBaseJitType = getBaseJitTypeAndSizeOfSIMDType(sig->retTypeSigClass, &sizeBytes);
-
-        if (HWIntrinsicInfo::IsMultiReg(intrinsic))
-        {
-            assert(sizeBytes == 0);
-        }
+        retType = impNormStructType(sig->retTypeSigClass, &simdBaseJitType);
 #ifdef TARGET_ARM64
-        else if ((intrinsic == NI_AdvSimd_LoadAndInsertScalar) || (intrinsic == NI_AdvSimd_Arm64_LoadAndInsertScalar))
+        if ((intrinsic == NI_AdvSimd_LoadAndInsertScalar) || (intrinsic == NI_AdvSimd_Arm64_LoadAndInsertScalar))
         {
-            CorInfoType pSimdBaseJitType = CORINFO_TYPE_UNDEF;
-            var_types   retFieldType    = impNormStructType(sig->retTypeSigClass, &pSimdBaseJitType);
-
-            if (retFieldType == TYP_STRUCT)
+            if (retType == TYP_STRUCT)
             {
                 CORINFO_CLASS_HANDLE structType;
                 unsigned int         sizeBytes = 0;
 
                 // LoadAndInsertScalar that returns 2,3 or 4 vectors
-                assert(pSimdBaseJitType == CORINFO_TYPE_UNDEF);
+                assert(simdBaseJitType == CORINFO_TYPE_UNDEF);
                 unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(sig->retTypeSigClass);
                 assert(fieldCount > 1);
                 CORINFO_FIELD_HANDLE fieldHandle = info.compCompHnd->getFieldInClass(sig->retTypeClass, 0);
@@ -1937,24 +1926,20 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
             }
             else
             {
-                assert((retFieldType == TYP_SIMD8) || (retFieldType == TYP_SIMD16));
+                assert((retType == TYP_SIMD8) || (retType == TYP_SIMD16));
                 assert(isSupportedBaseType(intrinsic, simdBaseJitType));
-                retType = getSIMDTypeForSize(sizeBytes);
             }
         }
-#endif
-        else
-        {
+#endif
+
         // We want to return early here for cases where retType was TYP_STRUCT as per method signature and
         // rather than deferring the decision after getting the simdBaseJitType of arg.
-        if (!isSupportedBaseType(intrinsic, simdBaseJitType))
+        if (retType == TYP_UNDEF || !isSupportedBaseType(intrinsic, simdBaseJitType))
         {
             return nullptr;
         }
 
-        assert(sizeBytes != 0);
-        retType = getSIMDTypeForSize(sizeBytes);
-        }
+        assert((varTypeIsSIMD(retType) || varTypeIsStruct(retType)) && isSupportedBaseType(intrinsic, simdBaseJitType));
     }
 
     simdBaseJitType = getBaseJitTypeFromArgIfNeeded(intrinsic, sig, simdBaseJitType);
diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp
index 7e73e3e2fee48c..b616b2798d7c07 100644
--- a/src/coreclr/jit/hwintrinsicarm64.cpp
+++ b/src/coreclr/jit/hwintrinsicarm64.cpp
@@ -800,13 +800,22 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
             assert(!sig->hasThis());
             assert(numArgs == 1);
 
-            // We fold away the cast here, as it only exists to satisfy
-            // the type system. It is safe to do this here since the retNode type
-            // and the signature return type are both the same TYP_SIMD.
-
             retNode = impSIMDPopStack();
             SetOpLclRelatedToSIMDIntrinsic(retNode);
-            assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass)));
+
+            if (intrinsic == NI_Vector128_AsVector && JitConfig.JitUseScalableVectorT())
+            {
+                // A cast node is required to convert from TYP_SIMD16 to TYP_SIMDSV.
+                assert(retNode->TypeGet() == TYP_SIMD16);
+                retNode = gtNewCastNode(TYP_SIMDSV, retNode, false, retNode->TypeGet());
+            }
+            else
+            {
+                // We fold away the cast here, as it only exists to satisfy
+                // the type system. It is safe to do this here since the retNode type
+                // and the signature return type are both the same TYP_SIMD.
+                assert(retNode->gtType == getSIMDType(sig->retTypeSigClass));
+            }
             break;
         }
 
@@ -901,7 +910,20 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
 
             retNode = impSIMDPopStack();
             SetOpLclRelatedToSIMDIntrinsic(retNode);
-            assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass)));
+
+            if (retNode->TypeGet() == TYP_SIMDSV)
+            {
+                // Truncate TYP_SIMDSV to TYP_SIMD16. This is a no-op and just keeps
+                // the type system consistent.
+                retNode = gtNewCastNode(TYP_SIMD16, retNode, false, retNode->TypeGet());
+            }
+            else
+            {
+                // We fold away the cast here, as it only exists to satisfy
+                // the type system. It is safe to do this here since the retNode type
+                // and the signature return type are both the same TYP_SIMD.
+            }
+            assert(retNode->gtType == getSIMDType(sig->retTypeSigClass));
             break;
         }
 
@@ -1339,12 +1361,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
             if (!varTypeIsLong(simdBaseType))
             {
-                var_types simdType = getSIMDTypeForSize(simdSize);
-
                 op2 = impSIMDPopStack();
                 op1 = impSIMDPopStack();
 
-                retNode = gtNewSimdDotProdNode(simdType, op1, op2, simdBaseJitType, simdSize);
+                retNode = gtNewSimdDotProdNode(op1->TypeGet(), op1, op2, simdBaseJitType, simdSize);
                 retNode = gtNewSimdGetElementNode(retType, retNode, gtNewIconNode(0), simdBaseJitType, simdSize);
             }
             break;
@@ -3503,6 +3523,13 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
         }
     }
 
+    if (retNode != nullptr && retType == TYP_SIMDSV)
+    {
+        // If we've been asked to return a scalable Vector<T> we should either see
+        // a scalable vector type or a mask type come out of this function.
+        assert(retNode->TypeIs(TYP_SIMDSV, TYP_MASK));
+    }
+
     assert(!isScalar || isValidScalarIntrinsic);
 
     return retNode;
diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
index 624c59347b475b..40fc41db5373a9 100644
--- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
+++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
@@ -370,7 +370,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                 {
                     assert((targetReg == op1Reg) || (targetReg != op2Reg) || genIsSameLocalVar(intrin.op1, intrin.op2));
                     assert((targetReg == op1Reg) || (targetReg != op3Reg) || genIsSameLocalVar(intrin.op1, intrin.op3));
-                    GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true);
+
+                    inst_Mov(node->TypeGet(), targetReg, op1Reg, true);
 
                     HWIntrinsicImmOpHelper helper(this, intrin.op4, node);
 
@@ -414,7 +415,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                 {
                     assert((targetReg == op1Reg) || (targetReg != op2Reg) || genIsSameLocalVar(intrin.op1, intrin.op2));
                     assert((targetReg == op1Reg) || (targetReg != op3Reg) || genIsSameLocalVar(intrin.op1, intrin.op3));
-                    GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true);
+                    inst_Mov(node->TypeGet(), targetReg, op1Reg, true);
                     GetEmitter()->emitIns_R_R_R_I(ins, emitSize, targetReg, op2Reg, op3Reg, 0, opt);
                 }
                 else
@@ -441,7 +442,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
 
                 if (isRMW)
                 {
-                    GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true);
+                    inst_Mov(node->TypeGet(), targetReg, op1Reg, true);
 
                     emitShift(intrin.op3, op2Reg);
                 }
                 else
@@ -545,8 +546,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
 
                         // If falseReg value and embMaskOp1Reg value are same, then just mov the value
                         // to the target.
-                        GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, embMaskOp1Reg,
-                                                  /* canSkip */ true);
+                        inst_Mov(node->TypeGet(), targetReg, embMaskOp1Reg, true);
                     }
                     else
                     {
@@ -651,8 +651,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
 
                         // At this point, target != embMaskOp1Reg != falseReg, so just go ahead
                         // and move the falseReg unpredicated into targetReg.
                         // Cannot use movprfx for zeroing mask operations.
-                        GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, falseReg,
-                                                  /* canSkip */ true);
+                        inst_Mov(node->TypeGet(), targetReg, falseReg, true);
                     }
                     else
                     {
@@ -1083,9 +1082,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                     {
                         assert((targetReg == op2Reg) || (targetReg != op1Reg) || genIsSameLocalVar(intrin.op1, intrin.op2));
-                        GetEmitter()->emitIns_Mov(ins_Move_Extend(intrin.op2->TypeGet(), false),
-                                                  emitTypeSize(node), targetReg, op2Reg,
-                                                  /* canSkip */ true);
+                        inst_Mov(node->TypeGet(), targetReg, op2Reg, true);
                         GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt);
                     }
                     else
@@ -1104,8 +1101,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                     {
                         assert((targetReg == op1Reg) || (targetReg != op2Reg) || genIsSameLocalVar(intrin.op1, intrin.op2));
-                        GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg,
-                                                  /* canSkip */ true);
+                        inst_Mov(node->TypeGet(), targetReg, op1Reg, true);
                         GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op2Reg, opt);
                     }
                     else
@@ -1126,8 +1122,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                         assert((targetReg == op2Reg) || (targetReg != op3Reg) ||
                                genIsSameLocalVar(intrin.op2, intrin.op3));
 
-                        GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op2Reg,
-                                                  /* canSkip */ true);
+                        inst_Mov(node->TypeGet(), targetReg, op2Reg, true);
                         GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op3Reg, opt);
                     }
                     else
@@ -1137,8 +1132,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                         assert((targetReg == op1Reg) || (targetReg != op3Reg) ||
                                genIsSameLocalVar(intrin.op1, intrin.op3));
 
-                        GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg,
-                                                  /* canSkip */ true);
+                        inst_Mov(node->TypeGet(), targetReg, op1Reg, true);
                         GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op2Reg, op3Reg, opt);
                     }
                 }
@@ -1353,7 +1347,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
             {
                 assert(isRMW);
 
-                GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true);
+                inst_Mov(node->TypeGet(), targetReg, op1Reg, true);
 
                 // fmov (scalar) zeros the upper bits and is not safe to use
                 assert(!intrin.op3->isContainedFltOrDblImmed());
@@ -1387,7 +1381,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
             {
                 assert(isRMW);
                 assert((targetReg == op1Reg) || (targetReg != op3Reg) || genIsSameLocalVar(intrin.op1, intrin.op3));
-                GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true);
+
+                inst_Mov(node->TypeGet(), targetReg, op1Reg, true);
 
                 HWIntrinsicImmOpHelper helper(this, intrin.op2, node);
 
@@ -1404,7 +1399,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
             {
                 assert(isRMW);
                 assert((targetReg == op1Reg) || (targetReg != op3Reg) || genIsSameLocalVar(intrin.op1, intrin.op3));
-                GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true);
+
+                inst_Mov(node->TypeGet(), targetReg, op1Reg, true);
 
                 const int resultIndex = (int)intrin.op2->AsIntCon()->gtIconVal;
                 const int valueIndex  = (int)intrin.op4->AsIntCon()->gtIconVal;
@@ -1416,7 +1412,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
             {
                 assert(isRMW);
                 assert((targetReg == op1Reg) || (targetReg != op3Reg) || genIsSameLocalVar(intrin.op1, intrin.op3));
-                GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true);
+
+                inst_Mov(node->TypeGet(), targetReg, op1Reg, true);
 
                 HWIntrinsicImmOpHelper helper(this, intrin.op2, node);
 
@@ -1456,8 +1453,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                         targetFieldReg = node->GetRegByIndex(fieldIdx);
                         op1FieldReg    = fieldNode->GetRegNum();
 
-                        GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(fieldNode), targetFieldReg, op1FieldReg,
-                                                  /* canSkip */ true);
+                        inst_Mov(fieldNode->TypeGet(), targetFieldReg, op1FieldReg, true);
 
                         fieldIdx++;
                     }
@@ -1643,7 +1639,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                 {
                     ins = varTypeIsUnsigned(intrin.baseType) ? INS_usqadd : INS_suqadd;
 
-                    GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true);
+                    inst_Mov(node->TypeGet(), targetReg, op1Reg, true);
                     GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op2Reg, opt);
                 }
                 else
@@ -2012,7 +2008,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                 }
 
                 assert((targetReg == op1Reg) || (targetReg != op3Reg) || genIsSameLocalVar(intrin.op1, intrin.op3));
-                GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true);
+                inst_Mov(node->TypeGet(), targetReg, op1Reg, true);
                 GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op2Reg, op3Reg, opt);
                 break;
             }
@@ -2400,7 +2396,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                 assert(isRMW);
                 assert((targetReg == op1Reg) || (targetReg != op2Reg) || genIsSameLocalVar(intrin.op1, intrin.op2));
                 assert((targetReg == op1Reg) || (targetReg != op3Reg) || genIsSameLocalVar(intrin.op1, intrin.op3));
-                GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true);
+                inst_Mov(node->TypeGet(), targetReg, op1Reg, true);
 
                 if (intrin.op2->IsCnsIntOrI() && intrin.op3->IsCnsIntOrI())
                 {
@@ -2453,7 +2449,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
             {
                 // RMW semantics
                 assert((targetReg == op1Reg) || (targetReg != op2Reg) || genIsSameLocalVar(intrin.op1, intrin.op2));
-                GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true);
+                inst_Mov(node->TypeGet(), targetReg, op1Reg, true);
 
                 // Switch instruction if arg1 is unsigned.
                 if (varTypeIsUnsigned(node->GetAuxiliaryType()))
@@ -2493,7 +2489,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
             {
                 assert(isRMW);
                 assert((targetReg == op1Reg) || (targetReg != op2Reg) || genIsSameLocalVar(intrin.op1, intrin.op2));
-                GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true);
+                GetEmitter()->emitIns_Mov(INS_sve_mov, EA_SCALABLE, targetReg, op1Reg, /* canSkip */ true);
 
                 HWIntrinsicImmOpHelper helper(this, intrin.op3, node);
 
@@ -2512,7 +2508,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                 assert(isRMW);
                 assert(emitter::isFloatReg(op2Reg) == varTypeIsFloating(intrin.baseType));
                 assert((targetReg == op1Reg) || (targetReg != op2Reg) || genIsSameLocalVar(intrin.op1, intrin.op2));
-                GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg,
+                GetEmitter()->emitIns_Mov(INS_sve_mov, EA_SCALABLE, targetReg, op1Reg,
                                           /* canSkip */ true);
                 GetEmitter()->emitInsSve_R_R(ins, emitSize, targetReg, op2Reg, opt);
                 break;
@@ -2538,7 +2534,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                 assert(isRMW);
                 assert(HWIntrinsicInfo::IsExplicitMaskedOperation(intrin.id));
                 assert((targetReg == op2Reg) || (targetReg != op1Reg) || genIsSameLocalVar(intrin.op2, intrin.op1));
-                GetEmitter()->emitIns_Mov(INS_sve_mov, emitTypeSize(node), targetReg, op2Reg, /* canSkip */ true);
+                GetEmitter()->emitIns_Mov(INS_sve_mov, EA_SCALABLE, targetReg, op2Reg, /* canSkip */ true);
                 GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, INS_OPTS_SCALABLE_B);
                 break;
             }
@@ -2594,8 +2590,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                     assert((targetReg == op2Reg) || (targetReg != op1Reg));
                     assert((targetReg == op2Reg) || (targetReg != op3Reg));
 
-                    GetEmitter()->emitIns_Mov(INS_mov, emitSize, targetReg, op2Reg,
-                                              /* canSkip */ true);
+                    inst_Mov(node->TypeGet(), targetReg, op2Reg, true);
                     GetEmitter()->emitInsSve_R_R_R(ins, emitSize, targetReg, op1Reg, op3Reg, opt,
                                                    INS_SCALABLE_OPTS_NONE);
                     break;
@@ -2737,7 +2732,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
             case NI_Sve2_AddCarryWideningOdd:
                 if (targetReg != op3Reg)
                 {
-                    GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op3Reg, /* canSkip */ true);
+                    inst_Mov(node->TypeGet(), targetReg, op3Reg, true);
                 }
                 GetEmitter()->emitInsSve_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt);
                 break;
diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index 3bd72cb08e609a..1a22b3168b2fd9 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -1185,12 +1185,12 @@ var_types Compiler::impNormStructType(CORINFO_CLASS_HANDLE structHnd, CorInfoTyp
 
         if (structSizeMightRepresentSIMDType(originalSize))
         {
-            unsigned int sizeBytes;
-            CorInfoType  simdBaseJitType = getBaseJitTypeAndSizeOfSIMDType(structHnd, &sizeBytes);
-            if (simdBaseJitType != CORINFO_TYPE_UNDEF)
+            CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF;
+            var_types   simdType        = getSIMDType(structHnd, &simdBaseJitType);
+            if (simdBaseJitType != CORINFO_TYPE_UNDEF && simdType != TYP_UNDEF)
             {
-                assert(sizeBytes == originalSize);
-                structType = getSIMDTypeForSize(sizeBytes);
+                assert(genTypeSize(simdType) == originalSize);
+                structType = simdType;
                 if (pSimdBaseJitType != nullptr)
                 {
                     *pSimdBaseJitType = simdBaseJitType;
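The impNormStructType change above routes SIMD-struct recognition through the new getSIMDType helper, which reports "not a supported SIMD type" with a TYP_UNDEF sentinel rather than making every caller juggle a size out-parameter. A toy version of that calling convention; the classifier below is purely illustrative and is not the JIT's actual policy:

    #include <cstdint>

    enum SimdKind : uint8_t { SK_UNDEF, SK_SIMD8, SK_SIMD16, SK_SIMDSV };

    // Collapses "is this a SIMD type?" and "which SIMD type?" into one query,
    // so callers can test a single sentinel instead of a (flag, size) pair.
    static SimdKind classifyBySize(unsigned sizeBytes, unsigned vectorByteLength)
    {
        if (sizeBytes == 8)                return SK_SIMD8;
        if (sizeBytes == vectorByteLength) return SK_SIMDSV; // checked first when scalable is enabled
        if (sizeBytes == 16)               return SK_SIMD16;
        return SK_UNDEF;
    }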
aligned /*=false* return ins; } +#if defined(TARGET_ARM64) && defined(FEATURE_SIMD) + if (srcType == TYP_SIMDSV) + { + return INS_sve_ldr; + } +#endif + #if defined(FEATURE_MASKED_HW_INTRINSICS) if (varTypeUsesMaskReg(srcType)) { @@ -2386,6 +2393,15 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType) #endif } +#if defined(TARGET_ARM64) && defined(FEATURE_SIMD) + if (dstType == TYP_SIMDSV) + { + // A scalable vector can only be copied to another vector register. + assert(genIsValidFloatReg(srcReg)); + return INS_sve_mov; + } +#endif + #if defined(FEATURE_MASKED_HW_INTRINSICS) if (varTypeUsesMaskReg(dstType)) { @@ -2507,6 +2523,13 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false return ins; } +#if defined(TARGET_ARM64) && defined(FEATURE_SIMD) + if (dstType == TYP_SIMDSV) + { + return INS_sve_str; + } +#endif + #if defined(FEATURE_MASKED_HW_INTRINSICS) if (varTypeUsesMaskReg(dstType)) { diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 3a9188a3e1eb0f..445c134299c72a 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -844,6 +844,9 @@ CONFIG_STRING(JitRawHexCodeFile, "JitRawHexCodeFile") // 3: force all frames to use the frame types that save FP/LR registers with the callee-saved registers (at the top // of the frame) and also force using the large funclet frame variation (frame 5) if possible. CONFIG_INTEGER(JitSaveFpLrWithCalleeSavedRegisters, "JitSaveFpLrWithCalleeSavedRegisters", 0) + +// Experimental support for a vector-length-agnostic implementation of Vector<T> +RELEASE_CONFIG_INTEGER(JitUseScalableVectorT, "JitUseScalableVectorT", 0) #endif // defined(TARGET_ARM64) #if defined(TARGET_LOONGARCH64) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 4e6b559c989baf..ef30733c5ff9cd 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -1435,15 +1435,10 @@ var_types Compiler::StructPromotionHelper::TryPromoteValueClassAsPrimitive(CORIN #ifdef FEATURE_SIMD if (compiler->isRuntimeIntrinsicsNamespace(namespaceName) || compiler->isNumericsNamespace(namespaceName)) { - unsigned simdSize; - CorInfoType simdBaseJitType = compiler->getBaseJitTypeAndSizeOfSIMDType(node.simdTypeHnd, &simdSize); - // We will only promote fields of SIMD types that fit into a SIMD register. - if (simdBaseJitType != CORINFO_TYPE_UNDEF) + var_types type = compiler->getSIMDType(node.simdTypeHnd); + if (type != TYP_UNDEF) { - if (compiler->structSizeMightRepresentSIMDType(simdSize)) - { - return compiler->getSIMDTypeForSize(simdSize); - } + return type; } } #endif @@ -3140,6 +3135,9 @@ void Compiler::lvaSortByRefCount() case TYP_SIMD32: case TYP_SIMD64: #endif // TARGET_XARCH +#ifdef TARGET_ARM64 + case TYP_SIMDSV: +#endif #ifdef FEATURE_MASKED_HW_INTRINSICS case TYP_MASK: #endif // FEATURE_MASKED_HW_INTRINSICS diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 25e5db40c20c6c..6341e3458a4205 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7347,6 +7347,12 @@ bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable, GenTree* par // because we won't be able to use ldar/star return false; } + + if (parent->TypeIs(TYP_SIMDSV)) + { + // TODO-SVE: Investigate addressing modes for scalable types.
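+ // Note: the plain SVE LDR/STR (vector) forms that ins_Load/ins_Store select for + // TYP_SIMDSV only accept a base register plus an immediate multiple of the vector + // length ([Xn{, #imm, MUL VL}]), so there is no reg+reg address mode to form here.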
+ return false; + } #endif GenTree* base = nullptr; diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index a2dd58244be7b1..5145977ff9c606 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -1689,6 +1689,8 @@ bool LinearScan::isRegCandidate(LclVarDsc* varDsc) #if defined(TARGET_XARCH) case TYP_SIMD32: case TYP_SIMD64: +#elif defined(TARGET_ARM64) + case TYP_SIMDSV: #endif // TARGET_XARCH #ifdef FEATURE_MASKED_HW_INTRINSICS case TYP_MASK: diff --git a/src/coreclr/jit/promotiondecomposition.cpp b/src/coreclr/jit/promotiondecomposition.cpp index 7f2dbf0257fa2f..4537969099a7f3 100644 --- a/src/coreclr/jit/promotiondecomposition.cpp +++ b/src/coreclr/jit/promotiondecomposition.cpp @@ -1162,14 +1162,14 @@ class DecompositionPlan // Get the flags to set on a new indir. // // Parameters: - // type - Type of the indirection + // size - Size of the indirection // // Returns: // Flags to set. // - GenTreeFlags GetIndirFlags(var_types type) + GenTreeFlags GetIndirFlags(unsigned size) { - if (genTypeSize(type) == 1) + if (size == 1) { return m_indirFlags & ~GTF_IND_UNALIGNED; } diff --git a/src/coreclr/jit/scopeinfo.cpp b/src/coreclr/jit/scopeinfo.cpp index df9b0083798d1a..402fb111fcb206 100644 --- a/src/coreclr/jit/scopeinfo.cpp +++ b/src/coreclr/jit/scopeinfo.cpp @@ -295,6 +295,8 @@ void CodeGenInterface::siVarLoc::siFillStackVarLoc( #if defined(TARGET_XARCH) case TYP_SIMD32: case TYP_SIMD64: +#elif defined(TARGET_ARM64) + case TYP_SIMDSV: #endif // TARGET_XARCH #endif // FEATURE_SIMD #ifdef TARGET_64BIT @@ -435,6 +437,8 @@ void CodeGenInterface::siVarLoc::siFillRegisterVarLoc( #if defined(TARGET_XARCH) case TYP_SIMD32: case TYP_SIMD64: +#elif defined(TARGET_ARM64) + case TYP_SIMDSV: #endif // TARGET_XARCH #if defined(FEATURE_MASKED_HW_INTRINSICS) case TYP_MASK: diff --git a/src/coreclr/jit/simd.cpp b/src/coreclr/jit/simd.cpp index 7fdbd76afb0081..abe19fdb8bec1f 100644 --- a/src/coreclr/jit/simd.cpp +++ b/src/coreclr/jit/simd.cpp @@ -66,9 +66,9 @@ int Compiler::getSIMDVectorLength(CORINFO_CLASS_HANDLE typeHnd) // int Compiler::getSIMDTypeAlignment(var_types simdType) { +#ifdef TARGET_XARCH unsigned size = genTypeSize(simdType); -#ifdef TARGET_XARCH // Fixed length vectors have the following alignment preference // Vector2 = 8 byte alignment // Vector3/4 = 16-byte alignment @@ -93,9 +93,8 @@ int Compiler::getSIMDTypeAlignment(var_types simdType) return 64; } #elif defined(TARGET_ARM64) - // preferred alignment for 64-bit vectors is 8-bytes. - // For everything else, 16-bytes. - return (size == 8) ? 8 : 16; + assert(varTypeIsSIMD(simdType)); + return genTypeAlignments[simdType]; #else assert(!"getSIMDTypeAlignment() unimplemented on target arch"); unreached(); @@ -153,32 +152,21 @@ unsigned Compiler::getFFRegisterVarNum() } #endif -//---------------------------------------------------------------------------------- -// Return the base type and size of SIMD vector type given its type handle. -// -// Arguments: -// typeHnd - The handle of the type we're interested in. -// sizeBytes - out param -// -// Return Value: -// base type of SIMD vector. -// sizeBytes if non-null is set to size in bytes. -// -// Notes: -// If the size of the struct is already known call structSizeMightRepresentSIMDType -// to determine if this api needs to be called. -// -// The type handle passed here can only be used in a subset of JIT-EE calls -// since it may be called by promotion during AOT of a method that does -// not version with SPC. 
See CORINFO_TYPE_LAYOUT_NODE for the contract on -// the supported JIT-EE calls. -// -// TODO-Throughput: current implementation parses class name to find base type. Change -// this when we implement SIMD intrinsic identification for the final -// product. -// -CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, unsigned* sizeBytes /*= nullptr */) +var_types Compiler::getSIMDType(CORINFO_CLASS_HANDLE typeHnd, CorInfoType* baseType) { + var_types type = TYP_UNDEF; + CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; + + if ((typeHnd == nullptr) || !isIntrinsicType(typeHnd)) + { + return TYP_UNDEF; + } + + if (baseType != nullptr) + { + *baseType = simdBaseJitType; + } + if (m_simdHandleCache == nullptr) { if (impInlineInfo == nullptr) @@ -198,23 +186,9 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH } } - if (sizeBytes != nullptr) - { - *sizeBytes = 0; - } - - if ((typeHnd == nullptr) || !isIntrinsicType(typeHnd)) - { - return CORINFO_TYPE_UNDEF; - } - const char* namespaceName; const char* className = getClassNameFromMetadata(typeHnd, &namespaceName); - // fast path search using cached type handles of important types - CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; - unsigned size = 0; - if (isNumericsNamespace(namespaceName)) { switch (className[0]) @@ -223,14 +197,14 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH { if (strcmp(className, "Plane") != 0) { - return CORINFO_TYPE_UNDEF; + return TYP_UNDEF; } JITDUMP(" Known type Plane\n"); m_simdHandleCache->PlaneHandle = typeHnd; + type = TYP_SIMD16; simdBaseJitType = CORINFO_TYPE_FLOAT; - size = 4 * genTypeSize(TYP_FLOAT); break; } @@ -238,14 +212,14 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH { if (strcmp(className, "Quaternion") != 0) { - return CORINFO_TYPE_UNDEF; + return TYP_UNDEF; } JITDUMP(" Known type Quaternion\n"); m_simdHandleCache->QuaternionHandle = typeHnd; + type = TYP_SIMD16; simdBaseJitType = CORINFO_TYPE_FLOAT; - size = 4 * genTypeSize(TYP_FLOAT); break; } @@ -253,7 +227,7 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH { if (strncmp(className, "Vector", 6) != 0) { - return CORINFO_TYPE_UNDEF; + return TYP_UNDEF; } switch (className[6]) @@ -269,14 +243,14 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH { if (className[7] != '\0') { - return CORINFO_TYPE_UNDEF; + return TYP_UNDEF; } JITDUMP(" Found Vector2\n"); m_simdHandleCache->Vector2Handle = typeHnd; + type = TYP_SIMD8; simdBaseJitType = CORINFO_TYPE_FLOAT; - size = 2 * genTypeSize(TYP_FLOAT); break; } @@ -284,14 +258,14 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH { if (className[7] != '\0') { - return CORINFO_TYPE_UNDEF; + return TYP_UNDEF; } JITDUMP(" Found Vector3\n"); m_simdHandleCache->Vector3Handle = typeHnd; + type = TYP_SIMD12; simdBaseJitType = CORINFO_TYPE_FLOAT; - size = 3 * genTypeSize(TYP_FLOAT); break; } @@ -299,14 +273,14 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH { if (className[7] != '\0') { - return CORINFO_TYPE_UNDEF; + return TYP_UNDEF; } JITDUMP(" Found Vector4\n"); m_simdHandleCache->Vector4Handle = typeHnd; + type = TYP_SIMD16; simdBaseJitType = CORINFO_TYPE_FLOAT; - size = 4 * genTypeSize(TYP_FLOAT); break; } @@ -314,7 +288,7 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH { if ((className[7] != '1') || (className[8] 
!= '\0')) { - return CORINFO_TYPE_UNDEF; + return TYP_UNDEF; } CORINFO_CLASS_HANDLE typeArgHnd = info.compCompHnd->getTypeInstantiationArgument(typeHnd, 0); @@ -322,22 +296,28 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH if ((simdBaseJitType < CORINFO_TYPE_BYTE) || (simdBaseJitType > CORINFO_TYPE_DOUBLE)) { - return CORINFO_TYPE_UNDEF; + return TYP_UNDEF; } JITDUMP(" Found Vector<%s>\n", varTypeName(JitType2PreciseVarType(simdBaseJitType))); - size = getVectorTByteLength(); - if (size == 0) + uint32_t vectlen = getVectorTByteLength(); + if (vectlen == 0) { - return CORINFO_TYPE_UNDEF; + return TYP_UNDEF; } + +#ifdef TARGET_ARM64 + type = JitConfig.JitUseScalableVectorT() ? TYP_SIMDSV : getSIMDTypeForSize(vectlen); +#else + type = getSIMDTypeForSize(vectlen); +#endif break; } default: { - return CORINFO_TYPE_UNDEF; + return TYP_UNDEF; } } break; @@ -345,14 +325,14 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH default: { - return CORINFO_TYPE_UNDEF; + return TYP_UNDEF; } } } #ifdef FEATURE_HW_INTRINSICS else { - size = info.compCompHnd->getClassSize(typeHnd); + unsigned int size = info.compCompHnd->getClassSize(typeHnd); switch (size) { @@ -361,7 +341,7 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH { if (strcmp(className, "Vector64`1") != 0) { - return CORINFO_TYPE_UNDEF; + return TYP_UNDEF; } CORINFO_CLASS_HANDLE typeArgHnd = info.compCompHnd->getTypeInstantiationArgument(typeHnd, 0); @@ -369,10 +349,11 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH if ((simdBaseJitType < CORINFO_TYPE_BYTE) || (simdBaseJitType > CORINFO_TYPE_DOUBLE)) { - return CORINFO_TYPE_UNDEF; + return TYP_UNDEF; } JITDUMP(" Found Vector64<%s>\n", varTypeName(JitType2PreciseVarType(simdBaseJitType))); + type = TYP_SIMD8; break; } #endif // TARGET_ARM64 @@ -381,7 +362,7 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH { if (strcmp(className, "Vector128`1") != 0) { - return CORINFO_TYPE_UNDEF; + return TYP_UNDEF; } CORINFO_CLASS_HANDLE typeArgHnd = info.compCompHnd->getTypeInstantiationArgument(typeHnd, 0); @@ -389,10 +370,11 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH if ((simdBaseJitType < CORINFO_TYPE_BYTE) || (simdBaseJitType > CORINFO_TYPE_DOUBLE)) { - return CORINFO_TYPE_UNDEF; + return TYP_UNDEF; } JITDUMP(" Found Vector128<%s>\n", varTypeName(JitType2PreciseVarType(simdBaseJitType))); + type = TYP_SIMD16; break; } @@ -401,7 +383,7 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH { if (strcmp(className, "Vector256`1") != 0) { - return CORINFO_TYPE_UNDEF; + return TYP_UNDEF; } CORINFO_CLASS_HANDLE typeArgHnd = info.compCompHnd->getTypeInstantiationArgument(typeHnd, 0); @@ -409,16 +391,17 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH if ((simdBaseJitType < CORINFO_TYPE_BYTE) || (simdBaseJitType > CORINFO_TYPE_DOUBLE)) { - return CORINFO_TYPE_UNDEF; + return TYP_UNDEF; } if (!compOpportunisticallyDependsOn(InstructionSet_AVX)) { // We must treat as a regular struct if AVX isn't supported - return CORINFO_TYPE_UNDEF; + return TYP_UNDEF; } JITDUMP(" Found Vector256<%s>\n", varTypeName(JitType2PreciseVarType(simdBaseJitType))); + type = TYP_SIMD32; break; } @@ -426,7 +409,7 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH { if (strcmp(className, "Vector512`1") != 0) { - return 
CORINFO_TYPE_UNDEF; + return TYP_UNDEF; } CORINFO_CLASS_HANDLE typeArgHnd = info.compCompHnd->getTypeInstantiationArgument(typeHnd, 0); @@ -434,40 +417,84 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH if ((simdBaseJitType < CORINFO_TYPE_BYTE) || (simdBaseJitType > CORINFO_TYPE_DOUBLE)) { - return CORINFO_TYPE_UNDEF; + return TYP_UNDEF; } if (!compOpportunisticallyDependsOn(InstructionSet_AVX512)) { // We must treat as a regular struct if AVX512 isn't supported - return CORINFO_TYPE_UNDEF; + return TYP_UNDEF; } JITDUMP(" Found Vector512<%s>\n", varTypeName(JitType2PreciseVarType(simdBaseJitType))); + type = TYP_SIMD64; break; } #endif // TARGET_XARCH default: { - return CORINFO_TYPE_UNDEF; + return TYP_UNDEF; } } } #endif // FEATURE_HW_INTRINSICS - if (sizeBytes != nullptr) + if (baseType != nullptr) { - *sizeBytes = size; + *baseType = simdBaseJitType; } if (simdBaseJitType != CORINFO_TYPE_UNDEF) { - assert(size == info.compCompHnd->getClassSize(typeHnd)); + assert(genTypeSize(type) == info.compCompHnd->getClassSize(typeHnd)); setUsesSIMDTypes(true); } - return simdBaseJitType; + return type; +} + +//---------------------------------------------------------------------------------- +// Return the base type and size of SIMD vector type given its type handle. +// +// Arguments: +// typeHnd - The handle of the type we're interested in. +// sizeBytes - out param +// +// Return Value: +// base type of SIMD vector. +// sizeBytes if non-null is set to size in bytes. +// +// Notes: +// If the size of the struct is already known call structSizeMightRepresentSIMDType +// to determine if this api needs to be called. +// +// The type handle passed here can only be used in a subset of JIT-EE calls +// since it may be called by promotion during AOT of a method that does +// not version with SPC. See CORINFO_TYPE_LAYOUT_NODE for the contract on +// the supported JIT-EE calls. +// +// TODO-Throughput: current implementation parses class name to find base type. Change +// this when we implement SIMD intrinsic identification for the final +// product. 
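+// +// Note: getSIMDType now performs the actual classification; this wrapper only derives +// sizeBytes from the returned var_types for callers that still need the old contract.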
+// +CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, unsigned* sizeBytes /*= nullptr */) +{ + CorInfoType baseType = CORINFO_TYPE_UNDEF; + + if (sizeBytes != nullptr) + { + *sizeBytes = 0; + } + + var_types type = getSIMDType(typeHnd, &baseType); + + if (sizeBytes != nullptr && type != TYP_UNDEF) + { + *sizeBytes = genTypeSize(type); + } + + return baseType; } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/typelist.h b/src/coreclr/jit/typelist.h index 865c177bc7bc32..627b5e1069ce26 100644 --- a/src/coreclr/jit/typelist.h +++ b/src/coreclr/jit/typelist.h @@ -4,6 +4,9 @@ #define GCS EA_GCREF #define BRS EA_BYREF #define EPS EA_PTRSIZE +#ifdef TARGET_ARM64 +#define EAS EA_SCALABLE +#endif #define PS TARGET_POINTER_SIZE #define PST (TARGET_POINTER_SIZE / sizeof(int)) @@ -63,9 +66,15 @@ DEF_TP(SIMD16 ,"simd16" , TYP_SIMD16, 16,16, 16, 4,16, VTR_FLOAT, available #if defined(TARGET_XARCH) DEF_TP(SIMD32 ,"simd32" , TYP_SIMD32, 32,32, 32, 8,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC) DEF_TP(SIMD64 ,"simd64" , TYP_SIMD64, 64,64, 64, 16,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC) +#elif defined(TARGET_ARM64) +DEF_TP(SIMDSV ,"simdsv" , TYP_SIMDSV, 0,EAS,EAS, 0,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC) #endif // TARGET_XARCH #if defined(FEATURE_MASKED_HW_INTRINSICS) +#ifdef TARGET_ARM64 +DEF_TP(MASK ,"mask" , TYP_MASK, 0,EAS,EAS, 0, 8, VTR_MASK, availableMaskRegs, RBM_MSK_CALLEE_SAVED, RBM_MSK_CALLEE_TRASH, VTF_S) +#else DEF_TP(MASK ,"mask" , TYP_MASK, 8, 8, 8, 2, 8, VTR_MASK, availableMaskRegs, RBM_MSK_CALLEE_SAVED, RBM_MSK_CALLEE_TRASH, VTF_S) +#endif #endif // FEATURE_MASKED_HW_INTRINSICS #endif // FEATURE_SIMD diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index e2de675a651578..d5be462aebe8b4 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -1701,6 +1701,9 @@ ValueNumStore::Chunk::Chunk(CompAllocator alloc, ValueNum* pNextBaseVN, var_type } case TYP_SIMD16: +#ifdef TARGET_ARM64 + case TYP_SIMDSV: +#endif { m_defs = new (alloc) Alloc::Type[ChunkSize]; break; @@ -1986,6 +1989,9 @@ ValueNum ValueNumStore::VNForGenericCon(var_types typ, uint8_t* cnsVal) return VNForSimd12Con(val); } case TYP_SIMD16: +#ifdef TARGET_ARM64 + case TYP_SIMDSV: +#endif { READ_VALUE(simd16_t); return VNForSimd16Con(val); @@ -2108,6 +2114,9 @@ ValueNum ValueNumStore::VNZeroForType(var_types typ) } case TYP_SIMD16: +#ifdef TARGET_ARM64 + case TYP_SIMDSV: // TODO-SVE: Implement scalable vector constant +#endif { return VNForSimd16Con(simd16_t::Zero()); } @@ -2205,6 +2214,9 @@ ValueNum ValueNumStore::VNAllBitsForType(var_types typ, unsigned elementCount) } case TYP_SIMD16: +#ifdef TARGET_ARM64 + case TYP_SIMDSV: +#endif { return VNForSimd16Con(simd16_t::AllBitsSet()); } @@ -2321,6 +2333,9 @@ ValueNum ValueNumStore::VNBroadcastForSimdType(var_types simdType, var_types sim } case TYP_SIMD16: +#ifdef TARGET_ARM64 + case TYP_SIMDSV: +#endif { simd16_t result = BroadcastConstantToSimd(this, simdBaseType, valVN); return VNForSimd16Con(result); @@ -2386,7 +2401,11 @@ bool ValueNumStore::VNIsVectorNaN(var_types simdType, var_types simdBaseType, Va } case TYP_SIMD16: +#ifdef TARGET_ARM64 + case TYP_SIMDSV: +#endif { + simdType = TYP_SIMD16; simd16_t tmp = GetConstantSimd16(valVN); memcpy(&vector, &tmp, genTypeSize(simdType)); break; @@ 
-2452,7 +2471,11 @@ bool ValueNumStore::VNIsVectorNegativeZero(var_types simdType, var_types simdBas } case TYP_SIMD16: +#ifdef TARGET_ARM64 + case TYP_SIMDSV: +#endif { + simdType = TYP_SIMD16; simd16_t tmp = GetConstantSimd16(valVN); memcpy(&vector, &tmp, genTypeSize(simdType)); break; @@ -4509,6 +4532,18 @@ ValueNum ValueNumStore::EvalCastForConstantArgs(var_types typ, VNFunc func, Valu unreached(); } } +#ifdef TARGET_ARM64 + case TYP_SIMD16: + case TYP_SIMDSV: + { + // TODO-SVE: VN for constant SIMDSV is borrowing from SIMD16, + // does this need to change? + assert(castToType == TYP_SIMD16 || castToType == TYP_SIMDSV); + simd16_t arg0Val = GetConstantSimd16(arg0VN); + return VNForSimd16Con(arg0Val); + } +#endif + default: unreached(); } @@ -7547,6 +7582,9 @@ ValueNum EvaluateUnarySimd( } case TYP_SIMD16: +#ifdef TARGET_ARM64 + case TYP_SIMDSV: +#endif { simd16_t arg0 = GetConstantSimd16(vns, baseType, arg0VN); @@ -7613,6 +7651,9 @@ ValueNum EvaluateBinarySimd(ValueNumStore* vns, } case TYP_SIMD16: +#ifdef TARGET_ARM64 + case TYP_SIMDSV: +#endif { simd16_t arg0 = GetConstantSimd16(vns, baseType, arg0VN); simd16_t arg1 = GetConstantSimd16(vns, baseType, arg1VN); @@ -7743,6 +7784,9 @@ ValueNum EvaluateSimdGetElement( } case TYP_SIMD16: +#ifdef TARGET_ARM64 + case TYP_SIMDSV: +#endif { return EvaluateSimdGetElement(vns, baseType, vns->GetConstantSimd16(arg0VN), arg1); } @@ -7787,6 +7831,9 @@ ValueNum EvaluateSimdCvtMaskToVector(ValueNumStore* vns, var_types simdType, var } case TYP_SIMD16: +#ifdef TARGET_ARM64 + case TYP_SIMDSV: +#endif { simd16_t result = {}; EvaluateSimdCvtMaskToVector(baseType, &result, arg0); @@ -7837,6 +7884,9 @@ ValueNum EvaluateSimdCvtVectorToMask(ValueNumStore* vns, var_types simdType, var } case TYP_SIMD16: +#ifdef TARGET_ARM64 + case TYP_SIMDSV: +#endif { simd16_t arg0 = GetConstantSimd16(vns, baseType, arg0VN); EvaluateSimdCvtVectorToMask(baseType, &result, arg0); @@ -8876,7 +8926,10 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary( case NI_AdvSimd_MultiplyByScalar: case NI_AdvSimd_Arm64_MultiplyByScalar: { - assert((TypeOfVN(arg0VN) == type) && (TypeOfVN(arg1VN) == TYP_SIMD8)); + // TODO-SVE: We shouldn't see this intrinsic operating on Vector<T> after porting to SVE + assert(TypeOfVN(arg0VN) == type || (type == TYP_SIMDSV && TypeOfVN(arg0VN) == TYP_SIMD16 && + genTypeSize(TYP_SIMDSV) == genTypeSize(TYP_SIMD16))); + assert(TypeOfVN(arg1VN) == TYP_SIMD8); if (!varTypeIsFloating(baseType)) { @@ -9150,6 +9203,9 @@ ValueNum EvaluateSimdWithElementFloating( } case TYP_SIMD16: +#ifdef TARGET_ARM64 + case TYP_SIMDSV: +#endif { simd16_t result = {}; EvaluateWithElementFloating(baseType, &result, vns->GetConstantSimd16(arg0VN), arg1, arg2); @@ -9204,6 +9260,9 @@ ValueNum EvaluateSimdWithElementIntegral( } case TYP_SIMD16: +#ifdef TARGET_ARM64 + case TYP_SIMDSV: +#endif { simd16_t result = {}; EvaluateWithElementIntegral(baseType, &result, vns->GetConstantSimd16(arg0VN), arg1, arg2); @@ -10306,6 +10365,9 @@ void ValueNumStore::vnDump(Compiler* comp, ValueNum vn, bool isPtr) } case TYP_SIMD16: +#ifdef TARGET_ARM64 + case TYP_SIMDSV: +#endif { simd16_t cnsVal = GetConstantSimd16(vn); printf("Simd16Cns[0x%08x, 0x%08x, 0x%08x, 0x%08x]", cnsVal.u32[0], cnsVal.u32[1], cnsVal.u32[2], @@ -11913,6 +11975,9 @@ void Compiler::fgValueNumberTreeConst(GenTree* tree) } case TYP_SIMD16: +#ifdef TARGET_ARM64 + case TYP_SIMDSV: // TODO-SVE: Implement scalable vector constant +#endif { simd16_t simd16Val; memcpy(&simd16Val, &tree->AsVecCon()->gtSimdVal, sizeof(simd16_t)); @@ -13462,7
+13527,16 @@ void Compiler::fgValueNumberCastTree(GenTree* tree) bool srcIsUnsigned = ((tree->gtFlags & GTF_UNSIGNED) != 0); bool hasOverflowCheck = tree->gtOverflowEx(); - assert(genActualType(castToType) == genActualType(tree->TypeGet())); // Ensure that the resultType is correct + // Ensure that the resultType is correct +#ifdef TARGET_ARM64 + // We can truncate scalable vectors to any SIMD type, and can zero + // extend SIMD types to a scalable vector. + assert(genActualType(castToType) == genActualType(tree->TypeGet()) || + (tree->TypeGet() == TYP_SIMDSV && varTypeIsSIMD(castToType)) || + (varTypeIsSIMD(tree->TypeGet()) && castToType == TYP_SIMDSV)); +#else + assert(genActualType(castToType) == genActualType(tree->TypeGet())); +#endif tree->gtVNPair = vnStore->VNPairForCast(srcVNPair, castToType, castFromType, srcIsUnsigned, hasOverflowCheck); } diff --git a/src/coreclr/vm/callingconvention.h b/src/coreclr/vm/callingconvention.h index 70fc6ef34c6b4b..3cb6dc07784324 100644 --- a/src/coreclr/vm/callingconvention.h +++ b/src/coreclr/vm/callingconvention.h @@ -64,6 +64,7 @@ struct ArgLocDesc case CORINFO_HFA_ELEM_DOUBLE: return 8; case CORINFO_HFA_ELEM_VECTOR64: return 8; case CORINFO_HFA_ELEM_VECTOR128: return 16; + case CORINFO_HFA_ELEM_VECTORT: return EEJitManager::GetSizeOfVectorT(); default: _ASSERTE(!"Invalid HFA Type"); return 0; } } diff --git a/src/coreclr/vm/class.cpp b/src/coreclr/vm/class.cpp index f42793017ee9b4..d801f4e1aca432 100644 --- a/src/coreclr/vm/class.cpp +++ b/src/coreclr/vm/class.cpp @@ -1684,45 +1684,60 @@ bool MethodTable::IsHFA() #endif // !FEATURE_HFA //******************************************************************************* -int MethodTable::GetVectorSize() +CorInfoHFAElemType MethodTable::GetVectorHFA() { // This is supported for finding HVA types for Arm64. In order to support the altjit, // we support this on 64-bit platforms (i.e. Arm64 and X64). + CorInfoHFAElemType hfaType = CORINFO_HFA_ELEM_NONE; #ifdef TARGET_64BIT if (IsIntrinsicType()) { LPCUTF8 namespaceName; LPCUTF8 className = GetFullyQualifiedNameInfo(&namespaceName); - int vectorSize = 0; if (strcmp(className, "Vector`1") == 0) { _ASSERTE(strcmp(namespaceName, "System.Numerics") == 0); - vectorSize = GetNumInstanceFieldBytes(); +#ifdef TARGET_ARM64 + hfaType = ExecutionManager::GetEEJitManager()->UseScalableVectorT() ? CORINFO_HFA_ELEM_VECTORT : CORINFO_HFA_ELEM_VECTOR128; +#else + switch (GetNumInstanceFieldBytes()) + { + case 8: + hfaType = CORINFO_HFA_ELEM_VECTOR64; + break; + case 16: + hfaType = CORINFO_HFA_ELEM_VECTOR128; + break; + default: + _ASSERTE(false); + break; + } +#endif } else if (strcmp(className, "Vector128`1") == 0) { _ASSERTE(strcmp(namespaceName, "System.Runtime.Intrinsics") == 0); - vectorSize = 16; + hfaType = CORINFO_HFA_ELEM_VECTOR128; } else if (strcmp(className, "Vector64`1") == 0) { _ASSERTE(strcmp(namespaceName, "System.Runtime.Intrinsics") == 0); - vectorSize = 8; + hfaType = CORINFO_HFA_ELEM_VECTOR64; } - if (vectorSize != 0) + if (hfaType != CORINFO_HFA_ELEM_NONE) { // We need to verify that T (the element or "base" type) is a primitive type. 
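// Here "primitive" means ELEMENT_TYPE_I1 through ELEMENT_TYPE_R8 plus the native-sized ELEMENT_TYPE_I/ELEMENT_TYPE_U; element types such as bool or char make the vector ineligible.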
TypeHandle typeArg = GetInstantiation()[0]; CorElementType corType = typeArg.GetSignatureCorElementType(); - if (((corType >= ELEMENT_TYPE_I1) && (corType <= ELEMENT_TYPE_R8)) || (corType == ELEMENT_TYPE_I) || (corType == ELEMENT_TYPE_U)) + if (!(((corType >= ELEMENT_TYPE_I1) && (corType <= ELEMENT_TYPE_R8)) || (corType == ELEMENT_TYPE_I) || (corType == ELEMENT_TYPE_U))) { - return vectorSize; + return CORINFO_HFA_ELEM_NONE; } } } #endif // TARGET_64BIT - return 0; + return hfaType; } //******************************************************************************* @@ -1744,10 +1759,11 @@ CorInfoHFAElemType MethodTable::GetHFAType() _ASSERTE(pMT->IsValueType()); _ASSERTE(pMT->GetNumInstanceFields() > 0); - int vectorSize = pMT->GetVectorSize(); - if (vectorSize != 0) + CorInfoHFAElemType hfaType = pMT->GetVectorHFA(); + + if (hfaType != CORINFO_HFA_ELEM_NONE) { - return (vectorSize == 8) ? CORINFO_HFA_ELEM_VECTOR64 : CORINFO_HFA_ELEM_VECTOR128; + return hfaType; } PTR_FieldDesc pFirstField = pMT->GetApproxFieldDescListRaw(); @@ -1816,7 +1832,7 @@ EEClass::CheckForHFA() // The opaque Vector types appear to have multiple fields, but need to be treated // as an opaque type of a single vector. - if (GetMethodTable()->GetVectorSize() != 0) + if (GetMethodTable()->GetVectorHFA() != CORINFO_HFA_ELEM_NONE) { #if defined(FEATURE_HFA) GetMethodTable()->SetIsHFA(); @@ -1842,27 +1858,13 @@ EEClass::CheckForHFA() { case ELEMENT_TYPE_VALUETYPE: { -#ifdef TARGET_ARM64 MethodTable* pMT; #if defined(FEATURE_HFA) pMT = pByValueClassCache[i]; #else pMT = pFD->LookupApproxFieldTypeHandle().AsMethodTable(); #endif - int thisElemSize = pMT->GetVectorSize(); - if (thisElemSize != 0) - { - fieldHFAType = (thisElemSize == 8) ? CORINFO_HFA_ELEM_VECTOR64 : CORINFO_HFA_ELEM_VECTOR128; - } - else -#endif // TARGET_ARM64 - { -#if defined(FEATURE_HFA) - fieldHFAType = pByValueClassCache[i]->GetHFAType(); -#else - fieldHFAType = pFD->LookupApproxFieldTypeHandle().AsMethodTable()->GetHFAType(); -#endif - } + fieldHFAType = pMT->GetHFAType(); int requiredAlignment; switch (fieldHFAType) @@ -1875,6 +1877,7 @@ EEClass::CheckForHFA() requiredAlignment = 8; break; case CORINFO_HFA_ELEM_VECTOR128: + case CORINFO_HFA_ELEM_VECTORT: requiredAlignment = 16; break; default: @@ -1946,6 +1949,10 @@ EEClass::CheckForHFA() case CORINFO_HFA_ELEM_VECTOR128: elemSize = 16; break; + case CORINFO_HFA_ELEM_VECTORT: + elemSize = EEJitManager::GetSizeOfVectorT(); + _ASSERTE(elemSize != 0); + break; #endif default: // ELEMENT_TYPE_END diff --git a/src/coreclr/vm/classlayoutinfo.cpp b/src/coreclr/vm/classlayoutinfo.cpp index 62ca8f733a2095..61665628d13f81 100644 --- a/src/coreclr/vm/classlayoutinfo.cpp +++ b/src/coreclr/vm/classlayoutinfo.cpp @@ -1186,6 +1186,10 @@ CorInfoHFAElemType EEClassNativeLayoutInfo::GetNativeHFATypeRaw() const #ifdef TARGET_ARM64 case CORINFO_HFA_ELEM_VECTOR64: elemSize = 8; break; case CORINFO_HFA_ELEM_VECTOR128: elemSize = 16; break; + case CORINFO_HFA_ELEM_VECTORT: + elemSize = EEJitManager::GetSizeOfVectorT(); + _ASSERTE(elemSize != 0); + break; #endif default: _ASSERTE(!"Invalid HFA Type"); } diff --git a/src/coreclr/vm/codeman.h b/src/coreclr/vm/codeman.h index 0862961e8f1b62..cdaf5574eb0e45 100644 --- a/src/coreclr/vm/codeman.h +++ b/src/coreclr/vm/codeman.h @@ -2188,6 +2188,24 @@ class EEJitManager final : public EECodeGenManager return m_CPUCompileFlags; } + inline bool UseScalableVectorT() + { + LIMITED_METHOD_CONTRACT; +#ifdef TARGET_ARM64 + // Vector length discovery is currently dependent on running directly 
on Arm64. + return CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_JitUseScalableVectorT) + && m_CPUCompileFlags.IsSet(InstructionSet_Sve_Arm64); +#else + return false; +#endif + } + + static uint32_t GetSizeOfVectorT() + { + LIMITED_METHOD_CONTRACT; + return CoreLibBinder::GetClass(CLASS__VECTORT)->GetNumInstanceFieldBytes(); + } + private : Crst m_JitLoadLock; diff --git a/src/coreclr/vm/methodtable.h b/src/coreclr/vm/methodtable.h index 7d21c93a5ebfc6..cca82d4d3611d8 100644 --- a/src/coreclr/vm/methodtable.h +++ b/src/coreclr/vm/methodtable.h @@ -2001,8 +2001,8 @@ class MethodTable bool IsHFA(); #endif // FEATURE_HFA - // Returns the size in bytes of this type if it is a HW vector type; 0 otherwise. - int GetVectorSize(); + // Returns the HFA element type if this is a HW vector type; CORINFO_HFA_ELEM_NONE otherwise. + CorInfoHFAElemType GetVectorHFA(); // Get the HFA type. This is supported both with FEATURE_HFA, in which case it // depends on the cached bit on the class, or without, in which case it is recomputed diff --git a/src/coreclr/vm/methodtablebuilder.cpp b/src/coreclr/vm/methodtablebuilder.cpp index 1195153c5b12b8..8854cece09042d 100644 --- a/src/coreclr/vm/methodtablebuilder.cpp +++ b/src/coreclr/vm/methodtablebuilder.cpp @@ -1157,6 +1157,10 @@ MethodTableBuilder::CopyParentVtable() } } +#ifdef TARGET_ARM64 +extern "C" uint64_t GetSveLengthFromOS(); +#endif + //******************************************************************************* // Determine if this is the special SIMD type System.Numerics.Vector<T>, whose // size is determined dynamically based on the hardware and the presence of JIT @@ -1169,7 +1173,7 @@ BOOL MethodTableBuilder::CheckIfSIMDAndUpdateSize() { STANDARD_VM_CONTRACT; -#if defined(TARGET_X86) || defined(TARGET_AMD64) +#if defined(TARGET_X86) || defined(TARGET_AMD64) || defined(TARGET_ARM64) if (!bmtProp->fIsIntrinsicType) return false; @@ -1185,24 +1189,37 @@ BOOL MethodTableBuilder::CheckIfSIMDAndUpdateSize() if (strcmp(className, "Vector`1") != 0 || strcmp(nameSpace, "System.Numerics") != 0) return false; - CORJIT_FLAGS CPUCompileFlags = ExecutionManager::GetEEJitManager()->GetCPUCompileFlags(); - uint32_t numInstanceFieldBytes = 16; + uint32_t vectorTSize = 0; + CORJIT_FLAGS CPUCompileFlags = ExecutionManager::GetEEJitManager()->GetCPUCompileFlags(); +#if defined(TARGET_X86) || defined(TARGET_AMD64) if (CPUCompileFlags.IsSet(InstructionSet_VectorT512)) { - numInstanceFieldBytes = 64; + vectorTSize = 64; } else if (CPUCompileFlags.IsSet(InstructionSet_VectorT256)) { - numInstanceFieldBytes = 32; + vectorTSize = 32; + } +#elif defined(TARGET_ARM64) + if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_JitUseScalableVectorT) + && CPUCompileFlags.IsSet(InstructionSet_Sve_Arm64)) + { + vectorTSize = (uint32_t) GetSveLengthFromOS(); + } + else if (CPUCompileFlags.IsSet(InstructionSet_VectorT128)) + { + vectorTSize = 16; } +#endif - if (numInstanceFieldBytes != 16) + if (vectorTSize > 0 && vectorTSize != 16) { - bmtFP->NumInstanceFieldBytes = numInstanceFieldBytes; + bmtFP->NumInstanceFieldBytes = vectorTSize; return true; } -#endif // TARGET_X86 || TARGET_AMD64 + +#endif // TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 return false; } diff --git a/src/tests/JIT/opt/SVE/ConstantMasks.cs b/src/tests/JIT/opt/SVE/ConstantMasks.cs index 078e60e9b55411..a7aa2116f5c194 100644 --- a/src/tests/JIT/opt/SVE/ConstantMasks.cs +++ b/src/tests/JIT/opt/SVE/ConstantMasks.cs @@ -67,14 +67,14 @@ static void CndSelectEmbedded(Vector<int> mask, Vector<int> op1, Vector<int> op2) { [MethodImpl(MethodImplOptions.NoInlining)] static void
CndSelectEmbeddedFalseMask(Vector<int> op1, Vector<int> op2) { - //ARM64-FULL-LINE: mov v0.16b, v1.16b + //ARM64-FULL-LINE: mov {{v0.16b, v1.16b|z0.d, z1.d}} Vector<int> result = Sve.ConditionalSelect(Sve.CreateFalseMaskInt32(), Sve.AbsoluteDifference(op1, op2), op2); Consume(result); } [MethodImpl(MethodImplOptions.NoInlining)] static void CndSelectEmbeddedZero(Vector<int> op1, Vector<int> op2) { - //ARM64-FULL-LINE: mov v0.16b, v1.16b + //ARM64-FULL-LINE: mov {{v0.16b, v1.16b|z0.d, z1.d}} Vector<int> result = Sve.ConditionalSelect(Vector<int>.Zero, Sve.AbsoluteDifference(op1, op2), op2); Consume(result); } @@ -112,14 +112,14 @@ static void CndSelectOptionalEmbedded(Vector<int> mask, Vector<int> op1, Vector<int> op2) { [MethodImpl(MethodImplOptions.NoInlining)] static void CndSelectOptionalEmbeddedFalseMask(Vector<int> op1, Vector<int> op2) { - //ARM64-FULL-LINE: mov v0.16b, v1.16b + //ARM64-FULL-LINE: mov {{v0.16b, v1.16b|z0.d, z1.d}} Vector<int> result = Sve.ConditionalSelect(Sve.CreateFalseMaskInt32(), Sve.Add(op1, op2), op2); Consume(result); } [MethodImpl(MethodImplOptions.NoInlining)] static void CndSelectOptionalEmbeddedZero(Vector<int> op1, Vector<int> op2) { - //ARM64-FULL-LINE: mov v0.16b, v1.16b + //ARM64-FULL-LINE: mov {{v0.16b, v1.16b|z0.d, z1.d}} Vector<int> result = Sve.ConditionalSelect(Vector<int>.Zero, Sve.Add(op1, op2), op2); Consume(result); } @@ -153,14 +153,14 @@ static void CndSelectEmbeddedOneOp(Vector<int> mask, Vector<int> op1) { [MethodImpl(MethodImplOptions.NoInlining)] static void CndSelectEmbeddedOneOpFalseMask(Vector<int> dummy, Vector<int> op1) { - //ARM64-FULL-LINE: mov v0.16b, v1.16b + //ARM64-FULL-LINE: mov {{v0.16b, v1.16b|z0.d, z1.d}} Vector<int> result = Sve.ConditionalSelect(Sve.CreateFalseMaskInt32(), Sve.Abs(op1), op1); Consume(result); } [MethodImpl(MethodImplOptions.NoInlining)] static void CndSelectEmbeddedOneOpZero(Vector<int> dummy, Vector<int> op1) { - //ARM64-FULL-LINE: mov v0.16b, v1.16b + //ARM64-FULL-LINE: mov {{v0.16b, v1.16b|z0.d, z1.d}} Vector<int> result = Sve.ConditionalSelect(Vector<int>.Zero, Sve.Abs(op1), op1); Consume(result); } @@ -199,14 +199,14 @@ static void CndSelectEmbeddedReduction(Vector<long> mask, Vector<int> op1, Vector<long> opf) { [MethodImpl(MethodImplOptions.NoInlining)] static void CndSelectEmbeddedReductionFalseMask(Vector<int> op1, Vector<long> opf) { - //ARM64-FULL-LINE: mov v0.16b, v1.16b + //ARM64-FULL-LINE: mov {{v0.16b, v1.16b|z0.d, z1.d}} Vector<long> result = Sve.ConditionalSelect(Sve.CreateFalseMaskInt64(), Sve.AddAcross(op1), opf); Consume(result); } [MethodImpl(MethodImplOptions.NoInlining)] static void CndSelectEmbeddedReductionZero(Vector<int> op1, Vector<long> opf) { - //ARM64-FULL-LINE: mov v0.16b, v1.16b + //ARM64-FULL-LINE: mov {{v0.16b, v1.16b|z0.d, z1.d}} Vector<long> result = Sve.ConditionalSelect(Vector<long>.Zero, Sve.AddAcross(op1), opf); Consume(result); } diff --git a/src/tests/JIT/opt/SVE/ConstantMasksOp2Fixed.cs b/src/tests/JIT/opt/SVE/ConstantMasksOp2Fixed.cs index ba23ebe08f07c9..8e9f07c5039a9c 100644 --- a/src/tests/JIT/opt/SVE/ConstantMasksOp2Fixed.cs +++ b/src/tests/JIT/opt/SVE/ConstantMasksOp2Fixed.cs @@ -235,7 +235,7 @@ static void CndSelectEmbeddedReductionF(Vector<long> mask, Vector<int> op1) { [MethodImpl(MethodImplOptions.NoInlining)] static void CndSelectEmbeddedReductionZ(Vector<long> mask, Vector<int> op1) { //ARM64-FULL-LINE: cmpne {{p[0-9]+}}.d, {{p[0-9]+}}/z, {{z[0-9]+}}.d, #0 - //ARM64-FULL-LINE-NEXT: ptrue {{p[0-9]+}}.s + //ARM64-FULL-LINE: ptrue {{p[0-9]+}}.s //ARM64-FULL-LINE-NEXT: saddv {{d[0-9]+}}, {{p[0-9]+}}, {{z[0-9]+}}.s //ARM64-FULL-LINE-NEXT: movi {{v[0-9]+}}.4s, #0 //ARM64-FULL-LINE-NEXT: sel {{z[0-9]+}}.d, {{p[0-9]+}}, {{z[0-9]+}}.d, {{z[0-9]+}}.d
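Taken together, these changes size System.Numerics.Vector<T> from the SVE vector length reported by the OS (GetSveLengthFromOS) instead of the fixed 16 bytes of the NEON-backed implementation, and give the JIT a TYP_SIMDSV type that lives in Z registers. A minimal sketch of the observable effect, assuming an SVE-capable Arm64 machine with the knob enabled (DOTNET_JitUseScalableVectorT=1); the demo class and the printed values are illustrative, not part of this change:

using System;
using System.Numerics;

class ScalableVectorTDemo
{
    static void Main()
    {
        // With the flag on and a 256-bit SVE implementation, Vector<T> is
        // 32 bytes wide, so these print 32 and 8; with the flag off (or
        // without SVE) they print 16 and 4.
        Console.WriteLine(Vector<byte>.Count);
        Console.WriteLine(Vector<int>.Count);

        // Code written against Vector<int>.Count stays correct at any
        // vector length; under the flag the JIT emits Z-register code for
        // these TYP_SIMDSV values (sve_ldr/sve_str for loads and stores,
        // sve_mov for copies).
        Span<int> data = stackalloc int[Vector<int>.Count * 4];
        data.Fill(1);

        Vector<int> acc = Vector<int>.Zero;
        for (int i = 0; i < data.Length; i += Vector<int>.Count)
        {
            acc += new Vector<int>(data.Slice(i, Vector<int>.Count));
        }

        // Sums all elements: prints data.Length, i.e. 4 * Vector<int>.Count.
        Console.WriteLine(Vector.Sum(acc));
    }
}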