-
Notifications
You must be signed in to change notification settings - Fork 5.2k
Description
In the repro code below, sizeof(TChar) is known at JIT time, yet the generated code (see the disassembly) is suboptimal: some helper (?) method is called instead of emitting the instruction directly, and because of this the SIMD vector value is placed on the stack.
This issue is most noticeable with shifts whose immediate value is computed using a type cast, but as far as I remember there were similar cases with other instructions.
Repro code:
// Repro method: builds a zeroed Vector128<byte>, shifts it with
// Sse2.ShiftRightLogical128BitLane using (byte)sizeof(TChar) as the shift count,
// and prints element 0 of the result.
// TChar is constrained to unmanaged so that sizeof(TChar) is permitted; Main
// instantiates it with byte and ushort.
// NoInlining keeps the method out-of-line so its own disassembly can be inspected.
[MethodImpl(MethodImplOptions.NoInlining)]
static void Test<TChar>() where TChar : unmanaged
{
Vector128<byte> v = Sse2.SetZeroVector128<byte>();
// The shift count is a cast of sizeof(TChar), which the report says is known at JIT time.
v = Sse2.ShiftRightLogical128BitLane(v, (byte)sizeof(TChar)); // <====== this statement is compiled to a call (see disassembly) rather than an inline instruction
Console.WriteLine(Sse41.Extract(v, 0));
}
static void Main(string[] args)
{
Test<byte>();
Test<ushort>();
}
Disasm:
--- ...\Program2.cs ---
Vector128<byte> v = Sse2.SetZeroVector128<byte>();
000007FE75244A20 sub rsp,58h
000007FE75244A24 xor eax,eax
000007FE75244A26 mov qword ptr [rsp+40h],rax
000007FE75244A2B mov qword ptr [rsp+48h],rax
000007FE75244A30 pxor xmm0,xmm0
000007FE75244A34 movaps xmmword ptr [rsp+40h],xmm0
v = Sse2.ShiftRightLogical128BitLane(v, (byte)sizeof(TChar));
000007FE75244A39 movaps xmm0,xmmword ptr [rsp+40h]
000007FE75244A3E movaps xmmword ptr [rsp+30h],xmm0
000007FE75244A43 lea rcx,[rsp+40h]
000007FE75244A48 lea rdx,[rsp+20h]
000007FE75244A4D mov r8,qword ptr [rsp+30h]
000007FE75244A52 mov qword ptr [rdx],r8
000007FE75244A55 mov r8,qword ptr [rsp+38h]
000007FE75244A5A mov qword ptr [rdx+8],r8
000007FE75244A5E lea rdx,[rsp+20h]
000007FE75244A63 mov r8d,1
000007FE75244A69 call 000007FE752421A0 <=================
Console.WriteLine(Sse41.Extract(v, 0));
000007FE75244A6E movaps xmm0,xmmword ptr [rsp+40h]
000007FE75244A73 pextrb ecx,xmm0,0
000007FE75244A79 call 000007FE75241AC8
000007FE75244A7E nop
000007FE75244A7F add rsp,58h
000007FE75244A83 ret
--- No source file -------------------------------------------------------------
category:cq
theme:hardware-intrinsics
skill-level:expert
cost:large