KEMBAR78
Simplify and optimize Math(F).Round by MichalPetryka · Pull Request #98186 · dotnet/runtime · GitHub
Skip to content

Conversation

@MichalPetryka
Copy link
Contributor

@MichalPetryka MichalPetryka commented Feb 8, 2024

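Simplifies and optimizes the parameterized `Math.Round` and `MathF.Round` overloads, i.e. those that take a `digits` count and/or a `MidpointRounding` mode.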

Code:

/// <summary>
/// Minimal wrappers around the parameterized <c>Math.Round</c> / <c>MathF.Round</c>
/// overloads, one per call shape, so that the code generated for each shape can be
/// inspected in isolation.
/// </summary>
internal static class RoundDisasm
{
	/// <summary>Rounds to the nearest integer; midpoints go away from zero.</summary>
	public static double RoundAway(double d)
	{
		return Math.Round(d, MidpointRounding.AwayFromZero);
	}

	/// <summary>Single-precision counterpart of <c>RoundAway(double)</c>.</summary>
	public static float RoundAway(float f)
	{
		return MathF.Round(f, MidpointRounding.AwayFromZero);
	}

	/// <summary>Rounds toward zero (constant <c>MidpointRounding.ToZero</c> mode).</summary>
	public static double RoundTruncate(double d)
	{
		return Math.Round(d, MidpointRounding.ToZero);
	}

	/// <summary>Single-precision counterpart of <c>RoundTruncate(double)</c>.</summary>
	public static float RoundTruncate(float f)
	{
		return MathF.Round(f, MidpointRounding.ToZero);
	}

	/// <summary>Rounds to an integer using a rounding mode supplied by the caller.</summary>
	public static double Round(double d, MidpointRounding m)
	{
		return Math.Round(d, m);
	}

	/// <summary>Single-precision counterpart of <c>Round(double, MidpointRounding)</c>.</summary>
	public static float Round(float f, MidpointRounding m)
	{
		return MathF.Round(f, m);
	}

	/// <summary>Rounds to two fractional digits using the overload's default rounding mode.</summary>
	public static double Round2(double d)
	{
		return Math.Round(d, 2);
	}

	/// <summary>Single-precision counterpart of <c>Round2(double)</c>.</summary>
	public static float Round2(float f)
	{
		return MathF.Round(f, 2);
	}

	/// <summary>Rounds to four fractional digits with <c>MidpointRounding.ToZero</c>.</summary>
	public static double Round4(double d)
	{
		return Math.Round(d, 4, MidpointRounding.ToZero);
	}

	/// <summary>Single-precision counterpart of <c>Round4(double)</c>.</summary>
	public static float Round4(float f)
	{
		return MathF.Round(f, 4, MidpointRounding.ToZero);
	}
}

.NET 8

; Assembly listing for method RoundDisasm:RoundAway(double):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 3 inlinees without PGO data

G_M000_IG01:                ;; offset=0x0000
       C5F877               vzeroupper 
 
G_M000_IG02:                ;; offset=0x0003
       C5F828C8             vmovaps  xmm1, xmm0
       C5F8101521000000     vmovups  xmm2, xmmword ptr [reloc @RWD00]
       C5F154CA             vandpd   xmm1, xmm1, xmm2
       62F1F518560D23000000 vorpd    xmm1, xmm1, qword ptr [reloc @RWD16] {1to2}
       C5F358C0             vaddsd   xmm0, xmm1, xmm0
       C4E3790BC00B         vroundsd xmm0, xmm0, xmm0, 11
 
G_M000_IG03:                ;; offset=0x0027
       C3                   ret      
 
RWD00  	dq	8000000000000000h, 8000000000000000h
RWD16  	dq	3FDFFFFFFFFFFFFFh	;          0.5

; Total bytes of code 40

; Assembly listing for method RoundDisasm:RoundAway(float):float (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 3 inlinees without PGO data

G_M000_IG01:                ;; offset=0x0000
       C5F877               vzeroupper 
 
G_M000_IG02:                ;; offset=0x0003
       C5F828C8             vmovaps  xmm1, xmm0
       C5F8101521000000     vmovups  xmm2, xmmword ptr [reloc @RWD00]
       C5F054CA             vandps   xmm1, xmm1, xmm2
       62F17418560D23000000 vorps    xmm1, xmm1, dword ptr [reloc @RWD16] {1to4}
       C5F258C0             vaddss   xmm0, xmm1, xmm0
       C4E3790AC00B         vroundss xmm0, xmm0, xmm0, 11
 
G_M000_IG03:                ;; offset=0x0027
       C3                   ret      
 
RWD00  	dq	8000000080000000h, 8000000080000000h
RWD16  	dd	3EFFFFFFh		;       0.5

; Total bytes of code 40

; Assembly listing for method RoundDisasm:RoundTruncate(double):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; fully interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 1 inlinees without PGO data

G_M000_IG01:                ;; offset=0x0000
       C5F877               vzeroupper 
 
G_M000_IG02:                ;; offset=0x0003
       33D2                 xor      edx, edx
       41B802000000         mov      r8d, 2
 
G_M000_IG03:                ;; offset=0x000B
       FF259F041E00         tail.jmp [System.Math:Round(double,int,int):double]
 
; Total bytes of code 17

; Assembly listing for method RoundDisasm:RoundTruncate(float):float (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; fully interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 1 inlinees without PGO data

G_M000_IG01:                ;; offset=0x0000
       C5F877               vzeroupper 
 
G_M000_IG02:                ;; offset=0x0003
       33D2                 xor      edx, edx
       41B802000000         mov      r8d, 2
 
G_M000_IG03:                ;; offset=0x000B
       FF25F78F1E00         tail.jmp [System.MathF:Round(float,int,int):float]
 
; Total bytes of code 17

; Assembly listing for method RoundDisasm:Round(double,int):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; fully interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 3 inlinees without PGO data

G_M000_IG01:                ;; offset=0x0000
       C5F877               vzeroupper 
 
G_M000_IG02:                ;; offset=0x0003
       448BC2               mov      r8d, edx
       33D2                 xor      edx, edx
 
G_M000_IG03:                ;; offset=0x0008
       FF2542041E00         tail.jmp [System.Math:Round(double,int,int):double]
 
; Total bytes of code 14

; Assembly listing for method RoundDisasm:Round(float,int):float (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; fully interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 3 inlinees without PGO data

G_M000_IG01:                ;; offset=0x0000
       C5F877               vzeroupper 
 
G_M000_IG02:                ;; offset=0x0003
       448BC2               mov      r8d, edx
       33D2                 xor      edx, edx
 
G_M000_IG03:                ;; offset=0x0008
       FF25AA8F1E00         tail.jmp [System.MathF:Round(float,int,int):float]
 
; Total bytes of code 14

; Assembly listing for method RoundDisasm:Round2(double):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; fully interruptible
; No PGO data
; 0 inlinees with PGO data; 1 single block inlinees; 0 inlinees without PGO data

G_M000_IG01:                ;; offset=0x0000
       C5F877               vzeroupper 
 
G_M000_IG02:                ;; offset=0x0003
       BA02000000           mov      edx, 2
       4533C0               xor      r8d, r8d
 
G_M000_IG03:                ;; offset=0x000B
       FF25FF031E00         tail.jmp [System.Math:Round(double,int,int):double]
 
; Total bytes of code 17

; Assembly listing for method RoundDisasm:Round2(float):float (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; fully interruptible
; No PGO data
; 0 inlinees with PGO data; 1 single block inlinees; 0 inlinees without PGO data

G_M000_IG01:                ;; offset=0x0000
       C5F877               vzeroupper 
 
G_M000_IG02:                ;; offset=0x0003
       BA02000000           mov      edx, 2
       4533C0               xor      r8d, r8d
 
G_M000_IG03:                ;; offset=0x000B
       FF25578F1E00         tail.jmp [System.MathF:Round(float,int,int):float]
 
; Total bytes of code 17

; Assembly listing for method RoundDisasm:Round4(double):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; fully interruptible
; No PGO data

G_M000_IG01:                ;; offset=0x0000
       C5F877               vzeroupper 
 
G_M000_IG02:                ;; offset=0x0003
       BA04000000           mov      edx, 4
       41B802000000         mov      r8d, 2
 
G_M000_IG03:                ;; offset=0x000E
       FF259C031E00         tail.jmp [System.Math:Round(double,int,int):double]
 
; Total bytes of code 20

; Assembly listing for method RoundDisasm:Round4(float):float (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; fully interruptible
; No PGO data

G_M000_IG01:                ;; offset=0x0000
       C5F877               vzeroupper 
 
G_M000_IG02:                ;; offset=0x0003
       BA04000000           mov      edx, 4
       41B802000000         mov      r8d, 2
 
G_M000_IG03:                ;; offset=0x000E
       FF25F48E1E00         tail.jmp [System.MathF:Round(float,int,int):float]
 
; Total bytes of code 20

This branch

; Assembly listing for method RoundDisasm:RoundAway(double):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 2 inlinees without PGO data
; Final local variable assignments
;
;  V00 arg0         [V00,T00] (  4,  4   )  double  ->  mm0         single-def
;# V01 OutArgs      [V01    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;  V02 tmp1         [V02,T01] (  2,  2   )  double  ->  mm0         "Inline return value spill temp"
;* V03 tmp2         [V03    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
;* V04 tmp3         [V04    ] (  0,  0   )  double  ->  zero-ref    "Inline return value spill temp"
;* V05 tmp4         [V05    ] (  0,  0   )     ref  ->  zero-ref    "argument with side effect"
;
; Lcl frame size = 0

G_M25588_IG01:  ;; offset=0x0000
       C5F877               vzeroupper 
						;; size=3 bbWeight=1 PerfScore 1.00
G_M25588_IG02:  ;; offset=0x0003
       C5F8100D25000000     vmovups  xmm1, xmmword ptr [reloc @RWD00]
       C5F828D0             vmovaps  xmm2, xmm0
       62F3ED08250D26000000CA vpternlogq xmm1, xmm2, xmmword ptr [reloc @RWD16], -54
       C5F358C0             vaddsd   xmm0, xmm1, xmm0
       C4E3790BC00B         vroundsd xmm0, xmm0, xmm0, 11
						;; size=33 bbWeight=1 PerfScore 15.25
G_M25588_IG03:  ;; offset=0x0024
       C3                   ret      
						;; size=1 bbWeight=1 PerfScore 1.00
RWD00  	dq	8000000000000000h, 8000000000000000h
RWD16  	dq	3FDFFFFFFFFFFFFFh, 3FDFFFFFFFFFFFFFh


; Total bytes of code 37, prolog size 3, PerfScore 17.25, instruction count 7, allocated bytes for code 37 (MethodHash=318b9c0b) for method RoundDisasm:RoundAway(double):double (FullOpts)
; ============================================================

; Assembly listing for method RoundDisasm:RoundAway(float):float (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 2 inlinees without PGO data
; Final local variable assignments
;
;  V00 arg0         [V00,T00] (  4,  4   )   float  ->  mm0         single-def
;# V01 OutArgs      [V01    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;  V02 tmp1         [V02,T01] (  2,  2   )   float  ->  mm0         "Inline return value spill temp"
;* V03 tmp2         [V03    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
;* V04 tmp3         [V04    ] (  0,  0   )   float  ->  zero-ref    "Inline return value spill temp"
;* V05 tmp4         [V05    ] (  0,  0   )     ref  ->  zero-ref    "argument with side effect"
;
; Lcl frame size = 0

G_M10964_IG01:  ;; offset=0x0000
       C5F877               vzeroupper 
						;; size=3 bbWeight=1 PerfScore 1.00
G_M10964_IG02:  ;; offset=0x0003
       C5F8100D25000000     vmovups  xmm1, xmmword ptr [reloc @RWD00]
       C5F828D0             vmovaps  xmm2, xmm0
       62F36D08250D26000000CA vpternlogd xmm1, xmm2, xmmword ptr [reloc @RWD16], -54
       C5F258C0             vaddss   xmm0, xmm1, xmm0
       C4E3790AC00B         vroundss xmm0, xmm0, xmm0, 11
						;; size=33 bbWeight=1 PerfScore 15.25
G_M10964_IG03:  ;; offset=0x0024
       C3                   ret      
						;; size=1 bbWeight=1 PerfScore 1.00
RWD00  	dq	8000000080000000h, 8000000080000000h
RWD16  	dq	3EFFFFFF3EFFFFFFh, 3EFFFFFF3EFFFFFFh


; Total bytes of code 37, prolog size 3, PerfScore 17.25, instruction count 7, allocated bytes for code 37 (MethodHash=b997d52b) for method RoundDisasm:RoundAway(float):float (FullOpts)
; ============================================================

; Assembly listing for method RoundDisasm:RoundTruncate(double):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 2 inlinees without PGO data
; Final local variable assignments
;
;  V00 arg0         [V00,T00] (  3,  3   )  double  ->  mm0         single-def
;# V01 OutArgs      [V01    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;  V02 tmp1         [V02,T01] (  2,  2   )  double  ->  mm0         "Inline return value spill temp"
;* V03 tmp2         [V03    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
;* V04 tmp3         [V04    ] (  0,  0   )  double  ->  zero-ref    "Inline return value spill temp"
;* V05 tmp4         [V05    ] (  0,  0   )     ref  ->  zero-ref    "argument with side effect"
;
; Lcl frame size = 0

G_M39156_IG01:  ;; offset=0x0000
       C5F877               vzeroupper 
						;; size=3 bbWeight=1 PerfScore 1.00
G_M39156_IG02:  ;; offset=0x0003
       C4E3790BC00B         vroundsd xmm0, xmm0, xmm0, 11
						;; size=6 bbWeight=1 PerfScore 7.00
G_M39156_IG03:  ;; offset=0x0009
       C3                   ret      
						;; size=1 bbWeight=1 PerfScore 1.00

; Total bytes of code 10, prolog size 3, PerfScore 9.00, instruction count 3, allocated bytes for code 10 (MethodHash=6fe1670b) for method RoundDisasm:RoundTruncate(double):double (FullOpts)
; ============================================================

; Assembly listing for method RoundDisasm:RoundTruncate(float):float (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 2 inlinees without PGO data
; Final local variable assignments
;
;  V00 arg0         [V00,T00] (  3,  3   )   float  ->  mm0         single-def
;# V01 OutArgs      [V01    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;  V02 tmp1         [V02,T01] (  2,  2   )   float  ->  mm0         "Inline return value spill temp"
;* V03 tmp2         [V03    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
;* V04 tmp3         [V04    ] (  0,  0   )   float  ->  zero-ref    "Inline return value spill temp"
;* V05 tmp4         [V05    ] (  0,  0   )     ref  ->  zero-ref    "argument with side effect"
;
; Lcl frame size = 0

G_M8148_IG01:  ;; offset=0x0000
       C5F877               vzeroupper 
						;; size=3 bbWeight=1 PerfScore 1.00
G_M8148_IG02:  ;; offset=0x0003
       C4E3790AC00B         vroundss xmm0, xmm0, xmm0, 11
						;; size=6 bbWeight=1 PerfScore 7.00
G_M8148_IG03:  ;; offset=0x0009
       C3                   ret      
						;; size=1 bbWeight=1 PerfScore 1.00

; Total bytes of code 10, prolog size 3, PerfScore 9.00, instruction count 3, allocated bytes for code 10 (MethodHash=41fee02b) for method RoundDisasm:RoundTruncate(float):float (FullOpts)
; ============================================================

; Assembly listing for method RoundDisasm:Round(double,int):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 2 inlinees without PGO data
; Final local variable assignments
;
;  V00 arg0         [V00,T01] (  8,  5   )  double  ->  mm0         single-def
;  V01 arg1         [V01,T00] (  5,  4   )     int  ->  rbx         single-def
;  V02 OutArgs      [V02    ] (  1,  1   )  struct (32) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;  V03 tmp1         [V03,T02] (  6,  3.50)  double  ->  mm0         "Inline return value spill temp"
;* V04 tmp2         [V04    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
;* V05 tmp3         [V05    ] (  0,  0   )  double  ->  zero-ref    "Inline return value spill temp"
;  V06 tmp4         [V06,T03] (  2,  0   )     ref  ->  rdx         single-def "argument with side effect"
;
; Lcl frame size = 32

G_M6149_IG01:  ;; offset=0x0000
       53                   push     rbx
       4883EC20             sub      rsp, 32
       C5F877               vzeroupper 
       8BDA                 mov      ebx, edx
						;; size=10 bbWeight=1 PerfScore 2.50
G_M6149_IG02:  ;; offset=0x000A
       83FB04               cmp      ebx, 4
       775F                 ja       SHORT G_M6149_IG09
       8BCB                 mov      ecx, ebx
       488D1578000000       lea      rdx, [reloc @RWD00]
       8B148A               mov      edx, dword ptr [rdx+4*rcx]
       488D05E8FFFFFF       lea      rax, G_M6149_IG02
       4803D0               add      rdx, rax
       FFE2                 jmp      rdx
						;; size=29 bbWeight=1 PerfScore 7.25
G_M6149_IG03:  ;; offset=0x0027
       C5F8100D81000000     vmovups  xmm1, xmmword ptr [reloc @RWD32]
       C5F828D0             vmovaps  xmm2, xmm0
       62F3ED08250D82000000CA vpternlogq xmm1, xmm2, xmmword ptr [reloc @RWD48], -54
       C5F358C0             vaddsd   xmm0, xmm1, xmm0
       C4E3790BC00B         vroundsd xmm0, xmm0, xmm0, 11
       EB1E                 jmp      SHORT G_M6149_IG08
						;; size=35 bbWeight=0.50 PerfScore 8.62
G_M6149_IG04:  ;; offset=0x004A
       C4E3790BC004         vroundsd xmm0, xmm0, xmm0, 4
       EB16                 jmp      SHORT G_M6149_IG08
						;; size=8 bbWeight=0.50 PerfScore 4.50
G_M6149_IG05:  ;; offset=0x0052
       C4E3790BC00B         vroundsd xmm0, xmm0, xmm0, 11
       EB0E                 jmp      SHORT G_M6149_IG08
						;; size=8 bbWeight=0.50 PerfScore 4.50
G_M6149_IG06:  ;; offset=0x005A
       C4E3790BC009         vroundsd xmm0, xmm0, xmm0, 9
       EB06                 jmp      SHORT G_M6149_IG08
						;; size=8 bbWeight=0.50 PerfScore 4.50
G_M6149_IG07:  ;; offset=0x0062
       C4E3790BC00A         vroundsd xmm0, xmm0, xmm0, 10
						;; size=6 bbWeight=0.50 PerfScore 3.50
G_M6149_IG08:  ;; offset=0x0068
       4883C420             add      rsp, 32
       5B                   pop      rbx
       C3                   ret      
						;; size=6 bbWeight=1 PerfScore 1.75
G_M6149_IG09:  ;; offset=0x006E
       B9840B0000           mov      ecx, 0xB84
       48BA00400F6CFD7F0000 mov      rdx, 0x7FFD6C0F4000
       E83EE64D5F           call     CORINFO_HELP_STRCNS
       488BD0               mov      rdx, rax
       8BCB                 mov      ecx, ebx
       FF15E3957C00         call     [System.ThrowHelper:ThrowArgumentException_InvalidEnumValue[int](int,System.String)]
       CC                   int3     
						;; size=32 bbWeight=0 PerfScore 0.00
RWD00  	dd	00000040h ; case G_M6149_IG04
       	dd	0000001Dh ; case G_M6149_IG03
       	dd	00000048h ; case G_M6149_IG05
       	dd	00000050h ; case G_M6149_IG06
       	dd	00000058h ; case G_M6149_IG07
RWD20  	dd	00000000h, 00000000h, 00000000h
RWD32  	dq	8000000000000000h, 8000000000000000h
RWD48  	dq	3FDFFFFFFFFFFFFFh, 3FDFFFFFFFFFFFFFh


; Total bytes of code 142, prolog size 8, PerfScore 37.12, instruction count 35, allocated bytes for code 142 (MethodHash=d24ae7fa) for method RoundDisasm:Round(double,int):double (FullOpts)
; ============================================================

; Assembly listing for method RoundDisasm:Round(float,int):float (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 2 inlinees without PGO data
; Final local variable assignments
;
;  V00 arg0         [V00,T01] (  8,  5   )   float  ->  mm0         single-def
;  V01 arg1         [V01,T00] (  5,  4   )     int  ->  rbx         single-def
;  V02 OutArgs      [V02    ] (  1,  1   )  struct (32) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;  V03 tmp1         [V03,T02] (  6,  3.50)   float  ->  mm0         "Inline return value spill temp"
;* V04 tmp2         [V04    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
;* V05 tmp3         [V05    ] (  0,  0   )   float  ->  zero-ref    "Inline return value spill temp"
;  V06 tmp4         [V06,T03] (  2,  0   )     ref  ->  rdx         single-def "argument with side effect"
;
; Lcl frame size = 32

G_M10373_IG01:  ;; offset=0x0000
       53                   push     rbx
       4883EC20             sub      rsp, 32
       C5F877               vzeroupper 
       8BDA                 mov      ebx, edx
						;; size=10 bbWeight=1 PerfScore 2.50
G_M10373_IG02:  ;; offset=0x000A
       83FB04               cmp      ebx, 4
       775F                 ja       SHORT G_M10373_IG09
       8BCB                 mov      ecx, ebx
       488D1578000000       lea      rdx, [reloc @RWD00]
       8B148A               mov      edx, dword ptr [rdx+4*rcx]
       488D05E8FFFFFF       lea      rax, G_M10373_IG02
       4803D0               add      rdx, rax
       FFE2                 jmp      rdx
						;; size=29 bbWeight=1 PerfScore 7.25
G_M10373_IG03:  ;; offset=0x0027
       C5F8100D81000000     vmovups  xmm1, xmmword ptr [reloc @RWD32]
       C5F828D0             vmovaps  xmm2, xmm0
       62F36D08250D82000000CA vpternlogd xmm1, xmm2, xmmword ptr [reloc @RWD48], -54
       C5F258C0             vaddss   xmm0, xmm1, xmm0
       C4E3790AC00B         vroundss xmm0, xmm0, xmm0, 11
       EB1E                 jmp      SHORT G_M10373_IG08
						;; size=35 bbWeight=0.50 PerfScore 8.62
G_M10373_IG04:  ;; offset=0x004A
       C4E3790AC004         vroundss xmm0, xmm0, xmm0, 4
       EB16                 jmp      SHORT G_M10373_IG08
						;; size=8 bbWeight=0.50 PerfScore 4.50
G_M10373_IG05:  ;; offset=0x0052
       C4E3790AC00B         vroundss xmm0, xmm0, xmm0, 11
       EB0E                 jmp      SHORT G_M10373_IG08
						;; size=8 bbWeight=0.50 PerfScore 4.50
G_M10373_IG06:  ;; offset=0x005A
       C4E3790AC009         vroundss xmm0, xmm0, xmm0, 9
       EB06                 jmp      SHORT G_M10373_IG08
						;; size=8 bbWeight=0.50 PerfScore 4.50
G_M10373_IG07:  ;; offset=0x0062
       C4E3790AC00A         vroundss xmm0, xmm0, xmm0, 10
						;; size=6 bbWeight=0.50 PerfScore 3.50
G_M10373_IG08:  ;; offset=0x0068
       4883C420             add      rsp, 32
       5B                   pop      rbx
       C3                   ret      
						;; size=6 bbWeight=1 PerfScore 1.75
G_M10373_IG09:  ;; offset=0x006E
       B9840B0000           mov      ecx, 0xB84
       48BA00400F6CFD7F0000 mov      rdx, 0x7FFD6C0F4000
       E84EE54D5F           call     CORINFO_HELP_STRCNS
       488BD0               mov      rdx, rax
       8BCB                 mov      ecx, ebx
       FF15F3947C00         call     [System.ThrowHelper:ThrowArgumentException_InvalidEnumValue[int](int,System.String)]
       CC                   int3     
						;; size=32 bbWeight=0 PerfScore 0.00
RWD00  	dd	00000040h ; case G_M10373_IG04
       	dd	0000001Dh ; case G_M10373_IG03
       	dd	00000048h ; case G_M10373_IG05
       	dd	00000050h ; case G_M10373_IG06
       	dd	00000058h ; case G_M10373_IG07
RWD20  	dd	00000000h, 00000000h, 00000000h
RWD32  	dq	8000000080000000h, 8000000080000000h
RWD48  	dq	3EFFFFFF3EFFFFFFh, 3EFFFFFF3EFFFFFFh


; Total bytes of code 142, prolog size 8, PerfScore 37.12, instruction count 35, allocated bytes for code 142 (MethodHash=e4b5d77a) for method RoundDisasm:Round(float,int):float (FullOpts)
; ============================================================

; Assembly listing for method RoundDisasm:Round2(double):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 1 inlinees with PGO data; 2 single block inlinees; 2 inlinees without PGO data
; Final local variable assignments
;
;  V00 arg0         [V00,T01] (  4,  3.50)  double  ->  mm0         single-def
;# V01 OutArgs      [V01    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;  V02 tmp1         [V02,T00] (  4,  7   )  double  ->  mm1         "Inlining Arg"
;* V03 tmp2         [V03    ] (  0,  0   )  struct (16) zero-ref    ld-addr-op "Inline stloc first use temp" <System.ReadOnlySpan`1[double]>
;* V04 tmp3         [V04,T04] (  0,  0   )  double  ->  zero-ref    "Inline stloc first use temp"
;* V05 tmp4         [V05    ] (  0,  0   )  struct (16) zero-ref    "ReadOnlySpan<T> for CreateSpan<T>" <System.ReadOnlySpan`1[double]>
;  V06 tmp5         [V06,T03] (  2,  1   )  double  ->  mm1         "Inline return value spill temp"
;* V07 tmp6         [V07    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
;  V08 tmp7         [V08,T02] (  2,  2   )  double  ->  mm1         "Inlining Arg"
;* V09 tmp8         [V09    ] (  0,  0   )  double  ->  zero-ref    "Inline return value spill temp"
;* V10 tmp9         [V10    ] (  0,  0   )   byref  ->  zero-ref    single-def "field V03._reference (fldOffset=0x0)" P-INDEP
;* V11 tmp10        [V11    ] (  0,  0   )     int  ->  zero-ref    "field V03._length (fldOffset=0x8)" P-INDEP
;* V12 tmp11        [V12    ] (  0,  0   )   byref  ->  zero-ref    single-def "field V05._reference (fldOffset=0x0)" P-INDEP
;* V13 tmp12        [V13    ] (  0,  0   )     int  ->  zero-ref    "field V05._length (fldOffset=0x8)" P-INDEP
;* V14 tmp13        [V14    ] (  0,  0   )     ref  ->  zero-ref    "argument with side effect"
;
; Lcl frame size = 0

G_M8040_IG01:  ;; offset=0x0000
       C5F877               vzeroupper 
						;; size=3 bbWeight=1 PerfScore 1.00
G_M8040_IG02:  ;; offset=0x0003
       C5F828C8             vmovaps  xmm1, xmm0
       C5F0541531000000     vandps   xmm2, xmm1, xmmword ptr [reloc @RWD00]
       C5FB101D39000000     vmovsd   xmm3, qword ptr [reloc @RWD16]
       C5F92EDA             vucomisd xmm3, xmm2
       7616                 jbe      SHORT G_M8040_IG04
						;; size=26 bbWeight=1 PerfScore 8.25
G_M8040_IG03:  ;; offset=0x001D
       C5FB590D33000000     vmulsd   xmm1, xmm0, qword ptr [reloc @RWD24]
       C4E3710BC904         vroundsd xmm1, xmm1, xmm1, 4
       C5F35E0D25000000     vdivsd   xmm1, xmm1, qword ptr [reloc @RWD24]
						;; size=22 bbWeight=0.50 PerfScore 13.00
G_M8040_IG04:  ;; offset=0x0033
       C5F828C1             vmovaps  xmm0, xmm1
						;; size=4 bbWeight=1 PerfScore 0.25
G_M8040_IG05:  ;; offset=0x0037
       C3                   ret      
						;; size=1 bbWeight=1 PerfScore 1.00
RWD00  	dq	7FFFFFFFFFFFFFFFh, 7FFFFFFFFFFFFFFFh
RWD16  	dq	4341C37937E08000h	;        1e+16
RWD24  	dq	4059000000000000h	;          100


; Total bytes of code 56, prolog size 3, PerfScore 23.50, instruction count 11, allocated bytes for code 56 (MethodHash=166ce097) for method RoundDisasm:Round2(double):double (FullOpts)
; ============================================================

; Assembly listing for method RoundDisasm:Round2(float):float (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 2 single block inlinees; 3 inlinees without PGO data
; Final local variable assignments
;
;  V00 arg0         [V00,T01] (  4,  3.50)   float  ->  mm0         single-def
;# V01 OutArgs      [V01    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;  V02 tmp1         [V02,T00] (  4,  7   )   float  ->  mm1         "Inlining Arg"
;* V03 tmp2         [V03    ] (  0,  0   )  struct (16) zero-ref    ld-addr-op "Inline stloc first use temp" <System.ReadOnlySpan`1[float]>
;* V04 tmp3         [V04,T04] (  0,  0   )   float  ->  zero-ref    "Inline stloc first use temp"
;* V05 tmp4         [V05    ] (  0,  0   )  struct (16) zero-ref    "ReadOnlySpan<T> for CreateSpan<T>" <System.ReadOnlySpan`1[float]>
;  V06 tmp5         [V06,T03] (  2,  1   )   float  ->  mm1         "Inline return value spill temp"
;* V07 tmp6         [V07    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
;  V08 tmp7         [V08,T02] (  2,  2   )   float  ->  mm1         "Inlining Arg"
;* V09 tmp8         [V09    ] (  0,  0   )   float  ->  zero-ref    "Inline return value spill temp"
;* V10 tmp9         [V10    ] (  0,  0   )   byref  ->  zero-ref    single-def "field V03._reference (fldOffset=0x0)" P-INDEP
;* V11 tmp10        [V11    ] (  0,  0   )     int  ->  zero-ref    "field V03._length (fldOffset=0x8)" P-INDEP
;* V12 tmp11        [V12    ] (  0,  0   )   byref  ->  zero-ref    single-def "field V05._reference (fldOffset=0x0)" P-INDEP
;* V13 tmp12        [V13    ] (  0,  0   )     int  ->  zero-ref    "field V05._length (fldOffset=0x8)" P-INDEP
;* V14 tmp13        [V14    ] (  0,  0   )     ref  ->  zero-ref    "argument with side effect"
;
; Lcl frame size = 0

G_M12104_IG01:  ;; offset=0x0000
       C5F877               vzeroupper 
						;; size=3 bbWeight=1 PerfScore 1.00
G_M12104_IG02:  ;; offset=0x0003
       C5F828C8             vmovaps  xmm1, xmm0
       C5F0541531000000     vandps   xmm2, xmm1, xmmword ptr [reloc @RWD00]
       C5FA101D39000000     vmovss   xmm3, dword ptr [reloc @RWD16]
       C5F82EDA             vucomiss xmm3, xmm2
       7616                 jbe      SHORT G_M12104_IG04
						;; size=26 bbWeight=1 PerfScore 8.25
G_M12104_IG03:  ;; offset=0x001D
       C5FA590D2F000000     vmulss   xmm1, xmm0, dword ptr [reloc @RWD20]
       C4E3710AC904         vroundss xmm1, xmm1, xmm1, 4
       C5F25E0D21000000     vdivss   xmm1, xmm1, dword ptr [reloc @RWD20]
						;; size=22 bbWeight=0.50 PerfScore 12.00
G_M12104_IG04:  ;; offset=0x0033
       C5F828C1             vmovaps  xmm0, xmm1
						;; size=4 bbWeight=1 PerfScore 0.25
G_M12104_IG05:  ;; offset=0x0037
       C3                   ret      
						;; size=1 bbWeight=1 PerfScore 1.00
RWD00  	dq	7FFFFFFF7FFFFFFFh, 7FFFFFFF7FFFFFFFh
RWD16  	dd	4CBEBC20h		;     1e+08
RWD20  	dd	42C80000h		;       100


; Total bytes of code 56, prolog size 3, PerfScore 22.50, instruction count 11, allocated bytes for code 56 (MethodHash=a66ed0b7) for method RoundDisasm:Round2(float):float (FullOpts)
; ============================================================

; Assembly listing for method RoundDisasm:Round4(double):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 1 inlinees with PGO data; 1 single block inlinees; 2 inlinees without PGO data
; Final local variable assignments
;
;  V00 arg0         [V00,T01] (  4,  3.50)  double  ->  mm0         single-def
;# V01 OutArgs      [V01    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;  V02 tmp1         [V02,T00] (  4,  7   )  double  ->  mm1         "Inlining Arg"
;* V03 tmp2         [V03    ] (  0,  0   )  struct (16) zero-ref    ld-addr-op "Inline stloc first use temp" <System.ReadOnlySpan`1[double]>
;* V04 tmp3         [V04,T04] (  0,  0   )  double  ->  zero-ref    "Inline stloc first use temp"
;* V05 tmp4         [V05    ] (  0,  0   )  struct (16) zero-ref    "ReadOnlySpan<T> for CreateSpan<T>" <System.ReadOnlySpan`1[double]>
;  V06 tmp5         [V06,T03] (  2,  1   )  double  ->  mm1         "Inline return value spill temp"
;* V07 tmp6         [V07    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
;  V08 tmp7         [V08,T02] (  2,  2   )  double  ->  mm1         "Inlining Arg"
;* V09 tmp8         [V09    ] (  0,  0   )  double  ->  zero-ref    "Inline return value spill temp"
;* V10 tmp9         [V10    ] (  0,  0   )   byref  ->  zero-ref    single-def "field V03._reference (fldOffset=0x0)" P-INDEP
;* V11 tmp10        [V11    ] (  0,  0   )     int  ->  zero-ref    "field V03._length (fldOffset=0x8)" P-INDEP
;* V12 tmp11        [V12    ] (  0,  0   )   byref  ->  zero-ref    single-def "field V05._reference (fldOffset=0x0)" P-INDEP
;* V13 tmp12        [V13    ] (  0,  0   )     int  ->  zero-ref    "field V05._length (fldOffset=0x8)" P-INDEP
;* V14 tmp13        [V14    ] (  0,  0   )     ref  ->  zero-ref    "argument with side effect"
;
; Lcl frame size = 0

G_M22062_IG01:  ;; offset=0x0000
       C5F877               vzeroupper 
						;; size=3 bbWeight=1 PerfScore 1.00
G_M22062_IG02:  ;; offset=0x0003
       C5F828C8             vmovaps  xmm1, xmm0
       C5F0541531000000     vandps   xmm2, xmm1, xmmword ptr [reloc @RWD00]
       C5FB101D39000000     vmovsd   xmm3, qword ptr [reloc @RWD16]
       C5F92EDA             vucomisd xmm3, xmm2
       7616                 jbe      SHORT G_M22062_IG04
						;; size=26 bbWeight=1 PerfScore 8.25
G_M22062_IG03:  ;; offset=0x001D
       C5FB590D33000000     vmulsd   xmm1, xmm0, qword ptr [reloc @RWD24]
       C4E3710BC90B         vroundsd xmm1, xmm1, xmm1, 11
       C5F35E0D25000000     vdivsd   xmm1, xmm1, qword ptr [reloc @RWD24]
						;; size=22 bbWeight=0.50 PerfScore 13.00
G_M22062_IG04:  ;; offset=0x0033
       C5F828C1             vmovaps  xmm0, xmm1
						;; size=4 bbWeight=1 PerfScore 0.25
G_M22062_IG05:  ;; offset=0x0037
       C3                   ret      
						;; size=1 bbWeight=1 PerfScore 1.00
RWD00  	dq	7FFFFFFFFFFFFFFFh, 7FFFFFFFFFFFFFFFh
RWD16  	dq	4341C37937E08000h	;        1e+16
RWD24  	dq	40C3880000000000h	;        10000


; Total bytes of code 56, prolog size 3, PerfScore 23.50, instruction count 11, allocated bytes for code 56 (MethodHash=b456a9d1) for method RoundDisasm:Round4(double):double (FullOpts)
; ============================================================

; Assembly listing for method RoundDisasm:Round4(float):float (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 1 single block inlinees; 3 inlinees without PGO data
; Final local variable assignments
;
;  V00 arg0         [V00,T01] (  4,  3.50)   float  ->  mm0         single-def
;# V01 OutArgs      [V01    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;  V02 tmp1         [V02,T00] (  4,  7   )   float  ->  mm1         "Inlining Arg"
;* V03 tmp2         [V03    ] (  0,  0   )  struct (16) zero-ref    ld-addr-op "Inline stloc first use temp" <System.ReadOnlySpan`1[float]>
;* V04 tmp3         [V04,T04] (  0,  0   )   float  ->  zero-ref    "Inline stloc first use temp"
;* V05 tmp4         [V05    ] (  0,  0   )  struct (16) zero-ref    "ReadOnlySpan<T> for CreateSpan<T>" <System.ReadOnlySpan`1[float]>
;  V06 tmp5         [V06,T03] (  2,  1   )   float  ->  mm1         "Inline return value spill temp"
;* V07 tmp6         [V07    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
;  V08 tmp7         [V08,T02] (  2,  2   )   float  ->  mm1         "Inlining Arg"
;* V09 tmp8         [V09    ] (  0,  0   )   float  ->  zero-ref    "Inline return value spill temp"
;* V10 tmp9         [V10    ] (  0,  0   )   byref  ->  zero-ref    single-def "field V03._reference (fldOffset=0x0)" P-INDEP
;* V11 tmp10        [V11    ] (  0,  0   )     int  ->  zero-ref    "field V03._length (fldOffset=0x8)" P-INDEP
;* V12 tmp11        [V12    ] (  0,  0   )   byref  ->  zero-ref    single-def "field V05._reference (fldOffset=0x0)" P-INDEP
;* V13 tmp12        [V13    ] (  0,  0   )     int  ->  zero-ref    "field V05._length (fldOffset=0x8)" P-INDEP
;* V14 tmp13        [V14    ] (  0,  0   )     ref  ->  zero-ref    "argument with side effect"
;
; Lcl frame size = 0

G_M65422_IG01:  ;; offset=0x0000
       C5F877               vzeroupper 
						;; size=3 bbWeight=1 PerfScore 1.00
G_M65422_IG02:  ;; offset=0x0003
       C5F828C8             vmovaps  xmm1, xmm0
       C5F0541531000000     vandps   xmm2, xmm1, xmmword ptr [reloc @RWD00]
       C5FA101D39000000     vmovss   xmm3, dword ptr [reloc @RWD16]
       C5F82EDA             vucomiss xmm3, xmm2
       7616                 jbe      SHORT G_M65422_IG04
						;; size=26 bbWeight=1 PerfScore 8.25
G_M65422_IG03:  ;; offset=0x001D
       C5FA590D2F000000     vmulss   xmm1, xmm0, dword ptr [reloc @RWD20]
       C4E3710AC90B         vroundss xmm1, xmm1, xmm1, 11
       C5F25E0D21000000     vdivss   xmm1, xmm1, dword ptr [reloc @RWD20]
						;; size=22 bbWeight=0.50 PerfScore 12.00
G_M65422_IG04:  ;; offset=0x0033
       C5F828C1             vmovaps  xmm0, xmm1
						;; size=4 bbWeight=1 PerfScore 0.25
G_M65422_IG05:  ;; offset=0x0037
       C3                   ret      
						;; size=1 bbWeight=1 PerfScore 1.00
RWD00  	dq	7FFFFFFF7FFFFFFFh, 7FFFFFFF7FFFFFFFh
RWD16  	dd	4CBEBC20h		;     1e+08
RWD20  	dd	461C4000h		;     10000


; Total bytes of code 56, prolog size 3, PerfScore 22.50, instruction count 11, allocated bytes for code 56 (MethodHash=15960071) for method RoundDisasm:Round4(float):float (FullOpts)
; ============================================================

Fixes #98164.

@ghost ghost added area-System.Numerics community-contribution Indicates that the PR has been added by a community member labels Feb 8, 2024
@ghost
Copy link

ghost commented Feb 8, 2024

Tagging subscribers to this area: @dotnet/area-system-numerics
See info in area-owners.md if you want to be subscribed.

Issue Details

Simplifies and optimizes parameterized Round overloads.

Code:

/// <summary>
/// Minimal wrappers around the parameterized <c>Math.Round</c> / <c>MathF.Round</c>
/// overloads, one wrapper per call shape, so that the JIT output for each shape
/// can be inspected in isolation.
/// </summary>
/// <remarks>
/// Every wrapper is declared as returning <c>double</c>, including the ones that
/// take a <c>float</c>; in those the <c>MathF.Round</c> result is implicitly
/// widened to <c>double</c> on return.
/// </remarks>
internal static class RoundDisasm
{
	// --- Rounding mode only: midpoints away from zero ---

	public static double RoundAway(double d)
	{
		return Math.Round(d, MidpointRounding.AwayFromZero);
	}

	public static double RoundAway(float f)
	{
		return MathF.Round(f, MidpointRounding.AwayFromZero);
	}

	// --- Rounding mode only: toward zero (truncation) ---

	public static double RoundTruncate(double d)
	{
		return Math.Round(d, MidpointRounding.ToZero);
	}

	public static double RoundTruncate(float f)
	{
		return MathF.Round(f, MidpointRounding.ToZero);
	}

	// --- Rounding mode supplied by the caller (not a constant at the call site) ---

	public static double Round(double d, MidpointRounding m)
	{
		return Math.Round(d, m);
	}

	public static double Round(float f, MidpointRounding m)
	{
		return MathF.Round(f, m);
	}

	// --- Constant digit count (2), no explicit rounding mode ---

	public static double Round2(double d)
	{
		return Math.Round(d, 2);
	}

	public static double Round2(float f)
	{
		return MathF.Round(f, 2);
	}

	// --- Constant digit count (4) combined with rounding toward zero ---

	public static double Round4(double d)
	{
		return Math.Round(d, 4, MidpointRounding.ToZero);
	}

	public static double Round4(float f)
	{
		return MathF.Round(f, 4, MidpointRounding.ToZero);
	}
}

.NET 8

; Assembly listing for method RoundDisasm:RoundAway(double):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 3 inlinees without PGO data

G_M000_IG01:                ;; offset=0x0000
       C5F877               vzeroupper 
 
G_M000_IG02:                ;; offset=0x0003
       C5F828C8             vmovaps  xmm1, xmm0
       C5F8101521000000     vmovups  xmm2, xmmword ptr [reloc @RWD00]
       C5F154CA             vandpd   xmm1, xmm1, xmm2
       62F1F518560D23000000 vorpd    xmm1, xmm1, qword ptr [reloc @RWD16] {1to2}
       C5F358C0             vaddsd   xmm0, xmm1, xmm0
       C4E3790BC00B         vroundsd xmm0, xmm0, xmm0, 11
 
G_M000_IG03:                ;; offset=0x0027
       C3                   ret      
 
RWD00  	dq	8000000000000000h, 8000000000000000h
RWD16  	dq	3FDFFFFFFFFFFFFFh	;          0.5

; Total bytes of code 40

; Assembly listing for method RoundDisasm:RoundAway(float):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 3 inlinees without PGO data

G_M000_IG01:                ;; offset=0x0000
       C5F877               vzeroupper 
 
G_M000_IG02:                ;; offset=0x0003
       C5F828C8             vmovaps  xmm1, xmm0
       C5F8101521000000     vmovups  xmm2, xmmword ptr [reloc @RWD00]
       C5F054CA             vandps   xmm1, xmm1, xmm2
       62F17418560D23000000 vorps    xmm1, xmm1, dword ptr [reloc @RWD16] {1to4}
       C5F258C0             vaddss   xmm0, xmm1, xmm0
       C4E3790AC00B         vroundss xmm0, xmm0, xmm0, 11
       C5FA5AC0             vcvtss2sd xmm0, xmm0, xmm0
 
G_M000_IG03:                ;; offset=0x002B
       C3                   ret      
 
RWD00  	dq	8000000080000000h, 8000000080000000h
RWD16  	dd	3EFFFFFFh		;       0.5

; Total bytes of code 44

; Assembly listing for method RoundDisasm:RoundTruncate(double):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; fully interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 1 inlinees without PGO data

G_M000_IG01:                ;; offset=0x0000
       C5F877               vzeroupper 
 
G_M000_IG02:                ;; offset=0x0003
       33D2                 xor      edx, edx
       41B802000000         mov      r8d, 2
 
G_M000_IG03:                ;; offset=0x000B
       FF259F041E00         tail.jmp [System.Math:Round(double,int,int):double]
 
; Total bytes of code 17

; Assembly listing for method RoundDisasm:RoundTruncate(float):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 1 inlinees without PGO data

G_M000_IG01:                ;; offset=0x0000
       4883EC28             sub      rsp, 40
       C5F877               vzeroupper 
 
G_M000_IG02:                ;; offset=0x0007
       33D2                 xor      edx, edx
       41B802000000         mov      r8d, 2
       FF15F38F1E00         call     [System.MathF:Round(float,int,int):float]
       C5FA5AC0             vcvtss2sd xmm0, xmm0, xmm0
 
G_M000_IG03:                ;; offset=0x0019
       4883C428             add      rsp, 40
       C3                   ret      
 
; Total bytes of code 30

; Assembly listing for method RoundDisasm:Round(double,int):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; fully interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 3 inlinees without PGO data

G_M000_IG01:                ;; offset=0x0000
       C5F877               vzeroupper 
 
G_M000_IG02:                ;; offset=0x0003
       448BC2               mov      r8d, edx
       33D2                 xor      edx, edx
 
G_M000_IG03:                ;; offset=0x0008
       FF2532041E00         tail.jmp [System.Math:Round(double,int,int):double]
 
; Total bytes of code 14

; Assembly listing for method RoundDisasm:Round(float,int):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 3 inlinees without PGO data

G_M000_IG01:                ;; offset=0x0000
       4883EC28             sub      rsp, 40
       C5F877               vzeroupper 
 
G_M000_IG02:                ;; offset=0x0007
       448BC2               mov      r8d, edx
       33D2                 xor      edx, edx
       FF15968F1E00         call     [System.MathF:Round(float,int,int):float]
       C5FA5AC0             vcvtss2sd xmm0, xmm0, xmm0
 
G_M000_IG03:                ;; offset=0x0016
       4883C428             add      rsp, 40
       C3                   ret      
 
; Total bytes of code 27

; Assembly listing for method RoundDisasm:Round2(double):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; fully interruptible
; No PGO data
; 0 inlinees with PGO data; 1 single block inlinees; 0 inlinees without PGO data

G_M000_IG01:                ;; offset=0x0000
       C5F877               vzeroupper 
 
G_M000_IG02:                ;; offset=0x0003
       BA02000000           mov      edx, 2
       4533C0               xor      r8d, r8d
 
G_M000_IG03:                ;; offset=0x000B
       FF25DF031E00         tail.jmp [System.Math:Round(double,int,int):double]
 
; Total bytes of code 17

; Assembly listing for method RoundDisasm:Round2(float):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 1 single block inlinees; 0 inlinees without PGO data

G_M000_IG01:                ;; offset=0x0000
       4883EC28             sub      rsp, 40
       C5F877               vzeroupper 
 
G_M000_IG02:                ;; offset=0x0007
       BA02000000           mov      edx, 2
       4533C0               xor      r8d, r8d
       FF15338F1E00         call     [System.MathF:Round(float,int,int):float]
       C5FA5AC0             vcvtss2sd xmm0, xmm0, xmm0
 
G_M000_IG03:                ;; offset=0x0019
       4883C428             add      rsp, 40
       C3                   ret      
 
; Total bytes of code 30

; Assembly listing for method RoundDisasm:Round4(double):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; fully interruptible
; No PGO data

G_M000_IG01:                ;; offset=0x0000
       C5F877               vzeroupper 
 
G_M000_IG02:                ;; offset=0x0003
       BA04000000           mov      edx, 4
       41B802000000         mov      r8d, 2
 
G_M000_IG03:                ;; offset=0x000E
       FF256C031E00         tail.jmp [System.Math:Round(double,int,int):double]
 
; Total bytes of code 20

; Assembly listing for method RoundDisasm:Round4(float):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data

G_M000_IG01:                ;; offset=0x0000
       4883EC28             sub      rsp, 40
       C5F877               vzeroupper 
 
G_M000_IG02:                ;; offset=0x0007
       BA04000000           mov      edx, 4
       41B802000000         mov      r8d, 2
       FF15C08E1E00         call     [System.MathF:Round(float,int,int):float]
       C5FA5AC0             vcvtss2sd xmm0, xmm0, xmm0
 
G_M000_IG03:                ;; offset=0x001C
       4883C428             add      rsp, 40
       C3                   ret      
 
; Total bytes of code 33

This branch

; Assembly listing for method RoundDisasm:RoundAway(double):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 2 inlinees without PGO data
; Final local variable assignments
;
;  V00 arg0         [V00,T00] (  4,  4   )  double  ->  mm0         single-def
;# V01 OutArgs      [V01    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;  V02 tmp1         [V02,T01] (  2,  2   )  double  ->  mm0         "Inline return value spill temp"
;* V03 tmp2         [V03    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
;* V04 tmp3         [V04    ] (  0,  0   )  double  ->  zero-ref    "Inline return value spill temp"
;* V05 tmp4         [V05    ] (  0,  0   )     ref  ->  zero-ref    "argument with side effect"
;
; Lcl frame size = 0

G_M25588_IG01:  ;; offset=0x0000
       C5F877               vzeroupper 
						;; size=3 bbWeight=1 PerfScore 1.00
G_M25588_IG02:  ;; offset=0x0003
       C5F8100D25000000     vmovups  xmm1, xmmword ptr [reloc @RWD00]
       C5F828D0             vmovaps  xmm2, xmm0
       62F3ED08250D26000000CA vpternlogq xmm1, xmm2, xmmword ptr [reloc @RWD16], -54
       C5F358C0             vaddsd   xmm0, xmm1, xmm0
       C4E3790BC00B         vroundsd xmm0, xmm0, xmm0, 11
						;; size=33 bbWeight=1 PerfScore 15.25
G_M25588_IG03:  ;; offset=0x0024
       C3                   ret      
						;; size=1 bbWeight=1 PerfScore 1.00
RWD00  	dq	8000000000000000h, 8000000000000000h
RWD16  	dq	3FDFFFFFFFFFFFFFh, 3FDFFFFFFFFFFFFFh


; Total bytes of code 37, prolog size 3, PerfScore 17.25, instruction count 7, allocated bytes for code 37 (MethodHash=318b9c0b) for method RoundDisasm:RoundAway(double):double (FullOpts)
; ============================================================

; Assembly listing for method RoundDisasm:RoundAway(float):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 2 inlinees without PGO data
; Final local variable assignments
;
;  V00 arg0         [V00,T00] (  4,  4   )   float  ->  mm0         single-def
;# V01 OutArgs      [V01    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;  V02 tmp1         [V02,T01] (  2,  2   )   float  ->  mm0         "Inline return value spill temp"
;* V03 tmp2         [V03    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
;* V04 tmp3         [V04    ] (  0,  0   )   float  ->  zero-ref    "Inline return value spill temp"
;* V05 tmp4         [V05    ] (  0,  0   )     ref  ->  zero-ref    "argument with side effect"
;
; Lcl frame size = 0

G_M34641_IG01:  ;; offset=0x0000
       C5F877               vzeroupper 
						;; size=3 bbWeight=1 PerfScore 1.00
G_M34641_IG02:  ;; offset=0x0003
       C5F8100D25000000     vmovups  xmm1, xmmword ptr [reloc @RWD00]
       C5F828D0             vmovaps  xmm2, xmm0
       62F36D08250D26000000CA vpternlogd xmm1, xmm2, xmmword ptr [reloc @RWD16], -54
       C5F258C0             vaddss   xmm0, xmm1, xmm0
       C4E3790AC00B         vroundss xmm0, xmm0, xmm0, 11
       C5FA5AC0             vcvtss2sd xmm0, xmm0, xmm0
						;; size=37 bbWeight=1 PerfScore 19.25
G_M34641_IG03:  ;; offset=0x0028
       C3                   ret      
						;; size=1 bbWeight=1 PerfScore 1.00
RWD00  	dq	8000000080000000h, 8000000080000000h
RWD16  	dq	3EFFFFFF3EFFFFFFh, 3EFFFFFF3EFFFFFFh


; Total bytes of code 41, prolog size 3, PerfScore 21.25, instruction count 8, allocated bytes for code 41 (MethodHash=f9e078ae) for method RoundDisasm:RoundAway(float):double (FullOpts)
; ============================================================

; Assembly listing for method RoundDisasm:RoundTruncate(double):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 2 inlinees without PGO data
; Final local variable assignments
;
;  V00 arg0         [V00,T00] (  3,  3   )  double  ->  mm0         single-def
;# V01 OutArgs      [V01    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;  V02 tmp1         [V02,T01] (  2,  2   )  double  ->  mm0         "Inline return value spill temp"
;* V03 tmp2         [V03    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
;* V04 tmp3         [V04    ] (  0,  0   )  double  ->  zero-ref    "Inline return value spill temp"
;* V05 tmp4         [V05    ] (  0,  0   )     ref  ->  zero-ref    "argument with side effect"
;
; Lcl frame size = 0

G_M39156_IG01:  ;; offset=0x0000
       C5F877               vzeroupper 
						;; size=3 bbWeight=1 PerfScore 1.00
G_M39156_IG02:  ;; offset=0x0003
       C4E3790BC00B         vroundsd xmm0, xmm0, xmm0, 11
						;; size=6 bbWeight=1 PerfScore 7.00
G_M39156_IG03:  ;; offset=0x0009
       C3                   ret      
						;; size=1 bbWeight=1 PerfScore 1.00

; Total bytes of code 10, prolog size 3, PerfScore 9.00, instruction count 3, allocated bytes for code 10 (MethodHash=6fe1670b) for method RoundDisasm:RoundTruncate(double):double (FullOpts)
; ============================================================

; Assembly listing for method RoundDisasm:RoundTruncate(float):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 2 inlinees without PGO data
; Final local variable assignments
;
;  V00 arg0         [V00,T00] (  3,  3   )   float  ->  mm0         single-def
;# V01 OutArgs      [V01    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;  V02 tmp1         [V02,T01] (  2,  2   )   float  ->  mm0         "Inline return value spill temp"
;* V03 tmp2         [V03    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
;* V04 tmp3         [V04    ] (  0,  0   )   float  ->  zero-ref    "Inline return value spill temp"
;* V05 tmp4         [V05    ] (  0,  0   )     ref  ->  zero-ref    "argument with side effect"
;
; Lcl frame size = 0

G_M7249_IG01:  ;; offset=0x0000
       C5F877               vzeroupper 
						;; size=3 bbWeight=1 PerfScore 1.00
G_M7249_IG02:  ;; offset=0x0003
       C4E3790AC00B         vroundss xmm0, xmm0, xmm0, 11
       C5FA5AC0             vcvtss2sd xmm0, xmm0, xmm0
						;; size=10 bbWeight=1 PerfScore 11.00
G_M7249_IG03:  ;; offset=0x000D
       C3                   ret      
						;; size=1 bbWeight=1 PerfScore 1.00

; Total bytes of code 14, prolog size 3, PerfScore 13.00, instruction count 4, allocated bytes for code 14 (MethodHash=8f28e3ae) for method RoundDisasm:RoundTruncate(float):double (FullOpts)
; ============================================================

; Assembly listing for method RoundDisasm:Round(double,int):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 2 inlinees without PGO data
; Final local variable assignments
;
;  V00 arg0         [V00,T01] (  8,  5   )  double  ->  mm0         single-def
;  V01 arg1         [V01,T00] (  5,  4   )     int  ->  rbx         single-def
;  V02 OutArgs      [V02    ] (  1,  1   )  struct (32) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;  V03 tmp1         [V03,T02] (  6,  3.50)  double  ->  mm0         "Inline return value spill temp"
;* V04 tmp2         [V04    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
;* V05 tmp3         [V05    ] (  0,  0   )  double  ->  zero-ref    "Inline return value spill temp"
;  V06 tmp4         [V06,T03] (  2,  0   )     ref  ->  rdx         single-def "argument with side effect"
;
; Lcl frame size = 32

G_M6149_IG01:  ;; offset=0x0000
       53                   push     rbx
       4883EC20             sub      rsp, 32
       C5F877               vzeroupper 
       8BDA                 mov      ebx, edx
						;; size=10 bbWeight=1 PerfScore 2.50
G_M6149_IG02:  ;; offset=0x000A
       83FB04               cmp      ebx, 4
       775F                 ja       SHORT G_M6149_IG09
       8BCB                 mov      ecx, ebx
       488D1578000000       lea      rdx, [reloc @RWD00]
       8B148A               mov      edx, dword ptr [rdx+4*rcx]
       488D05E8FFFFFF       lea      rax, G_M6149_IG02
       4803D0               add      rdx, rax
       FFE2                 jmp      rdx
						;; size=29 bbWeight=1 PerfScore 7.25
G_M6149_IG03:  ;; offset=0x0027
       C5F8100D81000000     vmovups  xmm1, xmmword ptr [reloc @RWD32]
       C5F828D0             vmovaps  xmm2, xmm0
       62F3ED08250D82000000CA vpternlogq xmm1, xmm2, xmmword ptr [reloc @RWD48], -54
       C5F358C0             vaddsd   xmm0, xmm1, xmm0
       C4E3790BC00B         vroundsd xmm0, xmm0, xmm0, 11
       EB1E                 jmp      SHORT G_M6149_IG08
						;; size=35 bbWeight=0.50 PerfScore 8.62
G_M6149_IG04:  ;; offset=0x004A
       C4E3790BC004         vroundsd xmm0, xmm0, xmm0, 4
       EB16                 jmp      SHORT G_M6149_IG08
						;; size=8 bbWeight=0.50 PerfScore 4.50
G_M6149_IG05:  ;; offset=0x0052
       C4E3790BC00B         vroundsd xmm0, xmm0, xmm0, 11
       EB0E                 jmp      SHORT G_M6149_IG08
						;; size=8 bbWeight=0.50 PerfScore 4.50
G_M6149_IG06:  ;; offset=0x005A
       C4E3790BC009         vroundsd xmm0, xmm0, xmm0, 9
       EB06                 jmp      SHORT G_M6149_IG08
						;; size=8 bbWeight=0.50 PerfScore 4.50
G_M6149_IG07:  ;; offset=0x0062
       C4E3790BC00A         vroundsd xmm0, xmm0, xmm0, 10
						;; size=6 bbWeight=0.50 PerfScore 3.50
G_M6149_IG08:  ;; offset=0x0068
       4883C420             add      rsp, 32
       5B                   pop      rbx
       C3                   ret      
						;; size=6 bbWeight=1 PerfScore 1.75
G_M6149_IG09:  ;; offset=0x006E
       B9840B0000           mov      ecx, 0xB84
       48BA004022BEFA7F0000 mov      rdx, 0x7FFABE224000
       E88EE44E5F           call     CORINFO_HELP_STRCNS
       488BD0               mov      rdx, rax
       8BCB                 mov      ecx, ebx
       FF1533947C00         call     [System.ThrowHelper:ThrowArgumentException_InvalidEnumValue[int](int,System.String)]
       CC                   int3     
						;; size=32 bbWeight=0 PerfScore 0.00
RWD00  	dd	00000040h ; case G_M6149_IG04
       	dd	0000001Dh ; case G_M6149_IG03
       	dd	00000048h ; case G_M6149_IG05
       	dd	00000050h ; case G_M6149_IG06
       	dd	00000058h ; case G_M6149_IG07
RWD20  	dd	00000000h, 00000000h, 00000000h
RWD32  	dq	8000000000000000h, 8000000000000000h
RWD48  	dq	3FDFFFFFFFFFFFFFh, 3FDFFFFFFFFFFFFFh


; Total bytes of code 142, prolog size 8, PerfScore 37.12, instruction count 35, allocated bytes for code 142 (MethodHash=d24ae7fa) for method RoundDisasm:Round(double,int):double (FullOpts)
; ============================================================

; Assembly listing for method RoundDisasm:Round(float,int):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 0 single block inlinees; 2 inlinees without PGO data
; Final local variable assignments
;
;  V00 arg0         [V00,T01] (  8,  5   )   float  ->  mm0         single-def
;  V01 arg1         [V01,T00] (  5,  4   )     int  ->  rbx         single-def
;  V02 OutArgs      [V02    ] (  1,  1   )  struct (32) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;  V03 tmp1         [V03,T02] (  6,  3.50)   float  ->  mm0         "Inline return value spill temp"
;* V04 tmp2         [V04    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
;* V05 tmp3         [V05    ] (  0,  0   )   float  ->  zero-ref    "Inline return value spill temp"
;  V06 tmp4         [V06,T03] (  2,  0   )     ref  ->  rdx         single-def "argument with side effect"
;
; Lcl frame size = 32

G_M39904_IG01:  ;; offset=0x0000
       53                   push     rbx
       4883EC20             sub      rsp, 32
       C5F877               vzeroupper 
       8BDA                 mov      ebx, edx
						;; size=10 bbWeight=1 PerfScore 2.50
G_M39904_IG02:  ;; offset=0x000A
       83FB04               cmp      ebx, 4
       7763                 ja       SHORT G_M39904_IG10
       8BCB                 mov      ecx, ebx
       488D1588000000       lea      rdx, [reloc @RWD00]
       8B148A               mov      edx, dword ptr [rdx+4*rcx]
       488D05E8FFFFFF       lea      rax, G_M39904_IG02
       4803D0               add      rdx, rax
       FFE2                 jmp      rdx
						;; size=29 bbWeight=1 PerfScore 7.25
G_M39904_IG03:  ;; offset=0x0027
       C5F8100D91000000     vmovups  xmm1, xmmword ptr [reloc @RWD32]
       C5F828D0             vmovaps  xmm2, xmm0
       62F36D08250D92000000CA vpternlogd xmm1, xmm2, xmmword ptr [reloc @RWD48], -54
       C5F258C0             vaddss   xmm0, xmm1, xmm0
       C4E3790AC00B         vroundss xmm0, xmm0, xmm0, 11
       EB1E                 jmp      SHORT G_M39904_IG08
						;; size=35 bbWeight=0.50 PerfScore 8.62
G_M39904_IG04:  ;; offset=0x004A
       C4E3790AC004         vroundss xmm0, xmm0, xmm0, 4
       EB16                 jmp      SHORT G_M39904_IG08
						;; size=8 bbWeight=0.50 PerfScore 4.50
G_M39904_IG05:  ;; offset=0x0052
       C4E3790AC00B         vroundss xmm0, xmm0, xmm0, 11
       EB0E                 jmp      SHORT G_M39904_IG08
						;; size=8 bbWeight=0.50 PerfScore 4.50
G_M39904_IG06:  ;; offset=0x005A
       C4E3790AC009         vroundss xmm0, xmm0, xmm0, 9
       EB06                 jmp      SHORT G_M39904_IG08
						;; size=8 bbWeight=0.50 PerfScore 4.50
G_M39904_IG07:  ;; offset=0x0062
       C4E3790AC00A         vroundss xmm0, xmm0, xmm0, 10
						;; size=6 bbWeight=0.50 PerfScore 3.50
G_M39904_IG08:  ;; offset=0x0068
       C5FA5AC0             vcvtss2sd xmm0, xmm0, xmm0
						;; size=4 bbWeight=1 PerfScore 4.00
G_M39904_IG09:  ;; offset=0x006C
       4883C420             add      rsp, 32
       5B                   pop      rbx
       C3                   ret      
						;; size=6 bbWeight=1 PerfScore 1.75
G_M39904_IG10:  ;; offset=0x0072
       B9840B0000           mov      ecx, 0xB84
       48BA004022BEFA7F0000 mov      rdx, 0x7FFABE224000
       E89AE34E5F           call     CORINFO_HELP_STRCNS
       488BD0               mov      rdx, rax
       8BCB                 mov      ecx, ebx
       FF153F937C00         call     [System.ThrowHelper:ThrowArgumentException_InvalidEnumValue[int](int,System.String)]
       CC                   int3     
						;; size=32 bbWeight=0 PerfScore 0.00
RWD00  	dd	00000040h ; case G_M39904_IG04
       	dd	0000001Dh ; case G_M39904_IG03
       	dd	00000048h ; case G_M39904_IG05
       	dd	00000050h ; case G_M39904_IG06
       	dd	00000058h ; case G_M39904_IG07
RWD20  	dd	00000000h, 00000000h, 00000000h
RWD32  	dq	8000000080000000h, 8000000080000000h
RWD48  	dq	3EFFFFFF3EFFFFFFh, 3EFFFFFF3EFFFFFFh


; Total bytes of code 146, prolog size 8, PerfScore 41.12, instruction count 36, allocated bytes for code 146 (MethodHash=8938641f) for method RoundDisasm:Round(float,int):double (FullOpts)
; ============================================================

; Assembly listing for method RoundDisasm:Round2(double):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 1 inlinees with PGO data; 2 single block inlinees; 2 inlinees without PGO data
; Final local variable assignments
;
;  V00 arg0         [V00,T02] (  4,  3.50)  double  ->  mm0         single-def
;  V01 OutArgs      [V01    ] (  1,  1   )  struct (32) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;  V02 tmp1         [V02,T01] (  4,  7   )  double  ->  mm1         "Inlining Arg"
;* V03 tmp2         [V03    ] (  0,  0   )  struct (16) zero-ref    do-not-enreg[SF] ld-addr-op "Inline stloc first use temp" <System.ReadOnlySpan`1[double]>
;  V04 tmp3         [V04,T04] (  3,  1.50)  double  ->  mm1         "Inline stloc first use temp"
;  V05 tmp4         [V05,T00] (  3,  3   )  struct (16) [rsp+0x28]  do-not-enreg[SF] must-init "ReadOnlySpan<T> for CreateSpan<T>" <System.ReadOnlySpan`1[double]>
;  V06 tmp5         [V06,T05] (  2,  1   )  double  ->  mm0         "Inline return value spill temp"
;* V07 tmp6         [V07    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
;  V08 tmp7         [V08,T03] (  2,  2   )  double  ->  mm0         "Inlining Arg"
;* V09 tmp8         [V09    ] (  0,  0   )  double  ->  zero-ref    "Inline return value spill temp"
;* V10 tmp9         [V10    ] (  0,  0   )     ref  ->  zero-ref    "argument with side effect"
;
; Lcl frame size = 56

G_M8040_IG01:  ;; offset=0x0000
       4883EC38             sub      rsp, 56
       C5F877               vzeroupper 
       33C0                 xor      eax, eax
       4889442428           mov      qword ptr [rsp+0x28], rax
						;; size=14 bbWeight=1 PerfScore 2.50
G_M8040_IG02:  ;; offset=0x000E
       C5F828C8             vmovaps  xmm1, xmm0
       C5F0541546000000     vandps   xmm2, xmm1, xmmword ptr [reloc @RWD00]
       C5FB101D4E000000     vmovsd   xmm3, qword ptr [reloc @RWD16]
       C5F92EDA             vucomisd xmm3, xmm2
       762F                 jbe      SHORT G_M8040_IG04
						;; size=26 bbWeight=1 PerfScore 8.25
G_M8040_IG03:  ;; offset=0x0028
       C744243010000000     mov      dword ptr [rsp+0x30], 16
       48B8206AB71AFB7F0000 mov      rax, 0x7FFB1AB76A20      ; static handle
       4889442428           mov      bword ptr [rsp+0x28], rax
       488B442428           mov      rax, bword ptr [rsp+0x28]
       C5FB104810           vmovsd   xmm1, qword ptr [rax+0x10]
       C5FB59C1             vmulsd   xmm0, xmm0, xmm1
       C4E3790BC004         vroundsd xmm0, xmm0, xmm0, 4
       C5FB5EC9             vdivsd   xmm1, xmm0, xmm1
						;; size=47 bbWeight=0.50 PerfScore 14.62
G_M8040_IG04:  ;; offset=0x0057
       C5F828C1             vmovaps  xmm0, xmm1
						;; size=4 bbWeight=1 PerfScore 0.25
G_M8040_IG05:  ;; offset=0x005B
       4883C438             add      rsp, 56
       C3                   ret      
						;; size=5 bbWeight=1 PerfScore 1.25
RWD00  	dq	7FFFFFFFFFFFFFFFh, 7FFFFFFFFFFFFFFFh
RWD16  	dq	4341C37937E08000h	;        1e+16


; Total bytes of code 96, prolog size 14, PerfScore 26.88, instruction count 20, allocated bytes for code 96 (MethodHash=166ce097) for method RoundDisasm:Round2(double):double (FullOpts)
; ============================================================

; Assembly listing for method RoundDisasm:Round2(float):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 2 single block inlinees; 3 inlinees without PGO data
; Final local variable assignments
;
;  V00 arg0         [V00,T02] (  4,  3.50)   float  ->  mm0         single-def
;  V01 OutArgs      [V01    ] (  1,  1   )  struct (32) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;  V02 tmp1         [V02,T01] (  4,  7   )   float  ->  mm1         "Inlining Arg"
;* V03 tmp2         [V03    ] (  0,  0   )  struct (16) zero-ref    do-not-enreg[SF] ld-addr-op "Inline stloc first use temp" <System.ReadOnlySpan`1[float]>
;  V04 tmp3         [V04,T04] (  3,  1.50)   float  ->  mm1         "Inline stloc first use temp"
;  V05 tmp4         [V05,T00] (  3,  3   )  struct (16) [rsp+0x28]  do-not-enreg[SF] must-init "ReadOnlySpan<T> for CreateSpan<T>" <System.ReadOnlySpan`1[float]>
;  V06 tmp5         [V06,T05] (  2,  1   )   float  ->  mm0         "Inline return value spill temp"
;* V07 tmp6         [V07    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
;  V08 tmp7         [V08,T03] (  2,  2   )   float  ->  mm0         "Inlining Arg"
;* V09 tmp8         [V09    ] (  0,  0   )   float  ->  zero-ref    "Inline return value spill temp"
;* V10 tmp9         [V10    ] (  0,  0   )     ref  ->  zero-ref    "argument with side effect"
;
; Lcl frame size = 56

G_M52813_IG01:  ;; offset=0x0000
       4883EC38             sub      rsp, 56
       C5F877               vzeroupper 
       33C0                 xor      eax, eax
       4889442428           mov      qword ptr [rsp+0x28], rax
						;; size=14 bbWeight=1 PerfScore 2.50
G_M52813_IG02:  ;; offset=0x000E
       C5F828C8             vmovaps  xmm1, xmm0
       C5F0541546000000     vandps   xmm2, xmm1, xmmword ptr [reloc @RWD00]
       C5FA101D4E000000     vmovss   xmm3, dword ptr [reloc @RWD16]
       C5F82EDA             vucomiss xmm3, xmm2
       762F                 jbe      SHORT G_M52813_IG04
						;; size=26 bbWeight=1 PerfScore 8.25
G_M52813_IG03:  ;; offset=0x0028
       C744243007000000     mov      dword ptr [rsp+0x30], 7
       48B868DCB71AFB7F0000 mov      rax, 0x7FFB1AB7DC68      ; static handle
       4889442428           mov      bword ptr [rsp+0x28], rax
       488B442428           mov      rax, bword ptr [rsp+0x28]
       C5FA104808           vmovss   xmm1, dword ptr [rax+0x08]
       C5FA59C1             vmulss   xmm0, xmm0, xmm1
       C4E3790AC004         vroundss xmm0, xmm0, xmm0, 4
       C5FA5EC9             vdivss   xmm1, xmm0, xmm1
						;; size=47 bbWeight=0.50 PerfScore 13.62
G_M52813_IG04:  ;; offset=0x0057
       C5FA5AC1             vcvtss2sd xmm0, xmm0, xmm1
						;; size=4 bbWeight=1 PerfScore 4.00
G_M52813_IG05:  ;; offset=0x005B
       4883C438             add      rsp, 56
       C3                   ret      
						;; size=5 bbWeight=1 PerfScore 1.25
RWD00  	dq	7FFFFFFF7FFFFFFFh, 7FFFFFFF7FFFFFFFh
RWD16  	dd	4CBEBC20h		;     1e+08


; Total bytes of code 96, prolog size 14, PerfScore 29.62, instruction count 20, allocated bytes for code 96 (MethodHash=81b131b2) for method RoundDisasm:Round2(float):double (FullOpts)
; ============================================================

; Assembly listing for method RoundDisasm:Round4(double):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 1 inlinees with PGO data; 1 single block inlinees; 2 inlinees without PGO data
; Final local variable assignments
;
;  V00 arg0         [V00,T02] (  4,  3.50)  double  ->  mm0         single-def
;  V01 OutArgs      [V01    ] (  1,  1   )  struct (32) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;  V02 tmp1         [V02,T01] (  4,  7   )  double  ->  mm1         "Inlining Arg"
;* V03 tmp2         [V03    ] (  0,  0   )  struct (16) zero-ref    do-not-enreg[SF] ld-addr-op "Inline stloc first use temp" <System.ReadOnlySpan`1[double]>
;  V04 tmp3         [V04,T04] (  3,  1.50)  double  ->  mm1         "Inline stloc first use temp"
;  V05 tmp4         [V05,T00] (  3,  3   )  struct (16) [rsp+0x28]  do-not-enreg[SF] must-init "ReadOnlySpan<T> for CreateSpan<T>" <System.ReadOnlySpan`1[double]>
;  V06 tmp5         [V06,T05] (  2,  1   )  double  ->  mm0         "Inline return value spill temp"
;* V07 tmp6         [V07    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
;  V08 tmp7         [V08,T03] (  2,  2   )  double  ->  mm0         "Inlining Arg"
;* V09 tmp8         [V09    ] (  0,  0   )  double  ->  zero-ref    "Inline return value spill temp"
;* V10 tmp9         [V10    ] (  0,  0   )     ref  ->  zero-ref    "argument with side effect"
;
; Lcl frame size = 56

G_M22062_IG01:  ;; offset=0x0000
       4883EC38             sub      rsp, 56
       C5F877               vzeroupper 
       33C0                 xor      eax, eax
       4889442428           mov      qword ptr [rsp+0x28], rax
						;; size=14 bbWeight=1 PerfScore 2.50
G_M22062_IG02:  ;; offset=0x000E
       C5F828C8             vmovaps  xmm1, xmm0
       C5F0541546000000     vandps   xmm2, xmm1, xmmword ptr [reloc @RWD00]
       C5FB101D4E000000     vmovsd   xmm3, qword ptr [reloc @RWD16]
       C5F92EDA             vucomisd xmm3, xmm2
       762F                 jbe      SHORT G_M22062_IG04
						;; size=26 bbWeight=1 PerfScore 8.25
G_M22062_IG03:  ;; offset=0x0028
       C744243010000000     mov      dword ptr [rsp+0x30], 16
       48B8206AB71AFB7F0000 mov      rax, 0x7FFB1AB76A20      ; static handle
       4889442428           mov      bword ptr [rsp+0x28], rax
       488B442428           mov      rax, bword ptr [rsp+0x28]
       C5FB104820           vmovsd   xmm1, qword ptr [rax+0x20]
       C5FB59C1             vmulsd   xmm0, xmm0, xmm1
       C4E3790BC00B         vroundsd xmm0, xmm0, xmm0, 11
       C5FB5EC9             vdivsd   xmm1, xmm0, xmm1
						;; size=47 bbWeight=0.50 PerfScore 14.62
G_M22062_IG04:  ;; offset=0x0057
       C5F828C1             vmovaps  xmm0, xmm1
						;; size=4 bbWeight=1 PerfScore 0.25
G_M22062_IG05:  ;; offset=0x005B
       4883C438             add      rsp, 56
       C3                   ret      
						;; size=5 bbWeight=1 PerfScore 1.25
RWD00  	dq	7FFFFFFFFFFFFFFFh, 7FFFFFFFFFFFFFFFh
RWD16  	dq	4341C37937E08000h	;        1e+16


; Total bytes of code 96, prolog size 14, PerfScore 26.88, instruction count 20, allocated bytes for code 96 (MethodHash=b456a9d1) for method RoundDisasm:Round4(double):double (FullOpts)
; ============================================================

; Assembly listing for method RoundDisasm:Round4(float):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 1 single block inlinees; 3 inlinees without PGO data
; Final local variable assignments
;
;  V00 arg0         [V00,T02] (  4,  3.50)   float  ->  mm0         single-def
;  V01 OutArgs      [V01    ] (  1,  1   )  struct (32) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;  V02 tmp1         [V02,T01] (  4,  7   )   float  ->  mm1         "Inlining Arg"
;* V03 tmp2         [V03    ] (  0,  0   )  struct (16) zero-ref    do-not-enreg[SF] ld-addr-op "Inline stloc first use temp" <System.ReadOnlySpan`1[float]>
;  V04 tmp3         [V04,T04] (  3,  1.50)   float  ->  mm1         "Inline stloc first use temp"
;  V05 tmp4         [V05,T00] (  3,  3   )  struct (16) [rsp+0x28]  do-not-enreg[SF] must-init "ReadOnlySpan<T> for CreateSpan<T>" <System.ReadOnlySpan`1[float]>
;  V06 tmp5         [V06,T05] (  2,  1   )   float  ->  mm0         "Inline return value spill temp"
;* V07 tmp6         [V07    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
;  V08 tmp7         [V08,T03] (  2,  2   )   float  ->  mm0         "Inlining Arg"
;* V09 tmp8         [V09    ] (  0,  0   )   float  ->  zero-ref    "Inline return value spill temp"
;* V10 tmp9         [V10    ] (  0,  0   )     ref  ->  zero-ref    "argument with side effect"
;
; Lcl frame size = 56

G_M34635_IG01:  ;; offset=0x0000
       4883EC38             sub      rsp, 56
       C5F877               vzeroupper 
       33C0                 xor      eax, eax
       4889442428           mov      qword ptr [rsp+0x28], rax
						;; size=14 bbWeight=1 PerfScore 2.50
G_M34635_IG02:  ;; offset=0x000E
       C5F828C8             vmovaps  xmm1, xmm0
       C5F0541546000000     vandps   xmm2, xmm1, xmmword ptr [reloc @RWD00]
       C5FA101D4E000000     vmovss   xmm3, dword ptr [reloc @RWD16]
       C5F82EDA             vucomiss xmm3, xmm2
       762F                 jbe      SHORT G_M34635_IG04
						;; size=26 bbWeight=1 PerfScore 8.25
G_M34635_IG03:  ;; offset=0x0028
       C744243007000000     mov      dword ptr [rsp+0x30], 7
       48B868DCB71AFB7F0000 mov      rax, 0x7FFB1AB7DC68      ; static handle
       4889442428           mov      bword ptr [rsp+0x28], rax
       488B442428           mov      rax, bword ptr [rsp+0x28]
       C5FA104810           vmovss   xmm1, dword ptr [rax+0x10]
       C5FA59C1             vmulss   xmm0, xmm0, xmm1
       C4E3790AC00B         vroundss xmm0, xmm0, xmm0, 11
       C5FA5EC9             vdivss   xmm1, xmm0, xmm1
						;; size=47 bbWeight=0.50 PerfScore 13.62
G_M34635_IG04:  ;; offset=0x0057
       C5FA5AC1             vcvtss2sd xmm0, xmm0, xmm1
						;; size=4 bbWeight=1 PerfScore 4.00
G_M34635_IG05:  ;; offset=0x005B
       4883C438             add      rsp, 56
       C3                   ret      
						;; size=5 bbWeight=1 PerfScore 1.25
RWD00  	dq	7FFFFFFF7FFFFFFFh, 7FFFFFFF7FFFFFFFh
RWD16  	dd	4CBEBC20h		;     1e+08


; Total bytes of code 96, prolog size 14, PerfScore 29.62, instruction count 20, allocated bytes for code 96 (MethodHash=cd4078b4) for method RoundDisasm:Round4(float):double (FullOpts)
; ============================================================

Fixes #98164.

Author: MichalPetryka
Assignees: -
Labels: area-System.Numerics

Milestone: -

@MichalPetryka
Copy link
Contributor Author

@MihuBot

Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.

Labels

area-System.Numerics, community-contribution (indicates that the PR has been added by a community member)

Projects

None yet

Development

Successfully merging this pull request may close these issues.

Math.Round[F](x, MidpointRounding) doesn't use VROUNDS[SD] outside of "to even"

2 participants