KEMBAR78
Investigate adjusting heuristics for unrolled block copies/initialization · Issue #82529 · dotnet/runtime · GitHub
Skip to content

Investigate adjusting heuristics for unrolled block copies/initialization #82529

@am11

Description

@am11

Zero initialization becomes expensive when struct fields with two-byte elements are involved:

Given:

unsafe struct S1
{
    fixed byte a[10];
    int b;
    fixed byte c[23];
    fixed byte d[24];
    fixed byte e[25];
}

unsafe struct S2
{
    fixed short a[10];
    int b;
    fixed short c[23];
    fixed short d[24];
    fixed short e[25];
}

Unlike S1, initializing S2 is not unrolled inline; it calls CORINFO_HELP_MEMSET and fails to make use of vector instructions:

;   S1 X1() { S1 s = default; return s; }

C:X1():S1:this:
       sub      rsp, 24
       vzeroupper 
       mov      rax, qword ptr [(reloc)]
       mov      qword ptr [rsp+10H], rax
       xor      eax, eax
       vxorps   ymm0, ymm0
       vmovdqu  ymmword ptr[rsi], ymm0
       vmovdqu  ymmword ptr[rsi+20H], ymm0
       vmovdqu  xmmword ptr [rsi+40H], xmm0
       mov      qword ptr [rsi+50H], rax
       mov      rax, rsi
       lea      rdi, [(reloc)]
       mov      rdi, qword ptr [rdi]
       cmp      qword ptr [rsp+10H], rdi
       je       SHORT G_M61359_IG03
       call     [CORINFO_HELP_FAIL_FAST]
G_M61359_IG03:
       nop      
       add      rsp, 24
       ret      

;   S2 X2() { S2 s = default; return s; }

C:X2():S2:this:
       push     rbx
       sub      rsp, 16
       mov      rax, qword ptr [(reloc)]
       mov      qword ptr [rsp+08H], rax
       mov      rbx, rsi
       xor      esi, esi
       mov      rdi, rbx
       mov      edx, 168
       call     [CORINFO_HELP_MEMSET]
       mov      rax, rbx
       lea      rdi, [(reloc)]
       mov      rdi, qword ptr [rdi]
       cmp      qword ptr [rsp+08H], rdi
       je       SHORT G_M46095_IG03
       call     [CORINFO_HELP_FAIL_FAST]
G_M46095_IG03:
       nop      
       add      rsp, 16
       pop      rbx
       ret      

The codegen could try to match that of a C++ compiler:

X2():                                  # @X2()
        mov     rax, rdi
        vxorps  xmm0, xmm0, xmm0
        vmovups ymmword ptr [rdi + 128], ymm0
        vmovups ymmword ptr [rdi + 96], ymm0
        vmovups ymmword ptr [rdi + 64], ymm0
        vmovups ymmword ptr [rdi + 32], ymm0
        vmovups ymmword ptr [rdi], ymm0
        mov     qword ptr [rdi + 160], 0
        vzeroupper
        ret

https://godbolt.org/z/a3WsPdd4M

Metadata

Metadata

Assignees

Labels

area-CodeGen-coreclr: CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI

Type

No type

Projects

No projects

Relationships

None yet

Development

No branches or pull requests

Issue actions