KEMBAR78
Function Call Optimization | DOC
Function Call Optimization


This note describes the observations on the following Function Calls:

     1.     Constructor call
     2.     Constructor call of Base Class
     3.     Get / Set Methods.

To test the above, a sample Base class has been developed.
class Base
{
      public:
            size_t            nameLen;
            char              *name;

          protected:
                Base(char *nameStr):
                      nameLen((nameStr)? strlen(nameStr): 0),
                      name(strcpy(new char[nameLen+1], nameStr))
                {}
          …
};

This generates the following assembly in a debug build using g++. The function calls
are annotated with "=>" (they were marked in red in the original document).

          _ZN4BaseC2EPc:                            => Base::Base()
          .LFB1443:
                .loc 3 17 0
                pushq %rbp #
          .LCFI22:
                movq %rsp, %rbp #,
          .LCFI23:
                pushq %rbx #
          .LCFI24:
                subq $40, %rsp   #,
          .LCFI25:
                movq %rdi, -16(%rbp)         # this, this
                movq %rsi, -24(%rbp)         # nameStr, nameStr
          .LBB13:
                .loc 3 20 0
                movq -16(%rbp), %rax         # this,
                movq %rax, -32(%rbp)         #,
                cmpq $0, -24(%rbp)           #, nameStr
                je    .L39 #,
                movq -24(%rbp), %rdi         # nameStr, nameStr
                call strlen      #                 => strlen()
                movq %rax, -40(%rbp)         # tmp61,
                jmp   .L40 #
          .L39:
                movq $0, -40(%rbp)           #,
.L40:
             movq      -40(%rbp), %rax  #,
             movq      -32(%rbp), %rdx  #,
             movq      %rax, (%rdx)     #, <variable>.nameLen
             movq      -16(%rbp), %rbx  # this, this
             movq      -16(%rbp), %rax  # this, this
             movq      (%rax), %rdi     # <variable>.nameLen, tmp63
             incq      %rdi # tmp63
             call      _Znam #                => operator new()
             movq      %rax, %rdi #, tmp65
             movq      -24(%rbp), %rsi  # nameStr, nameStr
             call      strcpy       #         => strcpy()
             movq      %rax, 8(%rbx)    #, <variable>.name
       .LBE13:
             addq      $40, %rsp       #,
             popq      %rbx #
             leave
             ret

The Derived Class,

       class Derived: public Base
       {
                   Base *myBase;

               public:
                     Derived(char *name):
                           Base(name),
                           myBase((Base*)this)
                     {}
               …
       };

generates the following assembly:

       _ZN7DerivedC1EPc:                              => Derived::Derived()
       .LFB1452:
             .loc 3 53 0
             pushq %rbp #
       .LCFI19:
             movq %rsp, %rbp #,
       .LCFI20:
             subq $16, %rsp   #,
       .LCFI21:
             movq %rdi, -8(%rbp)              # this, this
             movq %rsi, -16(%rbp)             # name, name
       .LBB12:
             .loc 3 56 0
             movq -16(%rbp), %rsi             #   name, name
             movq -8(%rbp), %rdi              #   this, this
             call _ZN4BaseC2EPc               #       => Base::Base()
             movq -8(%rbp), %rdx              #   this, this
             movq -8(%rbp), %rax              #   this, this
             movq %rax, 16(%rdx)              #   this, <variable>.myBase
       .LBE12:
             leave
             ret

Finally, the instantiation of a derived Class Object as in

       char *s; Derived d(s);
generates:

             .loc 2 8 0
             movq -24(%rbp), %rsi            # s, s
             leaq -64(%rbp), %rdi            #, tmp59
       .LEHB0:
             call _ZN7DerivedC1EPc           #      => Derived::Derived()

This shows that, in the debug build, each function is invoked through an explicit call, as expected.

Building the instantiation in release mode, we see the following:

       .LCFI2:
             testq %rdi, %rdi # s
             movq %rsp, %rbp #, tmp114
             je    .L3   #,
             call strlen       #          => strlen()
       .L3:
       .L5:
             leaq 1(%rax), %rdi     #, tmp67
             movq %rax, (%rbp)      # tmp63, <variable>.nameLen
       .LEHB0:
             call _Znam #                 => operator new()
       .LEHE0:
             movq %rbx, %rsi # s, nameStr
             movq %rax, %rdi #, <anonymous>
             call strcpy       #          => strcpy()
             movq %rax, 8(%rbp)     # tmp70, <variable>.name
             movq 8(%rsp), %rdi     # <variable>.name, <variable>.name
             movq %rbp, 16(%rbp)    # tmp114, <variable>.myBase
             testq %rdi, %rdi # <variable>.name
             jne   .L44 #,

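Thus we see that the Derived (and Base) constructor calls have been completely
inlined: no call to either constructor remains, only the calls to strlen(),
operator new and strcpy() made from the constructor body.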

The above is illustrated in terms of the constructors. Similar optimizations
(mostly) take place for (non-virtual) destructors, copy constructors, copy assignment
operators, all non-virtual inline member functions and all inline global functions.

Virtual member functions, however, are generally not inlined, because the function
that is actually called is determined only at run time.

The same behavior is observed in case of Get / Set Methods.
class Base
{
      …
      char *GetName() const { return name; }
      void SetName(char *nameStr)
      {
            if (nameStr)
            {
                  if (name)
                  {
                         delete [] name;
                  }

                     name =
                     strcpy(new char[strlen(nameStr)+1], nameStr);
              }
       }
       …
};

The assemblies for the Get / Set Methods are shown below:
_ZNK4Base7GetNameEv:               => Base::GetName()
.LFB1448:
      .loc 3 29 0
      pushq %rbp #
.LCFI17:
      movq %rsp, %rbp #,
.LCFI18:
      movq %rdi, -8(%rbp)    # this, this
.LBB11:
      .loc 3 29 0
      movq -8(%rbp), %rax    # this, this
      movq 8(%rax), %rax     # <variable>.name, <variable>.name
.LBE11:
      leave
      ret

_ZN4Base7SetNameEPc:               => Base::SetName()
.LFB1449:
      .loc 3 31 0
      pushq %rbp #
.LCFI13:
      movq %rsp, %rbp #,
.LCFI14:
      pushq %rbx #
.LCFI15:
      subq $24, %rsp   #,
.LCFI16:
      movq %rdi, -16(%rbp)   # this, this
      movq %rsi, -24(%rbp)   # nameStr, nameStr
.LBB10:
      .loc 3 32 0
      cmpq $0, -24(%rbp)     #, nameStr
      je    .L29 #,
      .loc 3 34 0
      movq -16(%rbp), %rax   # this, this
      cmpq $0, 8(%rax) #, <variable>.name
      je    .L31 #,
      .loc 3 36 0
      movq -16(%rbp), %rax   # this, this
      cmpq $0, 8(%rax) #, <variable>.name
      je    .L31 #,
      movq -16(%rbp), %rax   # this, this
      movq 8(%rax), %rdi     # <variable>.name, <variable>.name
      call _ZdaPv      #           => operator delete()
.L31:
      .loc 3 39 0
      movq -16(%rbp), %rbx   # this, this
      movq -24(%rbp), %rdi   # nameStr, nameStr
      call strlen      #           => strlen()
      movq %rax, %rdi # tmp65, tmp63
      incq %rdi # tmp63
      call _Znam #                 => operator new()
      movq %rax, %rdi #, tmp66
      movq -24(%rbp), %rsi   # nameStr, nameStr
      call strcpy      #           => strcpy()
      movq %rax, 8(%rbx)     #, <variable>.name
.L29:
.LBE10:
      .loc 3 41 0
      addq $24, %rsp   #,
      popq %rbx #
      leave
      ret
The calls,

               char *oldName = d.GetName();
               char *newName = "My Gang";
               d.SetName(newName);

generate the following assembly in debug build:

       .LEHE0:
             .loc    2 11 0
             leaq     -64(%rbp), %rdi   #, tmp60
             call     _ZNK4Base7GetNameEv     #     => Base::GetName()
             movq     %rax, -72(%rbp)   # tmp61, oldName
             .loc    2 12 0
             movq     $.LC0, -80(%rbp) #, newName
             .loc    2 13 0
             movq     -80(%rbp), %rsi   # newName, newName
             leaq     -64(%rbp), %rdi   #, tmp63
       .LEHB1:
             call     _ZN4Base7SetNameEPc    #      => Base::SetName()
             .loc    2 16 0
             movq     $.LC1, -80(%rbp) #, newName


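The same calls in a release build generate the following; note that no call to
Base::GetName() or Base::SetName() remains, as both have been inlined: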

       .L10:
             movl     $.LC0, %edi #, nameStr
             call     strlen      #                 => strlen()
             leaq     1(%rax), %rdi     #, tmp80
       .LEHB1:
             call     _Znam #                       => operator new()
             movq     %rax, %rdi #, tmp85
             movl     $.LC0, %esi #, nameStr
             call     strcpy      #                 => strcpy()
             testq    %rax, %rax # tmp86
             movq     %rax, 8(%rsp)     # tmp86, <variable>.name
             je       .L14 #,
             movq     %rax, %rdi # tmp86, <variable>.name
             call     _ZdaPv      #                 => operator delete()
             movl     $.LC1, %edi #, newName
             call     strlen      #
             leaq     1(%rax), %rdi     #, tmp90
             call     _Znam #                       => operator new()
             movq     %rax, %rdi #, tmp95
             movl     $.LC1, %esi #, newName
             call     strcpy      #                 => strcpy()
             movq     %rax, 8(%rsp)     # tmp96, <variable>.name

Function Call Optimization

  • 1.
    Function Call Optimization This note describes the observations on the following Function Calls: 1. Constructor call 2. Constructor call of Base Class 3. Get / Set Methods. To Test for the above a sample Base Class has been developed. class Base { public: size_t nameLen; char *name; protected: Base(char *nameStr): nameLen((nameStr)? strlen(nameStr): 0), name(strcpy(new char[nameLen+1], nameStr)) {} … }; This generates the following assembly in debug build using g++. The function calls have been marked in red. _ZN4BaseC2EPc: => Base::Base() .LFB1443: .loc 3 17 0 pushq %rbp # .LCFI22: movq %rsp, %rbp #, .LCFI23: pushq %rbx # .LCFI24: subq $40, %rsp #, .LCFI25: movq %rdi, -16(%rbp) # this, this movq %rsi, -24(%rbp) # nameStr, nameStr .LBB13: .loc 3 20 0 movq -16(%rbp), %rax # this, movq %rax, -32(%rbp) #, cmpq $0, -24(%rbp) #, nameStr je .L39 #, movq -24(%rbp), %rdi # nameStr, nameStr call strlen # => strlen() movq %rax, -40(%rbp) # tmp61, jmp .L40 # .L39: movq $0, -40(%rbp) #,
  • 2.
    .L40: movq -40(%rbp), %rax #, movq -32(%rbp), %rdx #, movq %rax, (%rdx) #, <variable>.nameLen movq -16(%rbp), %rbx # this, this movq -16(%rbp), %rax # this, this movq (%rax), %rdi # <variable>.nameLen, tmp63 incq %rdi # tmp63 call _Znam # => operator new() movq %rax, %rdi #, tmp65 movq -24(%rbp), %rsi # nameStr, nameStr call strcpy # => strcpy() movq %rax, 8(%rbx) #, <variable>.name .LBE13: addq $40, %rsp #, popq %rbx # leave ret The Derived Class, class Derived: public Base { Base *myBase; public: Derived(char *name): Base(name), myBase((Base*)this) {} … }; generates the following assembly: _ZN7DerivedC1EPc: => Derived::Derived() .LFB1452: .loc 3 53 0 pushq %rbp # .LCFI19: movq %rsp, %rbp #, .LCFI20: subq $16, %rsp #, .LCFI21: movq %rdi, -8(%rbp) # this, this movq %rsi, -16(%rbp) # name, name .LBB12: .loc 3 56 0 movq -16(%rbp), %rsi # name, name movq -8(%rbp), %rdi # this, this call _ZN4BaseC2EPc # => Base::Base() movq -8(%rbp), %rdx # this, this movq -8(%rbp), %rax # this, this movq %rax, 16(%rdx) # this, <variable>.myBase .LBE12: leave ret Finally, the instantiation of a derived Class Object as in char *s; Derived d(s);
  • 3.
    generates: .loc 2 8 0 movq -24(%rbp), %rsi # s, s leaq -64(%rbp), %rdi #, tmp59 .LEHB0: call _ZN7DerivedC1EPc # => Derived::Derived() This means that the functions are called respectively as we had expected. Building the instantiation in release mode, we see the following: .LCFI2: testq %rdi, %rdi # s movq %rsp, %rbp #, tmp114 je .L3 #, call strlen # => strlen() .L3: .L5: leaq 1(%rax), %rdi #, tmp67 movq %rax, (%rbp) # tmp63, <variable>.nameLen .LEHB0: call _Znam # => operator new() .LEHE0: movq %rbx, %rsi # s, nameStr movq %rax, %rdi #, <anonymous> call strcpy # => strcpy() movq %rax, 8(%rbp) # tmp70, <variable>.name movq 8(%rsp), %rdi # <variable>.name, <variable>.name movq %rbp, 16(%rbp) # tmp114, <variable>.myBase testq %rdi, %rdi # <variable>.name jne .L44 #, Thus we see that the Derived (and Base) Constructor calls have been totally optimized. The above is illustrated in terms of the constructors. Similar optimizations do (mostly) take place for (non-virtual) destructors, copy constructor, copy assignment operator, all non-virtual inline member functions and all inline global functions. Only virtual member functions are not inlined as their call sequence is runtime dependent. The same behavior is observed in case of Get / Set Methods.
  • 4.
    class Base { … char *GetName() const { return name; } void SetName(char *nameStr) { if (nameStr) { if (name) { delete [] name; } name = strcpy(new char[strlen(nameStr)+1], nameStr); } } … }; The assemblies for the Get / Set Methods are shown below:
  • 5.
    _ZNK4Base7GetNameEv: => Base::GetName() .LFB1448: .loc 3 29 0 pushq %rbp # .LCFI17: movq %rsp, %rbp #, .LCFI18: movq %rdi, -8(%rbp) # this, this .LBB11: .loc 3 29 0 movq -8(%rbp), %rax # this, this movq 8(%rax), %rax # <variable>.name, <variable>.name .LBE11: leave ret _ZN4Base7SetNameEPc: => Base::SetName() .LFB1449: .loc 3 31 0 pushq %rbp # .LCFI13: movq %rsp, %rbp #, .LCFI14: pushq %rbx # .LCFI15: subq $24, %rsp #, .LCFI16: movq %rdi, -16(%rbp) # this, this movq %rsi, -24(%rbp) # nameStr, nameStr .LBB10: .loc 3 32 0 cmpq $0, -24(%rbp) #, nameStr je .L29 #, .loc 3 34 0 movq -16(%rbp), %rax # this, this cmpq $0, 8(%rax) #, <variable>.name je .L31 #, .loc 3 36 0 movq -16(%rbp), %rax # this, this cmpq $0, 8(%rax) #, <variable>.name je .L31 #, movq -16(%rbp), %rax # this, this movq 8(%rax), %rdi # <variable>.name, <variable>.name call _ZdaPv # => operator delete() .L31: .loc 3 39 0 movq -16(%rbp), %rbx # this, this movq -24(%rbp), %rdi # nameStr, nameStr call strlen # => strlen() movq %rax, %rdi # tmp65, tmp63 incq %rdi # tmp63 call _Znam # => operator new() movq %rax, %rdi #, tmp66 movq -24(%rbp), %rsi # nameStr, nameStr call strcpy # => strcpy() movq %rax, 8(%rbx) #, <variable>.name .L29: .LBE10: .loc 3 41 0 addq $24, %rsp #, popq %rbx # leave ret
  • 6.
    The calls, char *oldName = d.GetName(); char *newName = "My Gang"; d.SetName(newName); generate the following assembly in debug build: .LEHE0: .loc 2 11 0 leaq -64(%rbp), %rdi #, tmp60 call _ZNK4Base7GetNameEv # => Base::GetName() movq %rax, -72(%rbp) # tmp61, oldName .loc 2 12 0 movq $.LC0, -80(%rbp) #, newName .loc 2 13 0 movq -80(%rbp), %rsi # newName, newName leaq -64(%rbp), %rdi #, tmp63 .LEHB1: call _ZN4Base7SetNameEPc # => Base::SetName() .loc 2 16 0 movq $.LC1, -80(%rbp) #, newName The same in release build is: .L10: movl $.LC0, %edi #, nameStr call strlen # => strlen() leaq 1(%rax), %rdi #, tmp80 .LEHB1: call _Znam # => operator new() movq %rax, %rdi #, tmp85 movl $.LC0, %esi #, nameStr call strcpy # => strcpy() testq %rax, %rax # tmp86 movq %rax, 8(%rsp) # tmp86, <variable>.name je .L14 #, movq %rax, %rdi # tmp86, <variable>.name call _ZdaPv # => operator delete() movl $.LC1, %edi #, newName call strlen # leaq 1(%rax), %rdi #, tmp90 call _Znam # => operator new() movq %rax, %rdi #, tmp95 movl $.LC1, %esi #, newName call strcpy # => strcpy() movq %rax, 8(%rsp) # tmp96, <variable>.name