Posted to tcl by aspect at Wed Sep 30 00:10:41 GMT 2015 (view raw)
- Examining the assembly from applying drh's example optimisation to CompareVarKeys.
- Assembly obtained with:
- configure --enable-symbols=all
- make
- rm tclVar.o
- make -n tclVar.o | sed -e 's/^gcc/gcc -S -fverbose-asm/' | sh
- Original 7b3b0ca73eb7ba1cbce9 (without drh's optimisation applied):
- .type CompareVarKeys, @function # key-equality test: leaves 1 in %eax when the two keys match, 0 otherwise
- CompareVarKeys: # in: %rdi = keyPtr, %rsi = hPtr; out: %eax = 1 (match) or 0 (no match)
- .LFB82:
- .loc 1 6359 0
- .cfi_startproc
- pushq %rbp # save the caller's frame pointer
- .cfi_def_cfa_offset 16
- .cfi_offset 6, -16
- movq %rsp, %rbp #, -- %rbp is the base for every -N(%rbp) slot used below
- .cfi_def_cfa_register 6
- pushq %r14 # callee-saved; reused below for p2
- pushq %r13 # callee-saved; reused below for p1
- pushq %r12 # callee-saved; reused below for l2
- pushq %rbx # callee-saved; reused below for l1
- subq $32, %rsp #, -- room for the four 8-byte stack slots at -40 .. -64(%rbp)
- .cfi_offset 14, -24
- .cfi_offset 13, -32
- .cfi_offset 12, -40
- .cfi_offset 3, -48
- movq %rdi, -56(%rbp) # keyPtr, keyPtr -- spill the first argument to the stack
- movq %rsi, -64(%rbp) # hPtr, hPtr -- spill the second argument to the stack
- .loc 1 6360 0
- movq -56(%rbp), %rax # keyPtr, tmp95
- movq %rax, -40(%rbp) # tmp95, objPtr1 -- objPtr1 = keyPtr
- .loc 1 6361 0
- movq -64(%rbp), %rax # hPtr, tmp96
- movq 32(%rax), %rax # hPtr_7(D)->key.objPtr, tmp97 -- pointer field at offset 32 of *hPtr
- movq %rax, -48(%rbp) # tmp97, objPtr2 -- objPtr2 = hPtr->key.objPtr
- .loc 1 6369 0
- movq -40(%rbp), %rax # objPtr1, tmp98
- cmpq -48(%rbp), %rax # objPtr2, tmp98 -- are both keys the very same object pointer?
- jne .L910 #, -- no: go compare the string contents
- .loc 1 6370 0
- movl $1, %eax #, D.20671 -- identical pointers: result = 1
- jmp .L911 # straight to the epilogue, skipping the string comparison
- .L910: # pointers differ: fetch string pointer and length of each object
- .loc 1 6378 0
- movq -40(%rbp), %rax # objPtr1, tmp99
- movq 8(%rax), %rax # objPtr1_6->bytes, D.20670 -- bytes field at offset 8
- testq %rax, %rax # D.20670 -- is objPtr1->bytes NULL?
- je .L912 #, -- yes: obtain the string through Tcl_GetString instead
- .loc 1 6378 0 is_stmt 0 discriminator 1
- movq -40(%rbp), %rax # objPtr1, tmp100
- movq 8(%rax), %rax # objPtr1_6->bytes, D.20670 -- non-NULL: the same field is loaded a second time
- jmp .L913 #
- .L912:
- .loc 1 6378 0 discriminator 2
- movq -40(%rbp), %rax # objPtr1, tmp101
- movq %rax, %rdi # tmp101, -- argument: objPtr1
- call Tcl_GetString@PLT # its %rax return value becomes p1 below
- .L913:
- .loc 1 6378 0 discriminator 4
- movq %rax, %r13 # D.20670, p1 -- p1 = string pointer from whichever path ran
- .loc 1 6379 0 is_stmt 1 discriminator 4
- movq -40(%rbp), %rax # objPtr1, tmp102
- movl 16(%rax), %ebx # objPtr1_6->length, l1 -- 32-bit length field at offset 16, read after the possible call
- .loc 1 6380 0 discriminator 4
- movq -48(%rbp), %rax # objPtr2, tmp103
- movq 8(%rax), %rax # objPtr2_8->bytes, D.20670
- testq %rax, %rax # D.20670 -- is objPtr2->bytes NULL?
- je .L914 #, -- yes: obtain the string through Tcl_GetString instead
- .loc 1 6380 0 is_stmt 0 discriminator 1
- movq -48(%rbp), %rax # objPtr2, tmp104
- movq 8(%rax), %rax # objPtr2_8->bytes, D.20670 -- non-NULL: the same field is loaded a second time
- jmp .L915 #
- .L914:
- .loc 1 6380 0 discriminator 2
- movq -48(%rbp), %rax # objPtr2, tmp105
- movq %rax, %rdi # tmp105, -- argument: objPtr2
- call Tcl_GetString@PLT # its %rax return value becomes p2 below
- .L915:
- .loc 1 6380 0 discriminator 4
- movq %rax, %r14 # D.20670, p2 -- p2 = string pointer from whichever path ran
- .loc 1 6381 0 is_stmt 1 discriminator 4
- movq -48(%rbp), %rax # objPtr2, tmp106
- movl 16(%rax), %r12d # objPtr2_8->length, l2 -- 32-bit length field at offset 16
- .loc 1 6387 0 discriminator 4
- cmpl %r12d, %ebx # l2, l1 -- do the lengths differ?
- jne .L916 #, -- yes: result is 0, memcmp is not called
- .loc 1 6387 0 is_stmt 0 discriminator 1
- movslq %ebx, %rax # l1, D.20672 -- sign-extend l1 to 64 bits for the byte count
- movq %rax, %rdx # D.20672, -- third argument: number of bytes to compare
- movq %r14, %rsi # p2, -- second argument
- movq %r13, %rdi # p1, -- first argument
- call memcmp@PLT # compare l1 bytes of p1 against p2
- testl %eax, %eax # D.20671 -- memcmp result
- jne .L916 #, -- nonzero: contents differ, result is 0
- .loc 1 6387 0 discriminator 3
- movl $1, %eax #, D.20671 -- same length and same bytes: result = 1
- jmp .L918 #
- .L916:
- .loc 1 6387 0 discriminator 4
- movl $0, %eax #, D.20671 -- result = 0
- .L918:
- .loc 1 6387 0
- nop # no operation; execution falls through to the shared epilogue
- .L911: # common exit: %eax already holds the result
- .loc 1 6388 0 is_stmt 1
- addq $32, %rsp #, -- release the stack slots reserved in the prologue
- popq %rbx # restore callee-saved registers in reverse push order
- popq %r12 #
- popq %r13 #
- popq %r14 #
- popq %rbp # restore the caller's frame pointer
- .cfi_def_cfa 7, 8
- ret
- .cfi_endproc
- .LFE82:
- .size CompareVarKeys, .-CompareVarKeys
- Optimised outer function:
- .type CompareVarKeys, @function # outer half of the split: pointer-identity check, then defers to CompareDistinctVarKeys
- CompareVarKeys: # in: %rdi = keyPtr, %rsi = hPtr; out: %eax = 1 if the pointers are identical, else the callee's result
- .LFB83:
- .loc 1 6392 0
- .cfi_startproc
- pushq %rbp # save the caller's frame pointer
- .cfi_def_cfa_offset 16
- .cfi_offset 6, -16
- movq %rsp, %rbp #, -- %rbp is the base for every -N(%rbp) slot used below
- .cfi_def_cfa_register 6
- subq $32, %rsp #, -- room for four 8-byte stack slots; no callee-saved registers are pushed in this version
- movq %rdi, -24(%rbp) # keyPtr, keyPtr -- spill the first argument to the stack
- movq %rsi, -32(%rbp) # hPtr, hPtr -- spill the second argument to the stack
- .loc 1 6393 0
- movq -24(%rbp), %rax # keyPtr, tmp85
- movq %rax, -8(%rbp) # tmp85, objPtr1 -- objPtr1 = keyPtr
- .loc 1 6394 0
- movq -32(%rbp), %rax # hPtr, tmp86
- movq 32(%rax), %rax # hPtr_4(D)->key.objPtr, tmp87 -- pointer field at offset 32 of *hPtr
- movq %rax, -16(%rbp) # tmp87, objPtr2 -- objPtr2 = hPtr->key.objPtr
- .loc 1 6400 0
- movq -8(%rbp), %rax # objPtr1, tmp88
- cmpq -16(%rbp), %rax # objPtr2, tmp88 -- are both keys the very same object pointer?
- jne .L918 #, -- no: hand both pointers to CompareDistinctVarKeys
- .loc 1 6401 0
- movl $1, %eax #, D.20680 -- identical pointers: result = 1
- jmp .L919 # return without making any call
- .L918:
- .loc 1 6403 0
- movq -16(%rbp), %rdx # objPtr2, tmp89
- movq -8(%rbp), %rax # objPtr1, tmp90
- movq %rdx, %rsi # tmp89, -- second argument: objPtr2
- movq %rax, %rdi # tmp90, -- first argument: objPtr1
- call CompareDistinctVarKeys # nothing writes %eax between here and ret, so the callee's result is returned as-is
- .L919: # common exit: %eax already holds the result
- .loc 1 6405 0
- leave # tear down the frame: restores %rsp from %rbp and pops the saved %rbp
- .cfi_def_cfa 7, 8
- ret
- .cfi_endproc
- .LFE83:
- .size CompareVarKeys, .-CompareVarKeys
- Inner function (called by the outer function only when the two pointers differ):
- .type CompareDistinctVarKeys, @function # string comparison of two objects: %eax = 1 when lengths and bytes match, else 0
- CompareDistinctVarKeys: # in: %rdi = objPtr1, %rsi = objPtr2; out: %eax = 1 (equal) or 0 (different)
- .LFB82:
- .loc 1 6367 0
- .cfi_startproc
- pushq %rbp # save the caller's frame pointer
- .cfi_def_cfa_offset 16
- .cfi_offset 6, -16
- movq %rsp, %rbp #, -- %rbp is the base for every -N(%rbp) slot used below
- .cfi_def_cfa_register 6
- pushq %r14 # callee-saved; reused below for p2
- pushq %r13 # callee-saved; reused below for p1
- pushq %r12 # callee-saved; reused below for l2
- pushq %rbx # callee-saved; reused below for l1
- subq $16, %rsp #, -- room for the two 8-byte stack slots at -40 and -48(%rbp)
- .cfi_offset 14, -24
- .cfi_offset 13, -32
- .cfi_offset 12, -40
- .cfi_offset 3, -48
- movq %rdi, -40(%rbp) # objPtr1, objPtr1 -- spill the first argument to the stack
- movq %rsi, -48(%rbp) # objPtr2, objPtr2 -- spill the second argument to the stack
- .loc 1 6376 0
- movq -40(%rbp), %rax # objPtr1, tmp95
- movq 8(%rax), %rax # objPtr1_4(D)->bytes, D.20677 -- bytes field at offset 8
- testq %rax, %rax # D.20677 -- is objPtr1->bytes NULL?
- je .L910 #, -- yes: obtain the string through Tcl_GetString instead
- .loc 1 6376 0 is_stmt 0 discriminator 1
- movq -40(%rbp), %rax # objPtr1, tmp96
- movq 8(%rax), %rax # objPtr1_4(D)->bytes, D.20677 -- non-NULL: the same field is loaded a second time
- jmp .L911 #
- .L910:
- .loc 1 6376 0 discriminator 2
- movq -40(%rbp), %rax # objPtr1, tmp97
- movq %rax, %rdi # tmp97, -- argument: objPtr1
- call Tcl_GetString@PLT # its %rax return value becomes p1 below
- .L911:
- .loc 1 6376 0 discriminator 4
- movq %rax, %r13 # D.20677, p1 -- p1 = string pointer from whichever path ran
- .loc 1 6377 0 is_stmt 1 discriminator 4
- movq -40(%rbp), %rax # objPtr1, tmp98
- movl 16(%rax), %ebx # objPtr1_4(D)->length, l1 -- 32-bit length field at offset 16, read after the possible call
- .loc 1 6378 0 discriminator 4
- movq -48(%rbp), %rax # objPtr2, tmp99
- movq 8(%rax), %rax # objPtr2_10(D)->bytes, D.20677
- testq %rax, %rax # D.20677 -- is objPtr2->bytes NULL?
- je .L912 #, -- yes: obtain the string through Tcl_GetString instead
- .loc 1 6378 0 is_stmt 0 discriminator 1
- movq -48(%rbp), %rax # objPtr2, tmp100
- movq 8(%rax), %rax # objPtr2_10(D)->bytes, D.20677 -- non-NULL: the same field is loaded a second time
- jmp .L913 #
- .L912:
- .loc 1 6378 0 discriminator 2
- movq -48(%rbp), %rax # objPtr2, tmp101
- movq %rax, %rdi # tmp101, -- argument: objPtr2
- call Tcl_GetString@PLT # its %rax return value becomes p2 below
- .L913:
- .loc 1 6378 0 discriminator 4
- movq %rax, %r14 # D.20677, p2 -- p2 = string pointer from whichever path ran
- .loc 1 6379 0 is_stmt 1 discriminator 4
- movq -48(%rbp), %rax # objPtr2, tmp102
- movl 16(%rax), %r12d # objPtr2_10(D)->length, l2 -- 32-bit length field at offset 16
- .loc 1 6385 0 discriminator 4
- cmpl %r12d, %ebx # l2, l1 -- do the lengths differ?
- jne .L914 #, -- yes: result is 0, memcmp is not called
- .loc 1 6385 0 is_stmt 0 discriminator 1
- movslq %ebx, %rax # l1, D.20679 -- sign-extend l1 to 64 bits for the byte count
- movq %rax, %rdx # D.20679, -- third argument: number of bytes to compare
- movq %r14, %rsi # p2, -- second argument
- movq %r13, %rdi # p1, -- first argument
- call memcmp@PLT # compare l1 bytes of p1 against p2
- testl %eax, %eax # D.20678 -- memcmp result
- jne .L914 #, -- nonzero: contents differ, result is 0
- .loc 1 6385 0 discriminator 3
- movl $1, %eax #, D.20678 -- same length and same bytes: result = 1
- jmp .L915 #
- .L914:
- .loc 1 6385 0 discriminator 4
- movl $0, %eax #, D.20678 -- result = 0
- .L915: # common exit: %eax already holds the result
- .loc 1 6386 0 is_stmt 1 discriminator 6
- addq $16, %rsp #, -- release the stack slots reserved in the prologue
- popq %rbx # restore callee-saved registers in reverse push order
- popq %r12 #
- popq %r13 #
- popq %r14 #
- popq %rbp # restore the caller's frame pointer
- .cfi_def_cfa 7, 8
- ret
- .cfi_endproc
- .LFE82:
- .size CompareDistinctVarKeys, .-CompareDistinctVarKeys