Posted to tcl by aspect at Wed Sep 30 00:10:41 GMT 2015 (view pretty)
Examining the assembly from applying drh's example optimisation to CompareVarKeys.

Assembly obtained with:

configure --enable-symbols=all
make
rm tclVar.o
make -n tclVar.o | sed -e 's/^gcc/gcc -S -fverbose-asm/' | sh

Original 7b3b0ca73eb7ba1cbce9 (without optimisation):

# ----------------------------------------------------------------------
# Listing 1: CompareVarKeys before the change (x86-64, AT&T syntax).
# In:   %rdi = keyPtr, %rsi = hPtr (both spilled to the frame on entry).
# Out:  %eax = 1 if the keys match, 0 otherwise.
# Everything lives in one function: the pointer-equality test, both
# string fetches and the length/memcmp comparison.  As a result the
# prologue pushes %rbx and %r12-%r14 before the pointer test, so even
# the "same object" path that just returns 1 pays for four saves and
# four restores.
# Register roles (from gcc's annotations): %r13 = p1, %ebx = l1,
# %r14 = p2, %r12d = l2.
# ----------------------------------------------------------------------
        .type CompareVarKeys, @function
CompareVarKeys:
.LFB82:
        .loc 1 6359 0
        .cfi_startproc
        pushq %rbp #
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        movq %rsp, %rbp #,
        .cfi_def_cfa_register 6
        # Callee-saved registers are pushed unconditionally, before any test.
        pushq %r14 #
        pushq %r13 #
        pushq %r12 #
        pushq %rbx #
        subq $32, %rsp #,
        .cfi_offset 14, -24
        .cfi_offset 13, -32
        .cfi_offset 12, -40
        .cfi_offset 3, -48
        movq %rdi, -56(%rbp) # keyPtr, keyPtr
        movq %rsi, -64(%rbp) # hPtr, hPtr
        .loc 1 6360 0
        movq -56(%rbp), %rax # keyPtr, tmp95
        movq %rax, -40(%rbp) # tmp95, objPtr1
        .loc 1 6361 0
        movq -64(%rbp), %rax # hPtr, tmp96
        movq 32(%rax), %rax # hPtr_7(D)->key.objPtr, tmp97
        movq %rax, -48(%rbp) # tmp97, objPtr2
        .loc 1 6369 0
        # Fast path: identical pointers return 1 via .L911.
        movq -40(%rbp), %rax # objPtr1, tmp98
        cmpq -48(%rbp), %rax # objPtr2, tmp98
        jne .L910 #,
        .loc 1 6370 0
        movl $1, %eax #, D.20671
        jmp .L911 #
.L910:
        .loc 1 6378 0
        # p1 = objPtr1->bytes if non-NULL, else the result of Tcl_GetString(objPtr1).
        movq -40(%rbp), %rax # objPtr1, tmp99
        movq 8(%rax), %rax # objPtr1_6->bytes, D.20670
        testq %rax, %rax # D.20670
        je .L912 #,
        .loc 1 6378 0 is_stmt 0 discriminator 1
        movq -40(%rbp), %rax # objPtr1, tmp100
        movq 8(%rax), %rax # objPtr1_6->bytes, D.20670
        jmp .L913 #
.L912:
        .loc 1 6378 0 discriminator 2
        movq -40(%rbp), %rax # objPtr1, tmp101
        movq %rax, %rdi # tmp101,
        call Tcl_GetString@PLT #
.L913:
        .loc 1 6378 0 discriminator 4
        movq %rax, %r13 # D.20670, p1
        .loc 1 6379 0 is_stmt 1 discriminator 4
        movq -40(%rbp), %rax # objPtr1, tmp102
        movl 16(%rax), %ebx # objPtr1_6->length, l1
        .loc 1 6380 0 discriminator 4
        # p2 = objPtr2->bytes if non-NULL, else the result of Tcl_GetString(objPtr2).
        movq -48(%rbp), %rax # objPtr2, tmp103
        movq 8(%rax), %rax # objPtr2_8->bytes, D.20670
        testq %rax, %rax # D.20670
        je .L914 #,
        .loc 1 6380 0 is_stmt 0 discriminator 1
        movq -48(%rbp), %rax # objPtr2, tmp104
        movq 8(%rax), %rax # objPtr2_8->bytes, D.20670
        jmp .L915 #
.L914:
        .loc 1 6380 0 discriminator 2
        movq -48(%rbp), %rax # objPtr2, tmp105
        movq %rax, %rdi # tmp105,
        call Tcl_GetString@PLT #
.L915:
        .loc 1 6380 0 discriminator 4
        movq %rax, %r14 # D.20670, p2
        .loc 1 6381 0 is_stmt 1 discriminator 4
        movq -48(%rbp), %rax # objPtr2, tmp106
        movl 16(%rax), %r12d # objPtr2_8->length, l2
        .loc 1 6387 0 discriminator 4
        # Result is 1 only when l1 == l2 and memcmp(p1, p2, l1) returns 0.
        cmpl %r12d, %ebx # l2, l1
        jne .L916 #,
        .loc 1 6387 0 is_stmt 0 discriminator 1
        movslq %ebx, %rax # l1, D.20672
        movq %rax, %rdx # D.20672,
        movq %r14, %rsi # p2,
        movq %r13, %rdi # p1,
        call memcmp@PLT #
        testl %eax, %eax # D.20671
        jne .L916 #,
        .loc 1 6387 0 discriminator 3
        movl $1, %eax #, D.20671
        jmp .L918 #
.L916:
        .loc 1 6387 0 discriminator 4
        movl $0, %eax #, D.20671
.L918:
        .loc 1 6387 0
        nop
.L911:
        .loc 1 6388 0 is_stmt 1
        # Shared epilogue: the fast path also lands here and restores all four registers.
        addq $32, %rsp #,
        popq %rbx #
        popq %r12 #
        popq %r13 #
        popq %r14 #
        popq %rbp #
        .cfi_def_cfa 7, 8
        ret
        .cfi_endproc
.LFE82:
        .size CompareVarKeys, .-CompareVarKeys

Optimised outer function:

# ----------------------------------------------------------------------
# Listing 2: CompareVarKeys after the change.
# In:   %rdi = keyPtr, %rsi = hPtr.
# Out:  %eax = 1 when objPtr1 == objPtr2; otherwise whatever
#       CompareDistinctVarKeys(objPtr1, objPtr2) left in %eax.
# Only %rbp is saved here; none of %rbx/%r12-%r14 is pushed, so the
# identical-pointer path no longer saves or restores them.
# ----------------------------------------------------------------------
        .type CompareVarKeys, @function
CompareVarKeys:
.LFB83:
        .loc 1 6392 0
        .cfi_startproc
        pushq %rbp #
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        movq %rsp, %rbp #,
        .cfi_def_cfa_register 6
        subq $32, %rsp #,
        movq %rdi, -24(%rbp) # keyPtr, keyPtr
        movq %rsi, -32(%rbp) # hPtr, hPtr
        .loc 1 6393 0
        movq -24(%rbp), %rax # keyPtr, tmp85
        movq %rax, -8(%rbp) # tmp85, objPtr1
        .loc 1 6394 0
        movq -32(%rbp), %rax # hPtr, tmp86
        movq 32(%rax), %rax # hPtr_4(D)->key.objPtr, tmp87
        movq %rax, -16(%rbp) # tmp87, objPtr2
        .loc 1 6400 0
        # Fast path: identical pointers return 1 straight away.
        movq -8(%rbp), %rax # objPtr1, tmp88
        cmpq -16(%rbp), %rax # objPtr2, tmp88
        jne .L918 #,
        .loc 1 6401 0
        movl $1, %eax #, D.20680
        jmp .L919 #
.L918:
        .loc 1 6403 0
        # Slow path: pass objPtr1 in %rdi and objPtr2 in %rsi to the helper.
        movq -16(%rbp), %rdx # objPtr2, tmp89
        movq -8(%rbp), %rax # objPtr1, tmp90
        movq %rdx, %rsi # tmp89,
        movq %rax, %rdi # tmp90,
        call CompareDistinctVarKeys #
.L919:
        .loc 1 6405 0
        leave
        .cfi_def_cfa 7, 8
        ret
        .cfi_endproc
.LFE83:
        .size CompareVarKeys, .-CompareVarKeys

Inner function:

# ----------------------------------------------------------------------
# Listing 3: CompareDistinctVarKeys, the slow path split out of
# CompareVarKeys.
# In:   %rdi = objPtr1, %rsi = objPtr2.
# Out:  %eax = 1 when the lengths are equal and memcmp over l1 bytes
#       returns 0; otherwise 0.
# The four callee-saved pushes now happen only in this function.
# Register roles (from gcc's annotations): %r13 = p1, %ebx = l1,
# %r14 = p2, %r12d = l2.
# ----------------------------------------------------------------------
        .type CompareDistinctVarKeys, @function
CompareDistinctVarKeys:
.LFB82:
        .loc 1 6367 0
        .cfi_startproc
        pushq %rbp #
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        movq %rsp, %rbp #,
        .cfi_def_cfa_register 6
        pushq %r14 #
        pushq %r13 #
        pushq %r12 #
        pushq %rbx #
        subq $16, %rsp #,
        .cfi_offset 14, -24
        .cfi_offset 13, -32
        .cfi_offset 12, -40
        .cfi_offset 3, -48
        movq %rdi, -40(%rbp) # objPtr1, objPtr1
        movq %rsi, -48(%rbp) # objPtr2, objPtr2
        .loc 1 6376 0
        # p1 = objPtr1->bytes if non-NULL, else the result of Tcl_GetString(objPtr1).
        movq -40(%rbp), %rax # objPtr1, tmp95
        movq 8(%rax), %rax # objPtr1_4(D)->bytes, D.20677
        testq %rax, %rax # D.20677
        je .L910 #,
        .loc 1 6376 0 is_stmt 0 discriminator 1
        movq -40(%rbp), %rax # objPtr1, tmp96
        movq 8(%rax), %rax # objPtr1_4(D)->bytes, D.20677
        jmp .L911 #
.L910:
        .loc 1 6376 0 discriminator 2
        movq -40(%rbp), %rax # objPtr1, tmp97
        movq %rax, %rdi # tmp97,
        call Tcl_GetString@PLT #
.L911:
        .loc 1 6376 0 discriminator 4
        movq %rax, %r13 # D.20677, p1
        .loc 1 6377 0 is_stmt 1 discriminator 4
        movq -40(%rbp), %rax # objPtr1, tmp98
        movl 16(%rax), %ebx # objPtr1_4(D)->length, l1
        .loc 1 6378 0 discriminator 4
        # p2 = objPtr2->bytes if non-NULL, else the result of Tcl_GetString(objPtr2).
        movq -48(%rbp), %rax # objPtr2, tmp99
        movq 8(%rax), %rax # objPtr2_10(D)->bytes, D.20677
        testq %rax, %rax # D.20677
        je .L912 #,
        .loc 1 6378 0 is_stmt 0 discriminator 1
        movq -48(%rbp), %rax # objPtr2, tmp100
        movq 8(%rax), %rax # objPtr2_10(D)->bytes, D.20677
        jmp .L913 #
.L912:
        .loc 1 6378 0 discriminator 2
        movq -48(%rbp), %rax # objPtr2, tmp101
        movq %rax, %rdi # tmp101,
        call Tcl_GetString@PLT #
.L913:
        .loc 1 6378 0 discriminator 4
        movq %rax, %r14 # D.20677, p2
        .loc 1 6379 0 is_stmt 1 discriminator 4
        movq -48(%rbp), %rax # objPtr2, tmp102
        movl 16(%rax), %r12d # objPtr2_10(D)->length, l2
        .loc 1 6385 0 discriminator 4
        # Result is 1 only when l1 == l2 and memcmp(p1, p2, l1) returns 0.
        cmpl %r12d, %ebx # l2, l1
        jne .L914 #,
        .loc 1 6385 0 is_stmt 0 discriminator 1
        movslq %ebx, %rax # l1, D.20679
        movq %rax, %rdx # D.20679,
        movq %r14, %rsi # p2,
        movq %r13, %rdi # p1,
        call memcmp@PLT #
        testl %eax, %eax # D.20678
        jne .L914 #,
        .loc 1 6385 0 discriminator 3
        movl $1, %eax #, D.20678
        jmp .L915 #
.L914:
        .loc 1 6385 0 discriminator 4
        movl $0, %eax #, D.20678
.L915:
        .loc 1 6386 0 is_stmt 1 discriminator 6
        addq $16, %rsp #,
        popq %rbx #
        popq %r12 #
        popq %r13 #
        popq %r14 #
        popq %rbp #
        .cfi_def_cfa 7, 8
        ret
        .cfi_endproc
.LFE82:
        .size CompareDistinctVarKeys, .-CompareDistinctVarKeys