Posted to tcl by aspect at Wed Sep 30 00:10:41 GMT 2015 (view pretty)

Examining the assembly from applying drh's example optimisation to CompareVarKeys.

Assembly obtained with:

    configure --enable-symbols=all
    make
    rm tclVar.o
    make -n tclVar.o | sed -e 's/^gcc/gcc -S -fverbose-asm/' | sh


Original 7b3b0ca73eb7ba1cbce9 (without drh's optimisation applied):

	# CompareVarKeys, original form, as emitted by gcc -S -fverbose-asm
	# (AT&T operand order: source, destination).
	#
	# In:   %rdi = keyPtr, %rsi = hPtr
	# Out:  %eax = 1 when the two keys compare equal, 0 otherwise
	#
	# Frame slots:  -40(%rbp) objPtr1    -48(%rbp) objPtr2
	#               -56(%rbp) keyPtr     -64(%rbp) hPtr
	# Registers:    %r13 = p1, %ebx = l1, %r14 = p2, %r12d = l2
	#
	# Note that %r14, %r13, %r12 and %rbx are all pushed in the prologue,
	# i.e. before the pointer-identity test, so the early-return path
	# (objPtr1 == objPtr2) pays for saving and restoring them as well.
	.type	CompareVarKeys, @function
CompareVarKeys:
.LFB82:
	.loc 1 6359 0
	.cfi_startproc
	# Prologue: set up the frame pointer, save the four registers that
	# will hold p1/l1/p2/l2, and reserve 32 bytes for the stack slots.
	pushq	%rbp	#
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	movq	%rsp, %rbp	#,
	.cfi_def_cfa_register 6
	pushq	%r14	#
	pushq	%r13	#
	pushq	%r12	#
	pushq	%rbx	#
	subq	$32, %rsp	#,
	.cfi_offset 14, -24
	.cfi_offset 13, -32
	.cfi_offset 12, -40
	.cfi_offset 3, -48
	# Spill both incoming arguments to their stack slots.
	movq	%rdi, -56(%rbp)	# keyPtr, keyPtr
	movq	%rsi, -64(%rbp)	# hPtr, hPtr
	.loc 1 6360 0
	# objPtr1 = keyPtr
	movq	-56(%rbp), %rax	# keyPtr, tmp95
	movq	%rax, -40(%rbp)	# tmp95, objPtr1
	.loc 1 6361 0
	# objPtr2 = hPtr->key.objPtr (field at offset 32 of *hPtr)
	movq	-64(%rbp), %rax	# hPtr, tmp96
	movq	32(%rax), %rax	# hPtr_7(D)->key.objPtr, tmp97
	movq	%rax, -48(%rbp)	# tmp97, objPtr2
	.loc 1 6369 0
	# Fast path: identical pointers => return 1 without touching the strings.
	movq	-40(%rbp), %rax	# objPtr1, tmp98
	cmpq	-48(%rbp), %rax	# objPtr2, tmp98
	jne	.L910	#,
	.loc 1 6370 0
	movl	$1, %eax	#, D.20671
	jmp	.L911	#
.L910:
	.loc 1 6378 0
	# p1 = objPtr1->bytes (offset 8) if non-NULL, else Tcl_GetString(objPtr1).
	# The field is loaded twice: once for the test, once for the value.
	movq	-40(%rbp), %rax	# objPtr1, tmp99
	movq	8(%rax), %rax	# objPtr1_6->bytes, D.20670
	testq	%rax, %rax	# D.20670
	je	.L912	#,
	.loc 1 6378 0 is_stmt 0 discriminator 1
	movq	-40(%rbp), %rax	# objPtr1, tmp100
	movq	8(%rax), %rax	# objPtr1_6->bytes, D.20670
	jmp	.L913	#
.L912:
	.loc 1 6378 0 discriminator 2
	movq	-40(%rbp), %rax	# objPtr1, tmp101
	movq	%rax, %rdi	# tmp101,
	call	Tcl_GetString@PLT	#
.L913:
	.loc 1 6378 0 discriminator 4
	# Both arms leave the string pointer in %rax; keep it in %r13 as p1.
	movq	%rax, %r13	# D.20670, p1
	.loc 1 6379 0 is_stmt 1 discriminator 4
	# l1 = objPtr1->length (32-bit field at offset 16), read after p1 is known.
	movq	-40(%rbp), %rax	# objPtr1, tmp102
	movl	16(%rax), %ebx	# objPtr1_6->length, l1
	.loc 1 6380 0 discriminator 4
	# p2 = objPtr2->bytes if non-NULL, else Tcl_GetString(objPtr2).
	movq	-48(%rbp), %rax	# objPtr2, tmp103
	movq	8(%rax), %rax	# objPtr2_8->bytes, D.20670
	testq	%rax, %rax	# D.20670
	je	.L914	#,
	.loc 1 6380 0 is_stmt 0 discriminator 1
	movq	-48(%rbp), %rax	# objPtr2, tmp104
	movq	8(%rax), %rax	# objPtr2_8->bytes, D.20670
	jmp	.L915	#
.L914:
	.loc 1 6380 0 discriminator 2
	movq	-48(%rbp), %rax	# objPtr2, tmp105
	movq	%rax, %rdi	# tmp105,
	call	Tcl_GetString@PLT	#
.L915:
	.loc 1 6380 0 discriminator 4
	movq	%rax, %r14	# D.20670, p2
	.loc 1 6381 0 is_stmt 1 discriminator 4
	# l2 = objPtr2->length
	movq	-48(%rbp), %rax	# objPtr2, tmp106
	movl	16(%rax), %r12d	# objPtr2_8->length, l2
	.loc 1 6387 0 discriminator 4
	# Result = (l1 == l2) && (memcmp(p1, p2, l1) == 0).
	# Differing lengths skip the memcmp call entirely.
	cmpl	%r12d, %ebx	# l2, l1
	jne	.L916	#,
	.loc 1 6387 0 is_stmt 0 discriminator 1
	# l1 is sign-extended to 64 bits for memcmp's length argument.
	movslq	%ebx, %rax	# l1, D.20672
	movq	%rax, %rdx	# D.20672,
	movq	%r14, %rsi	# p2,
	movq	%r13, %rdi	# p1,
	call	memcmp@PLT	#
	testl	%eax, %eax	# D.20671
	jne	.L916	#,
	.loc 1 6387 0 discriminator 3
	movl	$1, %eax	#, D.20671
	jmp	.L918	#
.L916:
	.loc 1 6387 0 discriminator 4
	movl	$0, %eax	#, D.20671
.L918:
	.loc 1 6387 0
	# Compiler-emitted no-op at the join point of the two result arms.
	nop
.L911:
	.loc 1 6388 0 is_stmt 1
	# Epilogue: release the slots and restore the saved registers in
	# reverse push order; the result is already in %eax.
	addq	$32, %rsp	#,
	popq	%rbx	#
	popq	%r12	#
	popq	%r13	#
	popq	%r14	#
	popq	%rbp	#
	.cfi_def_cfa 7, 8
	ret
	.cfi_endproc
.LFE82:
	.size	CompareVarKeys, .-CompareVarKeys





Optimised outer function:

	# CompareVarKeys, optimised outer function, as emitted by
	# gcc -S -fverbose-asm (AT&T operand order: source, destination).
	#
	# In:   %rdi = keyPtr, %rsi = hPtr
	# Out:  %eax = 1 when objPtr1 == objPtr2; otherwise whatever
	#       CompareDistinctVarKeys(objPtr1, objPtr2) returns in %eax
	#
	# Frame slots:  -8(%rbp) objPtr1     -16(%rbp) objPtr2
	#               -24(%rbp) keyPtr     -32(%rbp) hPtr
	#
	# Only %rbp is saved here: no %rbx/%r12-%r14 pushes appear in this
	# function, so the pointer-identity path runs without them.
	.type	CompareVarKeys, @function
CompareVarKeys:
.LFB83:
	.loc 1 6392 0
	.cfi_startproc
	# Prologue: frame pointer plus 32 bytes of stack slots.
	pushq	%rbp	#
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	movq	%rsp, %rbp	#,
	.cfi_def_cfa_register 6
	subq	$32, %rsp	#,
	# Spill both incoming arguments to their stack slots.
	movq	%rdi, -24(%rbp)	# keyPtr, keyPtr
	movq	%rsi, -32(%rbp)	# hPtr, hPtr
	.loc 1 6393 0
	# objPtr1 = keyPtr
	movq	-24(%rbp), %rax	# keyPtr, tmp85
	movq	%rax, -8(%rbp)	# tmp85, objPtr1
	.loc 1 6394 0
	# objPtr2 = hPtr->key.objPtr (field at offset 32 of *hPtr)
	movq	-32(%rbp), %rax	# hPtr, tmp86
	movq	32(%rax), %rax	# hPtr_4(D)->key.objPtr, tmp87
	movq	%rax, -16(%rbp)	# tmp87, objPtr2
	.loc 1 6400 0
	# Fast path: identical pointers => return 1 immediately.
	movq	-8(%rbp), %rax	# objPtr1, tmp88
	cmpq	-16(%rbp), %rax	# objPtr2, tmp88
	jne	.L918	#,
	.loc 1 6401 0
	movl	$1, %eax	#, D.20680
	jmp	.L919	#
.L918:
	.loc 1 6403 0
	# Slow path: call CompareDistinctVarKeys with %rdi = objPtr1 and
	# %rsi = objPtr2; its %eax is left untouched and becomes the result.
	movq	-16(%rbp), %rdx	# objPtr2, tmp89
	movq	-8(%rbp), %rax	# objPtr1, tmp90
	movq	%rdx, %rsi	# tmp89,
	movq	%rax, %rdi	# tmp90,
	call	CompareDistinctVarKeys	#
.L919:
	.loc 1 6405 0
	# Epilogue: leave restores %rsp from %rbp and pops %rbp.
	leave
	.cfi_def_cfa 7, 8
	ret
	.cfi_endproc
.LFE83:
	.size	CompareVarKeys, .-CompareVarKeys


Inner function:

	# CompareDistinctVarKeys, the inner function of the optimised form,
	# as emitted by gcc -S -fverbose-asm (AT&T operand order).
	#
	# In:   %rdi = objPtr1, %rsi = objPtr2
	# Out:  %eax = 1 when both lengths match and memcmp reports equal
	#       bytes, 0 otherwise
	#
	# Frame slots:  -40(%rbp) objPtr1    -48(%rbp) objPtr2
	# Registers:    %r13 = p1, %ebx = l1, %r14 = p2, %r12d = l2
	#
	# No pointer-identity test is made in this function.
	.type	CompareDistinctVarKeys, @function
CompareDistinctVarKeys:
.LFB82:
	.loc 1 6367 0
	.cfi_startproc
	# Prologue: set up the frame pointer, save the four registers that
	# will hold p1/l1/p2/l2, and reserve 16 bytes for the two slots.
	pushq	%rbp	#
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	movq	%rsp, %rbp	#,
	.cfi_def_cfa_register 6
	pushq	%r14	#
	pushq	%r13	#
	pushq	%r12	#
	pushq	%rbx	#
	subq	$16, %rsp	#,
	.cfi_offset 14, -24
	.cfi_offset 13, -32
	.cfi_offset 12, -40
	.cfi_offset 3, -48
	# Spill both incoming arguments to their stack slots.
	movq	%rdi, -40(%rbp)	# objPtr1, objPtr1
	movq	%rsi, -48(%rbp)	# objPtr2, objPtr2
	.loc 1 6376 0
	# p1 = objPtr1->bytes (offset 8) if non-NULL, else Tcl_GetString(objPtr1).
	# The field is loaded twice: once for the test, once for the value.
	movq	-40(%rbp), %rax	# objPtr1, tmp95
	movq	8(%rax), %rax	# objPtr1_4(D)->bytes, D.20677
	testq	%rax, %rax	# D.20677
	je	.L910	#,
	.loc 1 6376 0 is_stmt 0 discriminator 1
	movq	-40(%rbp), %rax	# objPtr1, tmp96
	movq	8(%rax), %rax	# objPtr1_4(D)->bytes, D.20677
	jmp	.L911	#
.L910:
	.loc 1 6376 0 discriminator 2
	movq	-40(%rbp), %rax	# objPtr1, tmp97
	movq	%rax, %rdi	# tmp97,
	call	Tcl_GetString@PLT	#
.L911:
	.loc 1 6376 0 discriminator 4
	# Both arms leave the string pointer in %rax; keep it in %r13 as p1.
	movq	%rax, %r13	# D.20677, p1
	.loc 1 6377 0 is_stmt 1 discriminator 4
	# l1 = objPtr1->length (32-bit field at offset 16), read after p1 is known.
	movq	-40(%rbp), %rax	# objPtr1, tmp98
	movl	16(%rax), %ebx	# objPtr1_4(D)->length, l1
	.loc 1 6378 0 discriminator 4
	# p2 = objPtr2->bytes if non-NULL, else Tcl_GetString(objPtr2).
	movq	-48(%rbp), %rax	# objPtr2, tmp99
	movq	8(%rax), %rax	# objPtr2_10(D)->bytes, D.20677
	testq	%rax, %rax	# D.20677
	je	.L912	#,
	.loc 1 6378 0 is_stmt 0 discriminator 1
	movq	-48(%rbp), %rax	# objPtr2, tmp100
	movq	8(%rax), %rax	# objPtr2_10(D)->bytes, D.20677
	jmp	.L913	#
.L912:
	.loc 1 6378 0 discriminator 2
	movq	-48(%rbp), %rax	# objPtr2, tmp101
	movq	%rax, %rdi	# tmp101,
	call	Tcl_GetString@PLT	#
.L913:
	.loc 1 6378 0 discriminator 4
	movq	%rax, %r14	# D.20677, p2
	.loc 1 6379 0 is_stmt 1 discriminator 4
	# l2 = objPtr2->length
	movq	-48(%rbp), %rax	# objPtr2, tmp102
	movl	16(%rax), %r12d	# objPtr2_10(D)->length, l2
	.loc 1 6385 0 discriminator 4
	# Result = (l1 == l2) && (memcmp(p1, p2, l1) == 0).
	# Differing lengths skip the memcmp call entirely.
	cmpl	%r12d, %ebx	# l2, l1
	jne	.L914	#,
	.loc 1 6385 0 is_stmt 0 discriminator 1
	# l1 is sign-extended to 64 bits for memcmp's length argument.
	movslq	%ebx, %rax	# l1, D.20679
	movq	%rax, %rdx	# D.20679,
	movq	%r14, %rsi	# p2,
	movq	%r13, %rdi	# p1,
	call	memcmp@PLT	#
	testl	%eax, %eax	# D.20678
	jne	.L914	#,
	.loc 1 6385 0 discriminator 3
	movl	$1, %eax	#, D.20678
	jmp	.L915	#
.L914:
	.loc 1 6385 0 discriminator 4
	movl	$0, %eax	#, D.20678
.L915:
	.loc 1 6386 0 is_stmt 1 discriminator 6
	# Epilogue: release the slots and restore the saved registers in
	# reverse push order; the result is already in %eax.
	addq	$16, %rsp	#,
	popq	%rbx	#
	popq	%r12	#
	popq	%r13	#
	popq	%r14	#
	popq	%rbp	#
	.cfi_def_cfa 7, 8
	ret
	.cfi_endproc
.LFE82:
	.size	CompareDistinctVarKeys, .-CompareDistinctVarKeys