Posted to tcl by aspect at Wed Sep 30 00:10:41 GMT 2015 (view raw)

  1. Examining the assembly from applying drh's example optimisation to CompareVarKeys.
  2.  
  3. Assembly obtained with:
  4.  
  5. configure --enable-symbols=all
  6. make
  7. rm tclVar.o
  8. make -n tclVar.o | sed -e 's/^gcc/gcc -S -fverbose-asm/' | sh
  9.  
  10.  
  11. Original 7b3b0ca73eb7ba1cbce9 (without optimisation):
  12.  
  13. .type CompareVarKeys, @function # baseline build: CompareVarKeys(keyPtr in %rdi, hPtr in %rsi); %eax = 1 on match, 0 otherwise
  14. CompareVarKeys:
  15. .LFB82:
  16. .loc 1 6359 0
  17. .cfi_startproc
  18. pushq %rbp # save caller's frame pointer
  19. .cfi_def_cfa_offset 16
  20. .cfi_offset 6, -16
  21. movq %rsp, %rbp #, -- set up this function's frame pointer
  22. .cfi_def_cfa_register 6
  23. pushq %r14 # callee-saved; holds p2 below
  24. pushq %r13 # callee-saved; holds p1 below
  25. pushq %r12 # callee-saved; holds l2 below
  26. pushq %rbx # callee-saved; holds l1 below -- all four saves happen before the pointer-equality test, so the fast path pays for them too
  27. subq $32, %rsp #, -- reserve 32 bytes for the stack locals
  28. .cfi_offset 14, -24
  29. .cfi_offset 13, -32
  30. .cfi_offset 12, -40
  31. .cfi_offset 3, -48
  32. movq %rdi, -56(%rbp) # keyPtr, keyPtr -- spill first argument
  33. movq %rsi, -64(%rbp) # hPtr, hPtr -- spill second argument
  34. .loc 1 6360 0
  35. movq -56(%rbp), %rax # keyPtr, tmp95
  36. movq %rax, -40(%rbp) # tmp95, objPtr1 -- objPtr1 = keyPtr
  37. .loc 1 6361 0
  38. movq -64(%rbp), %rax # hPtr, tmp96
  39. movq 32(%rax), %rax # hPtr_7(D)->key.objPtr, tmp97 -- field at offset 32 of *hPtr
  40. movq %rax, -48(%rbp) # tmp97, objPtr2 -- objPtr2 = hPtr->key.objPtr
  41. .loc 1 6369 0
  42. movq -40(%rbp), %rax # objPtr1, tmp98
  43. cmpq -48(%rbp), %rax # objPtr2, tmp98 -- same object pointer?
  44. jne .L910 #, -- different pointers: go compare the strings
  45. .loc 1 6370 0
  46. movl $1, %eax #, D.20671 -- identical pointers: result = 1
  47. jmp .L911 # skip straight to the epilogue
  48. .L910:
  49. .loc 1 6378 0
  50. movq -40(%rbp), %rax # objPtr1, tmp99
  51. movq 8(%rax), %rax # objPtr1_6->bytes, D.20670 -- load objPtr1->bytes (offset 8)
  52. testq %rax, %rax # D.20670 -- is bytes NULL?
  53. je .L912 #, -- NULL: obtain the string through Tcl_GetString instead
  54. .loc 1 6378 0 is_stmt 0 discriminator 1
  55. movq -40(%rbp), %rax # objPtr1, tmp100
  56. movq 8(%rax), %rax # objPtr1_6->bytes, D.20670 -- reload objPtr1->bytes as the string pointer
  57. jmp .L913 # bytes was non-NULL: use it directly
  58. .L912:
  59. .loc 1 6378 0 discriminator 2
  60. movq -40(%rbp), %rax # objPtr1, tmp101
  61. movq %rax, %rdi # tmp101,
  62. call Tcl_GetString@PLT # Tcl_GetString(objPtr1); result in %rax
  63. .L913:
  64. .loc 1 6378 0 discriminator 4
  65. movq %rax, %r13 # D.20670, p1 -- p1 = string pointer from either path
  66. .loc 1 6379 0 is_stmt 1 discriminator 4
  67. movq -40(%rbp), %rax # objPtr1, tmp102
  68. movl 16(%rax), %ebx # objPtr1_6->length, l1 -- l1 = objPtr1->length (offset 16), read after p1 is obtained
  69. .loc 1 6380 0 discriminator 4
  70. movq -48(%rbp), %rax # objPtr2, tmp103
  71. movq 8(%rax), %rax # objPtr2_8->bytes, D.20670 -- load objPtr2->bytes (offset 8)
  72. testq %rax, %rax # D.20670 -- is bytes NULL?
  73. je .L914 #, -- NULL: obtain the string through Tcl_GetString instead
  74. .loc 1 6380 0 is_stmt 0 discriminator 1
  75. movq -48(%rbp), %rax # objPtr2, tmp104
  76. movq 8(%rax), %rax # objPtr2_8->bytes, D.20670 -- reload objPtr2->bytes as the string pointer
  77. jmp .L915 # bytes was non-NULL: use it directly
  78. .L914:
  79. .loc 1 6380 0 discriminator 2
  80. movq -48(%rbp), %rax # objPtr2, tmp105
  81. movq %rax, %rdi # tmp105,
  82. call Tcl_GetString@PLT # Tcl_GetString(objPtr2); result in %rax
  83. .L915:
  84. .loc 1 6380 0 discriminator 4
  85. movq %rax, %r14 # D.20670, p2 -- p2 = string pointer from either path
  86. .loc 1 6381 0 is_stmt 1 discriminator 4
  87. movq -48(%rbp), %rax # objPtr2, tmp106
  88. movl 16(%rax), %r12d # objPtr2_8->length, l2 -- l2 = objPtr2->length (offset 16)
  89. .loc 1 6387 0 discriminator 4
  90. cmpl %r12d, %ebx # l2, l1 -- lengths equal?
  91. jne .L916 #, -- different lengths: result 0 without calling memcmp
  92. .loc 1 6387 0 is_stmt 0 discriminator 1
  93. movslq %ebx, %rax # l1, D.20672 -- sign-extend l1 to 64 bits for memcmp's size argument
  94. movq %rax, %rdx # D.20672,
  95. movq %r14, %rsi # p2,
  96. movq %r13, %rdi # p1,
  97. call memcmp@PLT # memcmp(p1, p2, l1)
  98. testl %eax, %eax # D.20671 -- did memcmp return 0?
  99. jne .L916 #, -- nonzero: contents differ
  100. .loc 1 6387 0 discriminator 3
  101. movl $1, %eax #, D.20671 -- equal length and equal bytes: result = 1
  102. jmp .L918 #
  103. .L916:
  104. .loc 1 6387 0 discriminator 4
  105. movl $0, %eax #, D.20671 -- result = 0
  106. .L918:
  107. .loc 1 6387 0
  108. nop
  109. .L911:
  110. .loc 1 6388 0 is_stmt 1
  111. addq $32, %rsp #, -- release the stack locals
  112. popq %rbx # restore callee-saved registers in reverse push order
  113. popq %r12 #
  114. popq %r13 #
  115. popq %r14 #
  116. popq %rbp # restore caller's frame pointer
  117. .cfi_def_cfa 7, 8
  118. ret
  119. .cfi_endproc
  120. .LFE82:
  121. .size CompareVarKeys, .-CompareVarKeys
  122.  
  123.  
  124.  
  125.  
  126.  
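  127. Optimised outer function (CompareVarKeys after the change; it handles the pointer-equality case itself and calls CompareDistinctVarKeys otherwise):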
  128.  
  129. .type CompareVarKeys, @function # optimised wrapper: CompareVarKeys(keyPtr in %rdi, hPtr in %rsi); no callee-saved registers other than %rbp are pushed here
  130. CompareVarKeys:
  131. .LFB83:
  132. .loc 1 6392 0
  133. .cfi_startproc
  134. pushq %rbp # save caller's frame pointer
  135. .cfi_def_cfa_offset 16
  136. .cfi_offset 6, -16
  137. movq %rsp, %rbp #, -- set up this function's frame pointer
  138. .cfi_def_cfa_register 6
  139. subq $32, %rsp #, -- reserve 32 bytes for the stack locals
  140. movq %rdi, -24(%rbp) # keyPtr, keyPtr -- spill first argument
  141. movq %rsi, -32(%rbp) # hPtr, hPtr -- spill second argument
  142. .loc 1 6393 0
  143. movq -24(%rbp), %rax # keyPtr, tmp85
  144. movq %rax, -8(%rbp) # tmp85, objPtr1 -- objPtr1 = keyPtr
  145. .loc 1 6394 0
  146. movq -32(%rbp), %rax # hPtr, tmp86
  147. movq 32(%rax), %rax # hPtr_4(D)->key.objPtr, tmp87 -- field at offset 32 of *hPtr
  148. movq %rax, -16(%rbp) # tmp87, objPtr2 -- objPtr2 = hPtr->key.objPtr
  149. .loc 1 6400 0
  150. movq -8(%rbp), %rax # objPtr1, tmp88
  151. cmpq -16(%rbp), %rax # objPtr2, tmp88 -- same object pointer?
  152. jne .L918 #, -- different pointers: delegate to CompareDistinctVarKeys
  153. .loc 1 6401 0
  154. movl $1, %eax #, D.20680 -- identical pointers: result = 1
  155. jmp .L919 # skip the call and return
  156. .L918:
  157. .loc 1 6403 0
  158. movq -16(%rbp), %rdx # objPtr2, tmp89
  159. movq -8(%rbp), %rax # objPtr1, tmp90
  160. movq %rdx, %rsi # tmp89, -- second argument = objPtr2
  161. movq %rax, %rdi # tmp90, -- first argument = objPtr1
  162. call CompareDistinctVarKeys # its %eax result is left untouched and becomes this function's return value
  163. .L919:
  164. .loc 1 6405 0
  165. leave # tear down the frame (restore %rsp from %rbp, then pop %rbp)
  166. .cfi_def_cfa 7, 8
  167. ret
  168. .cfi_endproc
  169. .LFE83:
  170. .size CompareVarKeys, .-CompareVarKeys
  171.  
  172.  
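  173. Inner function (CompareDistinctVarKeys, called from the optimised CompareVarKeys when the two object pointers differ):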
  174.  
  175. .type CompareDistinctVarKeys, @function # CompareDistinctVarKeys(objPtr1 in %rdi, objPtr2 in %rsi); %eax = 1 if lengths and bytes match, else 0; performs no pointer-equality test itself
  176. CompareDistinctVarKeys:
  177. .LFB82:
  178. .loc 1 6367 0
  179. .cfi_startproc
  180. pushq %rbp # save caller's frame pointer
  181. .cfi_def_cfa_offset 16
  182. .cfi_offset 6, -16
  183. movq %rsp, %rbp #, -- set up this function's frame pointer
  184. .cfi_def_cfa_register 6
  185. pushq %r14 # callee-saved; holds p2 below
  186. pushq %r13 # callee-saved; holds p1 below
  187. pushq %r12 # callee-saved; holds l2 below
  188. pushq %rbx # callee-saved; holds l1 below
  189. subq $16, %rsp #, -- reserve 16 bytes for the two spilled arguments
  190. .cfi_offset 14, -24
  191. .cfi_offset 13, -32
  192. .cfi_offset 12, -40
  193. .cfi_offset 3, -48
  194. movq %rdi, -40(%rbp) # objPtr1, objPtr1 -- spill first argument
  195. movq %rsi, -48(%rbp) # objPtr2, objPtr2 -- spill second argument
  196. .loc 1 6376 0
  197. movq -40(%rbp), %rax # objPtr1, tmp95
  198. movq 8(%rax), %rax # objPtr1_4(D)->bytes, D.20677 -- load objPtr1->bytes (offset 8)
  199. testq %rax, %rax # D.20677 -- is bytes NULL?
  200. je .L910 #, -- NULL: obtain the string through Tcl_GetString instead
  201. .loc 1 6376 0 is_stmt 0 discriminator 1
  202. movq -40(%rbp), %rax # objPtr1, tmp96
  203. movq 8(%rax), %rax # objPtr1_4(D)->bytes, D.20677 -- reload objPtr1->bytes as the string pointer
  204. jmp .L911 # bytes was non-NULL: use it directly
  205. .L910:
  206. .loc 1 6376 0 discriminator 2
  207. movq -40(%rbp), %rax # objPtr1, tmp97
  208. movq %rax, %rdi # tmp97,
  209. call Tcl_GetString@PLT # Tcl_GetString(objPtr1); result in %rax
  210. .L911:
  211. .loc 1 6376 0 discriminator 4
  212. movq %rax, %r13 # D.20677, p1 -- p1 = string pointer from either path
  213. .loc 1 6377 0 is_stmt 1 discriminator 4
  214. movq -40(%rbp), %rax # objPtr1, tmp98
  215. movl 16(%rax), %ebx # objPtr1_4(D)->length, l1 -- l1 = objPtr1->length (offset 16), read after p1 is obtained
  216. .loc 1 6378 0 discriminator 4
  217. movq -48(%rbp), %rax # objPtr2, tmp99
  218. movq 8(%rax), %rax # objPtr2_10(D)->bytes, D.20677 -- load objPtr2->bytes (offset 8)
  219. testq %rax, %rax # D.20677 -- is bytes NULL?
  220. je .L912 #, -- NULL: obtain the string through Tcl_GetString instead
  221. .loc 1 6378 0 is_stmt 0 discriminator 1
  222. movq -48(%rbp), %rax # objPtr2, tmp100
  223. movq 8(%rax), %rax # objPtr2_10(D)->bytes, D.20677 -- reload objPtr2->bytes as the string pointer
  224. jmp .L913 # bytes was non-NULL: use it directly
  225. .L912:
  226. .loc 1 6378 0 discriminator 2
  227. movq -48(%rbp), %rax # objPtr2, tmp101
  228. movq %rax, %rdi # tmp101,
  229. call Tcl_GetString@PLT # Tcl_GetString(objPtr2); result in %rax
  230. .L913:
  231. .loc 1 6378 0 discriminator 4
  232. movq %rax, %r14 # D.20677, p2 -- p2 = string pointer from either path
  233. .loc 1 6379 0 is_stmt 1 discriminator 4
  234. movq -48(%rbp), %rax # objPtr2, tmp102
  235. movl 16(%rax), %r12d # objPtr2_10(D)->length, l2 -- l2 = objPtr2->length (offset 16)
  236. .loc 1 6385 0 discriminator 4
  237. cmpl %r12d, %ebx # l2, l1 -- lengths equal?
  238. jne .L914 #, -- different lengths: result 0 without calling memcmp
  239. .loc 1 6385 0 is_stmt 0 discriminator 1
  240. movslq %ebx, %rax # l1, D.20679 -- sign-extend l1 to 64 bits for memcmp's size argument
  241. movq %rax, %rdx # D.20679,
  242. movq %r14, %rsi # p2,
  243. movq %r13, %rdi # p1,
  244. call memcmp@PLT # memcmp(p1, p2, l1)
  245. testl %eax, %eax # D.20678 -- did memcmp return 0?
  246. jne .L914 #, -- nonzero: contents differ
  247. .loc 1 6385 0 discriminator 3
  248. movl $1, %eax #, D.20678 -- equal length and equal bytes: result = 1
  249. jmp .L915 #
  250. .L914:
  251. .loc 1 6385 0 discriminator 4
  252. movl $0, %eax #, D.20678 -- result = 0
  253. .L915:
  254. .loc 1 6386 0 is_stmt 1 discriminator 6
  255. addq $16, %rsp #, -- release the stack locals
  256. popq %rbx # restore callee-saved registers in reverse push order
  257. popq %r12 #
  258. popq %r13 #
  259. popq %r14 #
  260. popq %rbp # restore caller's frame pointer
  261. .cfi_def_cfa 7, 8
  262. ret
  263. .cfi_endproc
  264. .LFE82:
  265. .size CompareDistinctVarKeys, .-CompareDistinctVarKeys
  266.