/xbmc/visualizations/Goom/goom2k4-0/src/ppc_drawings.s

http://github.com/xbmc/xbmc · Assembly · 381 lines · 260 code · 70 blank · 51 comment · 0 complexity · b2b0d67aac740f1ebe45d0ff8eb2d575 MD5 · raw file

  1. ; PowerPC optimized drawing methods for Goom
  2. ; © 2003 Guillaume Borios
  3. ; This Source Code is released under the terms of the General Public License
  4. ; Change log :
  5. ; 30 May 2003 : File creation
  6. ; Section definition : We use a read only code section for the whole file
  7. .section __TEXT,__text,regular,pure_instructions
  8. ; --------------------------------------------------------------------------------------
  9. ; Single 32b pixel drawing macros
  10. ; Usage :
  11. ; DRAWMETHOD_XXXX_MACRO *pixelIN, *pixelOUT, COLOR, WR1, WR2, WR3, WR4
  12. ; Only the work registers (WR) can be touched by the macros
  13. ;
  14. ; Available methods :
  15. ; DRAWMETHOD_DFLT_MACRO : Default drawing method (Actually PLUS)
  16. ; DRAWMETHOD_PLUS_MACRO : RVB Saturated per channel addition (SLOWEST)
  17. ; DRAWMETHOD_HALF_MACRO : 50% Transparency color drawing
  18. ; DRAWMETHOD_OVRW_MACRO : Direct COLOR drawing (FASTEST)
  19. ; DRAWMETHOD_B_OR_MACRO : Bitwise OR
  20. ; DRAWMETHOD_BAND_MACRO : Bitwise AND
  21. ; DRAWMETHOD_BXOR_MACRO : Bitwise XOR
  22. ; DRAWMETHOD_BNOT_MACRO : Bitwise NOT
  23. ; --------------------------------------------------------------------------------------
  ; DRAWMETHOD_OVRW_MACRO pixelIN, pixelOUT, COLOR
  ;   Plain overwrite: the word addressed by pixelOUT ($1) receives COLOR ($2).
  ;   pixelIN ($0) is never read and no work register is touched.
  .macro DRAWMETHOD_OVRW_MACRO
          stw     $2,0($1)                ;; *pixelOUT = COLOR
  .endmacro
  ; DRAWMETHOD_B_OR_MACRO pixelIN, pixelOUT, COLOR, WR1
  ;   *pixelOUT = *pixelIN | COLOR.  WR1 ($3) is the only scratch register.
  .macro DRAWMETHOD_B_OR_MACRO
          lwz     $3,0($0)                ;; WR1 = *pixelIN
          or      $3,$3,$2                ;; WR1 = WR1 | COLOR
          stw     $3,0($1)                ;; *pixelOUT = WR1
  .endmacro
  ; DRAWMETHOD_BAND_MACRO pixelIN, pixelOUT, COLOR, WR1
  ;   *pixelOUT = *pixelIN & COLOR.  WR1 ($3) is the only scratch register.
  .macro DRAWMETHOD_BAND_MACRO
          lwz     $3,0($0)                ;; WR1 = *pixelIN
          and     $3,$3,$2                ;; WR1 = WR1 & COLOR
          stw     $3,0($1)                ;; *pixelOUT = WR1
  .endmacro
  ; DRAWMETHOD_BXOR_MACRO pixelIN, pixelOUT, COLOR, WR1
  ;   *pixelOUT = *pixelIN ^ COLOR.  WR1 ($3) is the only scratch register.
  .macro DRAWMETHOD_BXOR_MACRO
          lwz     $3,0($0)                ;; WR1 = *pixelIN
          xor     $3,$3,$2                ;; WR1 = WR1 ^ COLOR
          stw     $3,0($1)                ;; *pixelOUT = WR1
  .endmacro
  ; DRAWMETHOD_BNOT_MACRO pixelIN, pixelOUT, COLOR, WR1
  ;   *pixelOUT = ~*pixelIN.  COLOR ($2) is accepted for a uniform argument
  ;   list but is not used.  WR1 ($3) is the only scratch register.
  .macro DRAWMETHOD_BNOT_MACRO
          lwz     $3,0($0)                ;; WR1 = *pixelIN
          nand    $3,$3,$3                ;; WR1 = ~(WR1 & WR1) = ~WR1
          stw     $3,0($1)                ;; *pixelOUT = WR1
  .endmacro
  ; DRAWMETHOD_PLUS_MACRO pixelIN, pixelOUT, COLOR, WR1, WR2, WR3, WR4
  ;   Per-channel saturated addition of COLOR ($2) onto the pixel read from
  ;   pixelIN ($0); the result is stored to pixelOUT ($1).
  ;
  ;   The 0x0000FF00 byte is summed alone in WR1 ($3); the 0x00FF0000 and
  ;   0x000000FF bytes are summed together in WR2 ($4).  For every sum the carry
  ;   out of the byte is rotated into the sign bit and smeared back down with an
  ;   arithmetic shift, giving an all-ones mask over that byte exactly when it
  ;   overflowed; OR-ing the mask in clamps the byte to 0xFF.
  ;
  ;   The top byte of both operands is masked off before the addition, so the
  ;   top byte of the stored word is 0x00, or 0xFF when the 0x00FF0000 byte
  ;   overflowed.
  ;
  ;   Scratch: WR1..WR4 ($3..$6).  The andi. instructions also rewrite CR0.
  ;   The 0x00FF00FF mask is completed with ori rather than addi: addi reads r0
  ;   as the literal 0, so the mask would come out as 0x000000FF if r0 were
  ;   passed as WR3.
  .macro DRAWMETHOD_PLUS_MACRO
          lwz     $4,0($0)                ;; WR2 = *pixelIN
          andi.   $3,$4,0xFF00            ;; WR1 = pixel & 0x0000FF00
          andi.   $5,$2,0xFF00            ;; WR3 = COLOR & 0x0000FF00
          add     $3,$3,$5                ;; WR1 = sum, carry lands in 0x00010000
          rlwinm  $5,$3,15,0,0            ;; WR3 = that carry moved to the sign bit
          srawi   $5,$5,23                ;; WR3 = 0xFFFFFF00 on carry, else 0
          or      $3,$3,$5                ;; clamp the 0x0000FF00 byte
          lis     $5,0xFF                 ;; WR3 = 0x00FF0000
          ori     $5,$5,0xFF              ;; WR3 = 0x00FF00FF
          and     $4,$4,$5                ;; WR2 = pixel & 0x00FF00FF
          and     $6,$2,$5                ;; WR4 = COLOR & 0x00FF00FF
          add     $4,$4,$6                ;; carries land in 0x01000000 / 0x00000100
          rlwinm  $6,$4,7,0,0             ;; WR4 = 0x01000000 carry moved to the sign bit
          srawi   $6,$6,15                ;; WR4 = 0xFFFF0000 on carry, else 0
          rlwinm  $5,$4,23,0,0            ;; WR3 = 0x00000100 carry moved to the sign bit
          srawi   $5,$5,31                ;; WR3 = 0xFFFFFFFF on carry, else 0
          rlwimi  $6,$5,0,24,31           ;; low byte of WR4 = low-byte clamp mask
          or      $4,$4,$6                ;; clamp the 0x00FF0000 and 0x000000FF bytes
          rlwimi  $4,$3,0,16,23           ;; drop in the 0x0000FF00 byte from WR1
          stw     $4,0($1)                ;; *pixelOUT = WR2
  .endmacro
  ; DRAWMETHOD_HALF_MACRO pixelIN, pixelOUT, COLOR, WR1, WR2, WR3
  ;   50% blend: each of the three low bytes of the stored word is
  ;   (pixel byte + COLOR byte) / 2, where pixel is read from pixelIN ($0) and
  ;   the result is written to pixelOUT ($1).
  ;
  ;   The 0x0000FF00 byte is summed alone in WR1 ($3); the 0x00FF0000 and
  ;   0x000000FF bytes are summed together in WR2 ($4) and halved with one
  ;   shift.  The bit that shift pushes from the 0x00FF0000 sum into the
  ;   0x0000FF00 byte is overwritten by the final rlwimi.
  ;   The top byte of both operands is masked off, so the top byte of the
  ;   stored word is 0.
  ;
  ;   Scratch: WR1..WR3 ($3..$5).  The andi. instructions also rewrite CR0.
  ;   The 0x00FF00FF mask is completed with ori rather than addi: addi reads r0
  ;   as the literal 0, so the mask would come out as 0x000000FF if r0 were
  ;   passed as WR3.
  .macro DRAWMETHOD_HALF_MACRO
          lwz     $4,0($0)                ;; WR2 = *pixelIN
          andi.   $3,$4,0xFF00            ;; WR1 = pixel & 0x0000FF00
          andi.   $5,$2,0xFF00            ;; WR3 = COLOR & 0x0000FF00
          add     $3,$3,$5                ;; WR1 = 17-bit sum of the 0x0000FF00 bytes
          lis     $5,0xFF                 ;; WR3 = 0x00FF0000
          ori     $5,$5,0xFF              ;; WR3 = 0x00FF00FF
          and     $4,$4,$5                ;; WR2 = pixel & 0x00FF00FF
          and     $5,$2,$5                ;; WR3 = COLOR & 0x00FF00FF
          add     $4,$4,$5                ;; WR2 = both byte sums
          srwi    $4,$4,1                 ;; halve both sums at once
          rlwimi  $4,$3,31,16,23          ;; rotate WR1 right by 1 (halve) and insert its byte
          stw     $4,0($1)                ;; *pixelOUT = WR2
  .endmacro
  ; DRAWMETHOD_DFLT_MACRO pixelIN, pixelOUT, COLOR, WR1, WR2, WR3, WR4
  ;   Default drawing method: an alias for DRAWMETHOD_PLUS_MACRO.
  ;   All seven arguments are forwarded explicitly; invoking the inner macro
  ;   with no operands would expand every $n in its body to nothing.
  .macro DRAWMETHOD_DFLT_MACRO
          DRAWMETHOD_PLUS_MACRO $0,$1,$2,$3,$4,$5,$6
  .endmacro
  86. ; --------------------------------------------------------------------------------------
  87. ; **************************************************************************************
  88. ; void DRAWMETHOD_PLUS_PPC(unsigned int * buf, unsigned int _col);
  89. ; void DRAWMETHOD_PLUS_2_PPC(unsigned * in, unsigned int * out, unsigned int _col);
  90. ; **************************************************************************************
  ; DRAWMETHOD_PLUS_2_PPC: saturated add of a colour onto one pixel, with
  ; separate source and destination.
  ;   In      : r3 = in, r4 = out, r5 = _col
  ;   Scratch : r6, r7, r8, r9 (handed to the macro as WR1..WR4)
          .globl  _DRAWMETHOD_PLUS_2_PPC
          .align  3
  _DRAWMETHOD_PLUS_2_PPC:
          DRAWMETHOD_PLUS_MACRO r3,r4,r5,r6,r7,r8,r9
          blr                             ;; back to the caller
  ; DRAWMETHOD_PLUS_PPC: in-place saturated add of a colour onto one pixel.
  ;   In      : r3 = buf (read and written), r4 = _col
  ;   Scratch : r5, r6, r7, r9 (handed to the macro as WR1..WR4; r8 is unused)
          .globl  _DRAWMETHOD_PLUS_PPC
          .align  3
  _DRAWMETHOD_PLUS_PPC:
          DRAWMETHOD_PLUS_MACRO r3,r3,r4,r5,r6,r7,r9
          blr                             ;; back to the caller
  101. ; **************************************************************************************
  102. ; void DRAWMETHOD_HALF_PPC(unsigned int * buf, unsigned int _col);
  103. ; void DRAWMETHOD_HALF_2_PPC(unsigned * in, unsigned int * out, unsigned int _col);
  104. ; **************************************************************************************
  ; DRAWMETHOD_HALF_2_PPC: 50% blend of a colour with one pixel, with separate
  ; source and destination.
  ;   In      : r3 = in, r4 = out, r5 = _col
  ;   Scratch : r6, r7, r8 (handed to the macro as WR1..WR3)
          .globl  _DRAWMETHOD_HALF_2_PPC
          .align  3
  _DRAWMETHOD_HALF_2_PPC:
          DRAWMETHOD_HALF_MACRO r3,r4,r5,r6,r7,r8
          blr                             ;; back to the caller
  ; DRAWMETHOD_HALF_PPC: in-place 50% blend of a colour with one pixel.
  ;   In      : r3 = buf (read and written), r4 = _col
  ;   Scratch : r5, r6, r7 (handed to the macro as WR1..WR3)
          .globl  _DRAWMETHOD_HALF_PPC
          .align  3
  _DRAWMETHOD_HALF_PPC:
          DRAWMETHOD_HALF_MACRO r3,r3,r4,r5,r6,r7
          blr                             ;; back to the caller
  115. ; **************************************************************************************
  116. ; void DRAW_LINE_PPC(unsigned int *data, int x1, int y1, int x2, int y2, unsigned int col,
  117. ; unsigned int screenx, unsigned int screeny)
  118. ; **************************************************************************************
  ; DRAW_LINE_PPC: placeholder entry point.  The line drawing routine is not
  ; implemented yet; the function returns immediately and touches neither its
  ; arguments nor memory.
          .globl  _DRAW_LINE_PPC
          .align  3
  _DRAW_LINE_PPC:
          blr                             ;; NOT IMPLEMENTED YET: return at once
  124. ; **************************************************************************************
  125. ; void _ppc_brightness(Pixel * src, Pixel * dest, unsigned int size, unsigned int coeff)
  126. ; **************************************************************************************
  ; Read-only constants for the AltiVec brightness routines.
  ; Row 0 is sixteen zero bytes.  Rows 1..4 are the vperm control vectors that
  ; the routines load into v10..v13: in each word three selector bytes are 0x10
  ; and the fourth is the index (0x00..0x0F) of one source byte.
  ; The routines step past row 0 before their first load.
          .const
          .align  4
  vectorZERO:
          .long   0,0,0,0
          .long   0x10101000, 0x10101001, 0x10101002, 0x10101003      ;; source bytes 0..3
          .long   0x10101004, 0x10101005, 0x10101006, 0x10101007      ;; source bytes 4..7
          .long   0x10101008, 0x10101009, 0x1010100A, 0x1010100B      ;; source bytes 8..11
          .long   0x1010100C, 0x1010100D, 0x1010100E, 0x1010100F      ;; source bytes 12..15
  ; ppc_brightness_G4(src, dest, size, coeff) -- AltiVec version with dst prefetch
  ;   Every byte of src is multiplied by coeff, shifted right by 8 and clamped
  ;   to 0xFF, 16 bytes (four 32-bit pixels) per iteration.
  ;   In      : r3 = src, r4 = dest, r5 = size (in 32-bit pixels), r6 = coeff
  ;   Scratch : r0, r5, r6, r7, r9, r10, r11, r12, CTR, v0, v1, v4-v13
  ;             (VRSAVE is rewritten while running and restored before returning)
  ;   Notes   : * size is divided by 4; a trailing 1..3 pixels are not processed.
  ;             * When size < 4 the routine returns at once.  Without that guard
  ;               CTR would be loaded with 0 and bdnz would wrap around to 2^32
  ;               iterations.
  ;             * lvx/stvx ignore the low four address bits, so src and dest
  ;               have to be 16-byte aligned.
  ;             * vmulouh multiplies the low halfword of each word, so only the
  ;               low 16 bits of coeff take part.
  ;             * coeff is passed to the vector unit through the static
  ;               vectortmpwork buffer, so concurrent calls share that buffer.
          .section __TEXT,__text,regular,pure_instructions
          .globl  _ppc_brightness_G4
          .align  3
  _ppc_brightness_G4:
          ;; PowerPC Altivec code
          srwi    r5,r5,2                 ;; iterations = size / 4
          cmplwi  r5,0
          beqlr                           ;; fewer than 4 pixels: nothing to do
          mtctr   r5
          ;; vrsave
          mfspr   r11,256                 ;; r11 = caller's VRSAVE
          lis     r12,0xCFFC              ;; 0xCFFC0000 flags v0,v1,v4-v13 as live
          mtspr   256,r12
          ;; position-independent base: r10 = address of the $pb label
          mflr    r0
          bcl     20,31,"L00000000001$pb"
  "L00000000001$pb":
          mflr    r10
          mtlr    r0                      ;; put the real return address back
          addis   r9,r10,ha16(vectorZERO-"L00000000001$pb")
          addi    r9,r9,lo16(vectorZERO-"L00000000001$pb")
          vxor    v0,v0,v0                ;; v0 = all zero
          addi    r9,r9,16                ;; step past the zero row
          lvx     v10,0,r9                ;; v10 = selectors for source bytes 0..3
          addi    r9,r9,16
          lvx     v11,0,r9                ;; v11 = selectors for source bytes 4..7
          addi    r9,r9,16
          lvx     v12,0,r9                ;; v12 = selectors for source bytes 8..11
          addi    r9,r9,16
          lvx     v13,0,r9                ;; v13 = selectors for source bytes 12..15
          addis   r9,r10,ha16(vectortmpwork-"L00000000001$pb")
          addi    r9,r9,lo16(vectortmpwork-"L00000000001$pb")
          stw     r6,0(r9)                ;; word 0 of the scratch vector = coeff
          li      r6,8
          stw     r6,4(r9)                ;; word 1 of the scratch vector = shift count 8
          lvx     v9,0,r9
          vspltw  v8,v9,0                 ;; v8 = coeff in every word
          vspltw  v9,v9,1                 ;; v9 = 8 in every word
          ;; elt counter
          li      r9,0                    ;; r9 = byte offset into src/dest
          lis     r7,0x0F01               ;; dst control word 0x0F010000
          b       L7
          .align  4
  L7:
          lvx     v1,r9,r3                ;; v1 = 16 source bytes
          vperm   v4,v1,v0,v10            ;; widen bytes 0..3 to zero-extended words
          add     r10,r9,r3               ;; r10 = address of the current vector
          vperm   v5,v1,v0,v11            ;; bytes 4..7
          vperm   v6,v1,v0,v12            ;; bytes 8..11
          vperm   v7,v1,v0,v13            ;; bytes 12..15
          vmulouh v4,v4,v8                ;; byte * coeff
          dst     r10,r7,3                ;; prefetch hint on data stream 3, from r10
          vmulouh v5,v5,v8
          vmulouh v6,v6,v8
          vmulouh v7,v7,v8
          vsrw    v4,v4,v9                ;; >> 8
          vsrw    v5,v5,v9
          vsrw    v6,v6,v9
          vsrw    v7,v7,v9
          vpkuwus v4,v4,v5                ;; words -> halfwords, unsigned saturate
          vpkuwus v6,v6,v7
          vpkuhus v1,v4,v6                ;; halfwords -> bytes, unsigned saturate
          stvx    v1,r9,r4                ;; store 16 result bytes
          addi    r9,r9,16
          bdnz    L7
          mtspr   256,r11                 ;; restore the caller's VRSAVE
          blr
  ; ppc_brightness_G5(src, dest, size, coeff) -- AltiVec version without dst prefetch
  ;   Every byte of src is multiplied by coeff, shifted right by 8 and clamped
  ;   to 0xFF, 16 bytes (four 32-bit pixels) per iteration.
  ;   In      : r3 = src, r4 = dest, r5 = size (in 32-bit pixels), r6 = coeff
  ;   Scratch : r0, r5, r6, r9, r10, r11, r12, CTR, v0, v1, v4-v13
  ;             (VRSAVE is rewritten while running and restored before returning)
  ;   Notes   : * size is divided by 4; a trailing 1..3 pixels are not processed.
  ;             * When size < 4 the routine returns at once.  Without that guard
  ;               CTR would be loaded with 0 and bdnz would wrap around to 2^32
  ;               iterations.
  ;             * lvx/stvx ignore the low four address bits, so src and dest
  ;               have to be 16-byte aligned.
  ;             * vmulouh multiplies the low halfword of each word, so only the
  ;               low 16 bits of coeff take part.
  ;             * coeff is passed to the vector unit through the static
  ;               vectortmpwork buffer, so concurrent calls share that buffer.
  ;             * No dst is issued here, so the dst control word and the
  ;               per-iteration address computation are not needed.
          .globl  _ppc_brightness_G5
          .align  3
  _ppc_brightness_G5:
          ;; PowerPC Altivec G5 code
          srwi    r5,r5,2                 ;; iterations = size / 4
          cmplwi  r5,0
          beqlr                           ;; fewer than 4 pixels: nothing to do
          mtctr   r5
          ;; vrsave
          mfspr   r11,256                 ;; r11 = caller's VRSAVE
          lis     r12,0xCFFC              ;; 0xCFFC0000 flags v0,v1,v4-v13 as live
          mtspr   256,r12
          ;; position-independent base: r10 = address of the $pb label
          mflr    r0
          bcl     20,31,"L00000000002$pb"
  "L00000000002$pb":
          mflr    r10
          mtlr    r0                      ;; put the real return address back
          addis   r9,r10,ha16(vectorZERO-"L00000000002$pb")
          addi    r9,r9,lo16(vectorZERO-"L00000000002$pb")
          vxor    v0,v0,v0                ;; v0 = all zero
          addi    r9,r9,16                ;; step past the zero row
          lvx     v10,0,r9                ;; v10 = selectors for source bytes 0..3
          addi    r9,r9,16
          lvx     v11,0,r9                ;; v11 = selectors for source bytes 4..7
          addi    r9,r9,16
          lvx     v12,0,r9                ;; v12 = selectors for source bytes 8..11
          addi    r9,r9,16
          lvx     v13,0,r9                ;; v13 = selectors for source bytes 12..15
          addis   r9,r10,ha16(vectortmpwork-"L00000000002$pb")
          addi    r9,r9,lo16(vectortmpwork-"L00000000002$pb")
          stw     r6,0(r9)                ;; word 0 of the scratch vector = coeff
          li      r6,8
          stw     r6,4(r9)                ;; word 1 of the scratch vector = shift count 8
          lvx     v9,0,r9
          vspltw  v8,v9,0                 ;; v8 = coeff in every word
          vspltw  v9,v9,1                 ;; v9 = 8 in every word
          ;; elt counter
          li      r9,0                    ;; r9 = byte offset into src/dest
          b       L6
          .align  4
  L6:
          lvx     v1,r9,r3                ;; v1 = 16 source bytes
          vperm   v4,v1,v0,v10            ;; widen bytes 0..3 to zero-extended words
          vperm   v5,v1,v0,v11            ;; bytes 4..7
          vperm   v6,v1,v0,v12            ;; bytes 8..11
          vperm   v7,v1,v0,v13            ;; bytes 12..15
          vmulouh v4,v4,v8                ;; byte * coeff
          vmulouh v5,v5,v8
          vmulouh v6,v6,v8
          vmulouh v7,v7,v8
          vsrw    v4,v4,v9                ;; >> 8
          vsrw    v5,v5,v9
          vsrw    v6,v6,v9
          vsrw    v7,v7,v9
          vpkuwus v4,v4,v5                ;; words -> halfwords, unsigned saturate
          vpkuwus v6,v6,v7
          vpkuhus v1,v4,v6                ;; halfwords -> bytes, unsigned saturate
          stvx    v1,r9,r4                ;; store 16 result bytes
          addi    r9,r9,16
          bdnz    L6
          mtspr   256,r11                 ;; restore the caller's VRSAVE
          blr
  ; ppc_brightness_generic(src, dest, size, coeff) -- scalar version
  ;   For every 32-bit pixel the three low bytes are each multiplied by coeff,
  ;   shifted right by 8 and clamped to 0xFF.  The top byte of every stored
  ;   pixel is 0.
  ;   In      : r3 = src, r4 = dest, r5 = size (in 32-bit pixels), r6 = coeff
  ;   Scratch : r3, r4, r7, r8, r9, r10, r11, CTR, CR0
  ;   Notes   : * size == 0 returns at once.  Without that guard CTR would be
  ;               loaded with 0 and bdnz would wrap around to 2^32 iterations.
  ;             * The multiplies and compares are interleaved across the three
  ;               channels; keep that in mind when reordering.
          .globl  _ppc_brightness_generic
          .align  3
  _ppc_brightness_generic:
          cmplwi  r5,0
          beqlr                           ;; nothing to do for an empty buffer
          subi    r3,r3,4                 ;; pre-bias both pointers for lwzu/stwu
          subi    r4,r4,4
          mtctr   r5                      ;; one iteration per pixel
          b       L1
          .align  4
  L1:
          lwzu    r7,4(r3)                ;; r7 = next source pixel, r3 advances
          rlwinm  r8,r7,16,24,31          ;; r8  = 0x00FF0000 byte
          rlwinm  r9,r7,24,24,31          ;; r9  = 0x0000FF00 byte
          mullw   r8,r8,r6
          rlwinm  r10,r7,0,24,31          ;; r10 = 0x000000FF byte
          mullw   r9,r9,r6
          srwi    r8,r8,8
          mullw   r10,r10,r6
          srwi    r9,r9,8
          rlwinm. r11,r8,0,0,23           ;; any bit above the low byte set?
          beq     L2
          li      r8,0xFF                 ;; yes: clamp
  L2:
          srwi    r10,r10,8
          rlwinm. r11,r9,0,0,23
          beq     L3
          li      r9,0xFF
  L3:
          rlwinm  r7,r8,16,8,15           ;; r7 = r8 << 16, every other bit cleared
          rlwinm. r11,r10,0,0,23
          beq     L4
          li      r10,0xFF
  L4:
          rlwimi  r7,r9,8,16,23           ;; insert r9 as the 0x0000FF00 byte
          rlwimi  r7,r10,0,24,31          ;; insert r10 as the 0x000000FF byte
          stwu    r7,4(r4)                ;; store the pixel, r4 advances
          bdnz    L1
          blr
  ; 16-byte aligned writable scratch vector.  The AltiVec brightness routines
  ; store coeff in word 0 and the shift count in word 1, then load the whole
  ; vector with lvx.  Words 2 and 3 stay zero.
          .static_data
          .align  4
  vectortmpwork:
          .long   0,0,0,0