
/pathscale/be/com/emulate.cxx

https://github.com/somian/Path64
C++ | 5500 lines | 3562 code | 761 blank | 1177 comment | 398 complexity | 585ce9277290458cdce41b68e1891e67 MD5
Possible License(s): GPL-2.0, LGPL-2.0

Large files are truncated; the full file is available in the repository.

  1. /*
  2. * Copyright (C) 2007, 2008, 2009 PathScale, LLC. All Rights Reserved.
  3. */
  4. /*
  5. * Copyright (C) 2006, 2007. QLogic Corporation. All Rights Reserved.
  6. */
  7. /*
  8. * Copyright 2003, 2004, 2005, 2006 PathScale, Inc. All Rights Reserved.
  9. */
  10. /*
  11. Copyright (C) 2000, 2001 Silicon Graphics, Inc. All Rights Reserved.
  12. Path64 is free software; you can redistribute it and/or modify it
  13. under the terms of the GNU General Public License as published by
  14. the Free Software Foundation; either version 3, or (at your option)
  15. any later version.
  16. Path64 is distributed in the hope that it will be useful, but WITHOUT
  17. ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  18. or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
  19. License for more details.
  20. You should have received a copy of the GNU General Public License
  21. along with Path64; see the file COPYING. If not, write to the Free
  22. Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  23. 02110-1301, USA.
  24. Special thanks goes to SGI for their continued support to open source
  25. */
  26. #ifdef USE_PCH
  27. #include "be_com_pch.h"
  28. #endif /* USE_PCH */
  29. #pragma hdrstop
  30. #include <math.h>
  31. #if defined(BUILD_OS_DARWIN)
  32. #include <limits.h>
  33. #else /* defined(BUILD_OS_DARWIN) */
  34. #include <values.h>
  35. #endif /* defined(BUILD_OS_DARWIN) */
  36. #include <alloca.h>
  37. #include "defs.h"
  38. #include "config.h"
  39. #include "config_debug.h"
  40. #include "config_opt.h"
  41. #include "config_targ_opt.h"
  42. #include "errors.h"
  43. #include "erglob.h"
  44. #include "tracing.h"
  45. #include "stab.h"
  46. #include "data_layout.h"
  47. #include "wn.h"
  48. #include "wn_util.h"
  49. #include "const.h"
  50. #include "targ_const.h"
  51. #include "targ_sim.h"
  52. #include "fb_whirl.h"
  53. #include "be_symtab.h"
  54. #include "intrn_info.h"
  55. #if (__GNUC__ == 2)
  56. //
  57. // Provide trunc(), which doesn't exist in the GNU library. This is a
  58. // quick and dirty hack, and should be handled elsehow.
  59. //
  60. static inline double trunc(double d)
  61. {
  62. if (d < 0.0) {
  63. return 1.0 + floor(d);
  64. }
  65. else {
  66. return floor(d);
  67. }
  68. }
  69. #endif
  70. /*
  71. ** For lack of a better word, these emulations are run time
  72. ** routines that supply functionality to whirl expression nodes
  73. **
  74. ** The list was ripped off from ragnarok and may be
  75. ** incomplete/NYI
  76. */
  77. typedef enum
  78. {
  79. EM_TRAPUV, /* sets fpc_csr to interrupt on NaN */
  80. EM_RTS_CHECKSTACK, /* checks for stack overflow */
  81. EM_LL_MUL, /* double-word multiply */
  82. EM_LL_DIV, /* double-word divide */
  83. EM_ULL_DIV, /* unsigned double-word divide */
  84. EM_LL_MOD, /* double-word mod */
  85. EM_LL_REM, /* double-word remainder */
  86. EM_ULL_REM, /* unsigned double-word remainder */
  87. EM_LL_LSHIFT, /* double-word left shift */
  88. EM_LL_RSHIFT, /* double-word right shift */
  89. EM_ULL_RSHIFT, /* unsigned double-word right shift */
  90. EM_LL_M3_DSLLV, /* mips 3 simulation of dsllv */
  91. EM_LL_M3_DSRAV, /* mips 3 simulation of dsrav */
  92. EM_LL_M3_DSRLV, /* mips 3 simulation of dsrlv */
  93. EM_LL_TO_F, /* cvt double-word to float */
  94. EM_ULL_TO_F, /* cvt unsigned double-word to float */
  95. EM_LL_TO_D, /* cvt double-word to double float */
  96. EM_ULL_TO_D, /* cvt unsigned double-word to double float */
  97. EM_F_TO_LL, /* cvt float to double-word */
  98. EM_F_TO_ULL, /* cvt float to unsigned double-word */
  99. EM_F_ROUND_LL_F, /* round float to float */
  100. EM_F_TRUNC_LL_F, /* trunc float to float */
  101. EM_D_TO_LL, /* cvt double float to double-word */
  102. EM_D_TO_ULL, /* cvt double float to unsigned double-word */
  103. EM_D_ROUND_LL_D, /* round double to double */
  104. EM_D_TRUNC_LL_D, /* trunc double to double */
  105. EM_LL_BIT_EXTRACT , /* double-word bit-field extraction */
  106. EM_LL_BIT_INSERT , /* double-word bit-field insertion */
  107. EM_Q_ABS, /* quad absolute value */
  108. EM_Q_SQRT, /* quad square root */
  109. EM_Q_ADD, /* quad plus */
  110. EM_Q_SUB, /* quad minus */
  111. EM_Q_MPY, /* quad multiply */
  112. EM_Q_DIV, /* quad divide */
  113. EM_Q_MAX1, /* quad max */
  114. EM_Q_MIN1, /* quad min */
  115. EM_Q_EQ, /* quad equal */
  116. EM_Q_NE, /* quad not equal */
  117. EM_Q_GE, /* quad greater equal */
  118. EM_Q_GT, /* quad greater than */
  119. EM_Q_LE, /* quad less equal */
  120. EM_Q_LT, /* quad less than */
  121. EM_SNGL_Q, /* convert quad to single */
  122. EM_DBLE_Q, /* convert quad to double */
  123. EM_KI_QINT, /* convert quad to 64 bits int */
  124. EM_JI_QINT, /* convert quad to 32 bits int */
  125. EM_Q_EXT, /* convert float to quad */
  126. EM_Q_EXTD, /* convert double to quad */
  127. EM_Q_FLOTK, /* convert to quad from 64 bits int */
  128. EM_Q_FLOTKU, /* convert to quad from unsigned 64 bits int */
  129. EM_Q_FLOTJ, /* convert to quad from 32 bits int */
  130. EM_Q_FLOTJU, /* convert to quad from unsigned 32 bits int */
  131. EM_KIQNNT, /* round quad to closest 64 bits int value */
  132. EM_JIQNNT, /* round quad to closest 32 bits int value */
  133. EM_C4_SQRT, /* float complex sqrt */
  134. EM_C8_SQRT, /* double complex sqrt */
  135. EM_CQ_SQRT, /* quad complex sqrt */
  136. EM_C4_RSQRT, /* float complex reciprocal sqrt */
  137. EM_C8_RSQRT, /* double complex reciprocal sqrt */
  138. EM_CQ_RSQRT, /* quad complex reciprocal sqrt */
  139. EM_C4_ABS, /* float complex abs */
  140. EM_C8_ABS, /* double complex abs */
  141. EM_CQ_ABS, /* quad complex abs */
  142. EM_KI_QCEIL, /* ceil quad to 64 bits int (f90 only) */
  143. EM_JI_QCEIL, /* ceil quad to 32 bits int (f90 only) */
  144. EM_KI_QFLOOR, /* floor quad to 64 bits int (f90 only) */
  145. EM_JI_QFLOOR, /* floor quad to 32 bits int (f90 only) */
  146. EM_LAST /* sentinel */
  147. } EMULATION;
  148. /*
  149. ** describe calling semantics for FE and runtime
  150. ** intrinsics and expression
  151. */
  152. typedef enum
  153. {
  154. COERCE_none,
  155. COERCE_by_reference,
  156. COERCE_by_value,
  157. COERCE_struct_by_value,
  158. COERCE_struct_by_reference,
  159. COERCE_split_complex
  160. } COERCE, *COERCEp;
  161. typedef struct EM_ROUTINES
  162. {
  163. EMULATION id;
  164. const char *functionName;
  165. INT32 functionAttributes;
  166. COERCE runtimeArg0coercion;
  167. } EM_ROUTINES, *EM_ROUTINESp;
  168. #define EM_id(x) em_routines[x].id
  169. #define EM_rt_name(x) em_routines[x].functionName
  170. #define EM_attributes(x) em_routines[x].functionAttributes
  171. #define EM_coerce0(x) em_routines[x].runtimeArg0coercion
  172. /*
  173. ** Keep track of intrinsic/emulation arguments
  174. ** Problems we are trying to solve
  175. **
  176. ** COERCE_by_reference
  177. ** are (unfortunately) provided by the FE to match the
  178. ** run time routine. When we get the argument we might have
  179. ** an address (anonymous pointer) and hence, lost the
  180. ** type to dereference (if we are trying to inline it)
  181. **
  182. ** COERCE_split_complex
  183. ** complex are split into real/imaginary pairs doubling
  184. ** the number of arguments
  185. **
  186. ** This entire mechanism should be provided by the FE
  187. ** as part of wtable.h
  188. */
  189. #define NSE PU_NO_SIDE_EFFECTS
  190. #define PURE_NSE (PU_IS_PURE | NSE)
  191. #define INVALID NULL
  192. /*
  193. ** The emulation table may not yet be complete (or used)
  194. ** The fields are
  195. **
  196. ** EMULATION id;
  197. ** The table must be kept in order with the enumeration
  198. ** as it is a direct lookup
  199. **
  200. ** char *functionName;
  201. ** The exact external name, no underbars
  202. **
  203. ** INT32 functionAttributes;
  204. **
  205. ** COERCEp functionArgCoercion;
  206. ** Actual to runtime formal conversion
  207. ** The child of an expression/intrinsic WN needs to be
  208. ** converted to call its runtime function.
  209. ** ex.
  210. ** complex routines are now split-by_value
  211. **
  212. ** These routines are all by value so we already know the
  213. ** argument type
  214. */
  215. #define NONE 0
  216. const EM_ROUTINES em_routines[]=
  217. {
  218. EM_TRAPUV, "__trapuv", PURE_NSE, COERCE_none,
  219. EM_RTS_CHECKSTACK,"_RtlCheckStack",PURE_NSE, COERCE_none,
  220. EM_LL_MUL, "__ll_mul", PURE_NSE, COERCE_none,
  221. EM_LL_DIV, "__ll_div", PURE_NSE, COERCE_none,
  222. EM_ULL_DIV, "__ull_div", PURE_NSE, COERCE_none,
  223. EM_LL_MOD, "__ll_mod", PURE_NSE, COERCE_none,
  224. EM_LL_REM, "__ll_rem", PURE_NSE, COERCE_none,
  225. EM_ULL_REM, "__ull_rem", PURE_NSE, COERCE_none,
  226. EM_LL_LSHIFT, "__ll_lshift", PURE_NSE, COERCE_none,
  227. EM_LL_RSHIFT, "__ll_rshift", PURE_NSE, COERCE_none,
  228. EM_ULL_RSHIFT, "__ull_rshift", PURE_NSE, COERCE_none,
  229. EM_LL_M3_DSLLV, "__dsllv", PURE_NSE, COERCE_none,
  230. EM_LL_M3_DSRAV, "__dsrav", PURE_NSE, COERCE_none,
  231. EM_LL_M3_DSRLV, "__dsrlv", PURE_NSE, COERCE_none,
  232. EM_LL_TO_F, "__ll_to_f", PURE_NSE, COERCE_none,
  233. EM_ULL_TO_F, "__ull_to_f", PURE_NSE, COERCE_none,
  234. EM_LL_TO_D, "__ll_to_d", PURE_NSE, COERCE_none,
  235. EM_ULL_TO_D, "__ull_to_d", PURE_NSE, COERCE_none,
  236. EM_F_TO_LL, "__f_to_ll", PURE_NSE, COERCE_none,
  237. EM_F_TO_ULL, "__f_to_ull", PURE_NSE, COERCE_none,
  238. EM_F_ROUND_LL_F, "__f_round_ll_f",PURE_NSE, COERCE_none,
  239. EM_F_TRUNC_LL_F, "__f_trunc_ll_f",PURE_NSE, COERCE_none,
  240. EM_D_TO_LL, "__d_to_ll", PURE_NSE, COERCE_none,
  241. EM_D_TO_ULL, "__d_to_ull", PURE_NSE, COERCE_none,
  242. EM_D_ROUND_LL_D, "__d_round_ll_d",PURE_NSE, COERCE_none,
  243. EM_D_TRUNC_LL_D, "__d_trunc_ll_d",PURE_NSE, COERCE_none,
  244. EM_LL_BIT_EXTRACT,"__ll_bit_extract",PURE_NSE,COERCE_none,
  245. EM_LL_BIT_INSERT, "__ll_bit_insert",PURE_NSE, COERCE_none,
  246. EM_Q_ABS, "__qabs", PURE_NSE, COERCE_none,
  247. EM_Q_SQRT, "__qsqrt", PURE_NSE, COERCE_none,
  248. EM_Q_ADD, "__q_add", PURE_NSE, COERCE_none,
  249. EM_Q_SUB, "__q_sub", PURE_NSE, COERCE_none,
  250. EM_Q_MPY, "__q_mul", PURE_NSE, COERCE_none,
  251. EM_Q_DIV, "__q_div", PURE_NSE, COERCE_none,
  252. EM_Q_MAX1, "__q_max1", PURE_NSE, COERCE_none,
  253. EM_Q_MIN1, "__q_min1", PURE_NSE, COERCE_none,
  254. EM_Q_EQ, "__q_eq", PURE_NSE, COERCE_none,
  255. EM_Q_NE, "__q_ne", PURE_NSE, COERCE_none,
  256. EM_Q_GE, "__q_ge", PURE_NSE, COERCE_none,
  257. EM_Q_GT, "__q_gt", PURE_NSE, COERCE_none,
  258. EM_Q_LE, "__q_le", PURE_NSE, COERCE_none,
  259. EM_Q_LT, "__q_lt", PURE_NSE, COERCE_none,
  260. EM_SNGL_Q, "__sngl_q", PURE_NSE, COERCE_none,
  261. EM_DBLE_Q, "__dble_q", PURE_NSE, COERCE_none,
  262. EM_KI_QINT, "__ki_qint", PURE_NSE, COERCE_none,
  263. EM_JI_QINT, "__ji_qint", PURE_NSE, COERCE_none,
  264. EM_Q_EXT, "__q_ext", PURE_NSE, COERCE_none,
  265. EM_Q_EXTD, "__q_extd", PURE_NSE, COERCE_none,
  266. EM_Q_FLOTK, "__q_flotk", PURE_NSE, COERCE_none,
  267. EM_Q_FLOTKU, "__q_flotku", PURE_NSE, COERCE_none,
  268. EM_Q_FLOTJ, "__q_flotj", PURE_NSE, COERCE_none,
  269. EM_Q_FLOTJU, "__q_flotju", PURE_NSE, COERCE_none,
  270. EM_KIQNNT, "__kiqnnt", PURE_NSE, COERCE_none,
  271. EM_JIQNNT, "__jiqnnt", PURE_NSE, COERCE_none,
  272. EM_C4_SQRT, "__csqrt", PURE_NSE, COERCE_split_complex,
  273. EM_C8_SQRT, "__zsqrt", PURE_NSE, COERCE_split_complex,
  274. EM_CQ_SQRT, "__cqsqrt", PURE_NSE, COERCE_split_complex,
  275. EM_C4_RSQRT, INVALID, NONE, COERCE_none,
  276. EM_C8_RSQRT, INVALID, NONE, COERCE_none,
  277. EM_CQ_RSQRT, INVALID, NONE, COERCE_none,
  278. EM_C4_ABS, INVALID, NONE, COERCE_none,
  279. EM_C8_ABS, INVALID, NONE, COERCE_none,
  280. EM_CQ_ABS, INVALID, NONE, COERCE_none,
  281. EM_KI_QCEIL, "_CEILING_16_8", PURE_NSE, COERCE_none,
  282. EM_JI_QCEIL, "_CEILING_16_4", PURE_NSE, COERCE_none,
  283. EM_KI_QFLOOR, "_FLOOR_16_8", PURE_NSE, COERCE_none,
  284. EM_JI_QFLOOR, "_FLOOR_16_4", PURE_NSE, COERCE_none,
  285. };
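/*
** A minimal sanity-check sketch for the ordering rule noted above (the
** table must stay in step with the EMULATION enum, since lookups are
** direct indexes); hypothetical helper, not part of this file:
**
**   static void verify_em_routines(void)
**   {
**     for (INT32 i = 0; i < EM_LAST; i++)
**       Is_True(EM_id(i) == (EMULATION) i,
**               ("em_routines[] out of order at entry %d", i));
**   }
*/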
  286. typedef struct
  287. {
  288. INTRINSIC id;
  289. COERCE runtimeArg0;
  290. COERCE runtimeArg1;
  291. } INTRINSIC_RUNTIME_FORMALS;
  292. #define INTR_id(x) intrinsic_runtime_formals[(x)].id
  293. #define INTR_coerce0(x) intrinsic_runtime_formals[(x)].runtimeArg0
  294. #define INTR_coerce1(x) intrinsic_runtime_formals[(x)].runtimeArg1
  295. /*
  296. ** TODO
  297. ** eventually the FE will supply this information
  298. ** from the intrinsic table, when we finish the implementation
  299. */
  300. INTRINSIC_RUNTIME_FORMALS intrinsic_runtime_formals[]=
  301. {
  302. INTRN_C4I4EXPEXPR, COERCE_split_complex, COERCE_none,
  303. INTRN_C4I8EXPEXPR, COERCE_split_complex, COERCE_none,
  304. INTRN_C8I4EXPEXPR, COERCE_split_complex, COERCE_none,
  305. INTRN_C8I8EXPEXPR, COERCE_split_complex, COERCE_none,
  306. INTRN_CQI4EXPEXPR, COERCE_split_complex, COERCE_none,
  307. INTRN_CQI8EXPEXPR, COERCE_split_complex, COERCE_none,
  308. INTRN_C16I4EXPEXPR, COERCE_split_complex, COERCE_none,
  309. INTRN_C16I8EXPEXPR, COERCE_split_complex, COERCE_none,
  310. INTRN_C4EXPEXPR, COERCE_split_complex, COERCE_split_complex,
  311. INTRN_C8EXPEXPR, COERCE_split_complex, COERCE_split_complex,
  312. INTRN_CQEXPEXPR, COERCE_split_complex, COERCE_split_complex,
  313. INTRN_C16EXPEXPR, COERCE_split_complex, COERCE_split_complex,
  314. INTRN_F4C4ABS, COERCE_split_complex, COERCE_none,
  315. INTRN_F8C8ABS, COERCE_split_complex, COERCE_none,
  316. INTRN_FQCQABS, COERCE_split_complex, COERCE_none,
  317. INTRN_F16C16ABS, COERCE_split_complex, COERCE_none,
  318. INTRN_C4EXP, COERCE_split_complex, COERCE_none,
  319. INTRN_C8EXP, COERCE_split_complex, COERCE_none,
  320. INTRN_CQEXP, COERCE_split_complex, COERCE_none,
  321. INTRN_C16EXP, COERCE_split_complex, COERCE_none,
  322. INTRN_C4LOG, COERCE_split_complex, COERCE_none,
  323. INTRN_C8LOG, COERCE_split_complex, COERCE_none,
  324. INTRN_CQLOG, COERCE_split_complex, COERCE_none,
  325. INTRN_C16LOG, COERCE_split_complex, COERCE_none,
  326. INTRN_C4COS, COERCE_split_complex, COERCE_none,
  327. INTRN_C8COS, COERCE_split_complex, COERCE_none,
  328. INTRN_CQCOS, COERCE_split_complex, COERCE_none,
  329. INTRN_C16COS, COERCE_split_complex, COERCE_none,
  330. INTRN_C4SIN, COERCE_split_complex, COERCE_none,
  331. INTRN_C8SIN, COERCE_split_complex, COERCE_none,
  332. INTRN_CQSIN, COERCE_split_complex, COERCE_none,
  333. INTRN_C16SIN, COERCE_split_complex, COERCE_none
  334. };
  335. INT32 intrinsic_runtime_formals_size = sizeof(intrinsic_runtime_formals) /
  336. sizeof( INTRINSIC_RUNTIME_FORMALS);
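/*
** Example of what COERCE_split_complex in the table above implies: a
** single complex actual is passed to the runtime routine as its real
** and imaginary scalar halves, so e.g. an MTYPE_C8 argument of
** INTRN_F8C8ABS arrives as two MTYPE_F8 values (re, im), doubling the
** argument count exactly as described in the earlier comment block.
*/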
  337. typedef struct
  338. {
  339. INTRINSIC id;
  340. TYPE_ID parameterType0;
  341. TYPE_ID parameterType1;
  342. TYPE_ID parameterType2;
  343. } INTRINSIC_PARAMETER_TYPE;
  344. #define INTR_parm_id(x) intrinsic_parameter_type[(x)].id
  345. #define INTR_parmtype0(x) intrinsic_parameter_type[(x)].parameterType0
  346. #define INTR_parmtype1(x) intrinsic_parameter_type[(x)].parameterType1
  347. #define INTR_parmtype2(x) intrinsic_parameter_type[(x)].parameterType2
  348. INTRINSIC_PARAMETER_TYPE intrinsic_parameter_type[]=
  349. {
  350. INTRN_I1DIM, MTYPE_I1, MTYPE_I1, MTYPE_V,
  351. INTRN_I2DIM, MTYPE_I2, MTYPE_I2, MTYPE_V,
  352. INTRN_I4DIM, MTYPE_I4, MTYPE_I4, MTYPE_V,
  353. INTRN_I8DIM, MTYPE_I8, MTYPE_I8, MTYPE_V,
  354. INTRN_F4DIM, MTYPE_F4, MTYPE_F4, MTYPE_V,
  355. INTRN_F8DIM, MTYPE_F8, MTYPE_F8, MTYPE_V,
  356. INTRN_FQDIM, MTYPE_FQ, MTYPE_FQ, MTYPE_V,
  357. INTRN_F16DIM, MTYPE_F16, MTYPE_F16, MTYPE_V,
  358. INTRN_F4MOD, MTYPE_F4, MTYPE_F4, MTYPE_V,
  359. INTRN_F8MOD, MTYPE_F8, MTYPE_F8, MTYPE_V,
  360. INTRN_FQMOD, MTYPE_FQ, MTYPE_FQ, MTYPE_V,
  361. INTRN_F16MOD, MTYPE_F16, MTYPE_F16, MTYPE_V,
  362. INTRN_F8F4PROD, MTYPE_F4, MTYPE_F4, MTYPE_V,
  363. INTRN_FQF8PROD, MTYPE_F8, MTYPE_F8, MTYPE_V,
  364. INTRN_F16F8PROD, MTYPE_F8, MTYPE_F8, MTYPE_V,
  365. INTRN_I1SIGN, MTYPE_I1, MTYPE_I1, MTYPE_V,
  366. INTRN_I2SIGN, MTYPE_I2, MTYPE_I2, MTYPE_V,
  367. INTRN_I4SIGN, MTYPE_I4, MTYPE_I4, MTYPE_V,
  368. INTRN_I8SIGN, MTYPE_I8, MTYPE_I8, MTYPE_V,
  369. INTRN_F4SIGN, MTYPE_F4, MTYPE_F4, MTYPE_V,
  370. INTRN_F8SIGN, MTYPE_F8, MTYPE_F8, MTYPE_V,
  371. INTRN_FQSIGN, MTYPE_FQ, MTYPE_FQ, MTYPE_V,
  372. INTRN_F16SIGN, MTYPE_F16, MTYPE_F16, MTYPE_V,
  373. INTRN_F4AINT, MTYPE_F4, MTYPE_V, MTYPE_V,
  374. INTRN_F8AINT, MTYPE_F8, MTYPE_V, MTYPE_V,
  375. INTRN_FQAINT, MTYPE_FQ, MTYPE_V, MTYPE_V,
  376. INTRN_F16AINT, MTYPE_F16, MTYPE_V, MTYPE_V,
  377. INTRN_I2F4NINT, MTYPE_F4, MTYPE_V, MTYPE_V,
  378. INTRN_I4F4NINT, MTYPE_F4, MTYPE_V, MTYPE_V,
  379. INTRN_I8F4NINT, MTYPE_F4, MTYPE_V, MTYPE_V,
  380. INTRN_I2F8IDNINT, MTYPE_F8, MTYPE_V, MTYPE_V,
  381. INTRN_I4F8IDNINT, MTYPE_F8, MTYPE_V, MTYPE_V,
  382. INTRN_I8F8IDNINT, MTYPE_F8, MTYPE_V, MTYPE_V,
  383. INTRN_I2FQIQNINT, MTYPE_FQ, MTYPE_V, MTYPE_V,
  384. INTRN_I4FQIQNINT, MTYPE_FQ, MTYPE_V, MTYPE_V,
  385. INTRN_I8FQIQNINT, MTYPE_FQ, MTYPE_V, MTYPE_V,
  386. INTRN_I2F16IQNINT, MTYPE_F16, MTYPE_V, MTYPE_V,
  387. INTRN_I4F16IQNINT, MTYPE_F16, MTYPE_V, MTYPE_V,
  388. INTRN_I8F16IQNINT, MTYPE_F16, MTYPE_V, MTYPE_V,
  389. INTRN_F4ANINT, MTYPE_F4, MTYPE_V, MTYPE_V,
  390. INTRN_F8ANINT, MTYPE_F8, MTYPE_V, MTYPE_V,
  391. INTRN_FQANINT, MTYPE_FQ, MTYPE_V, MTYPE_V,
  392. INTRN_F16ANINT, MTYPE_F16, MTYPE_V, MTYPE_V,
  393. INTRN_F4LOG10, MTYPE_F4, MTYPE_V, MTYPE_V,
  394. INTRN_F8LOG10, MTYPE_F8, MTYPE_V, MTYPE_V,
  395. INTRN_FQLOG10, MTYPE_FQ, MTYPE_V, MTYPE_V,
  396. INTRN_F16LOG10, MTYPE_F16, MTYPE_V, MTYPE_V,
  397. INTRN_I1BTEST, MTYPE_I1, MTYPE_I1, MTYPE_V,
  398. INTRN_I2BTEST, MTYPE_I2, MTYPE_I2, MTYPE_V,
  399. INTRN_I4BTEST, MTYPE_I4, MTYPE_I4, MTYPE_V,
  400. INTRN_I8BTEST, MTYPE_I8, MTYPE_I8, MTYPE_V,
  401. INTRN_I1BSET, MTYPE_I1, MTYPE_I1, MTYPE_V,
  402. INTRN_I2BSET, MTYPE_I2, MTYPE_I2, MTYPE_V,
  403. INTRN_I4BSET, MTYPE_I4, MTYPE_I4, MTYPE_V,
  404. INTRN_I8BSET, MTYPE_I8, MTYPE_I8, MTYPE_V,
  405. INTRN_I1BCLR, MTYPE_I1, MTYPE_I1, MTYPE_V,
  406. INTRN_I2BCLR, MTYPE_I2, MTYPE_I2, MTYPE_V,
  407. INTRN_I4BCLR, MTYPE_I4, MTYPE_I4, MTYPE_V,
  408. INTRN_I8BCLR, MTYPE_I8, MTYPE_I8, MTYPE_V,
  409. INTRN_I1BITS, MTYPE_I1, MTYPE_I1, MTYPE_I1,
  410. INTRN_I2BITS, MTYPE_I2, MTYPE_I2, MTYPE_I2,
  411. INTRN_I4BITS, MTYPE_I4, MTYPE_I4, MTYPE_I4,
  412. INTRN_I8BITS, MTYPE_I8, MTYPE_I8, MTYPE_I8,
  413. INTRN_I1SHL, MTYPE_I1, MTYPE_I1, MTYPE_V,
  414. INTRN_I2SHL, MTYPE_I2, MTYPE_I2, MTYPE_V,
  415. INTRN_I1SHR, MTYPE_I1, MTYPE_I1, MTYPE_V,
  416. INTRN_I2SHR, MTYPE_I2, MTYPE_I2, MTYPE_V,
  417. INTRN_I1SHFT, MTYPE_I1, MTYPE_I1, MTYPE_V,
  418. INTRN_I2SHFT, MTYPE_I2, MTYPE_I2, MTYPE_V,
  419. INTRN_I4SHFT, MTYPE_I4, MTYPE_I4, MTYPE_V,
  420. INTRN_I8SHFT, MTYPE_I8, MTYPE_I8, MTYPE_V,
  421. INTRN_I1SHFTC, MTYPE_I1, MTYPE_I1, MTYPE_I1,
  422. INTRN_I2SHFTC, MTYPE_I2, MTYPE_I2, MTYPE_I2,
  423. INTRN_I4SHFTC, MTYPE_I4, MTYPE_I4, MTYPE_I4,
  424. INTRN_I8SHFTC, MTYPE_I8, MTYPE_I8, MTYPE_I8,
  425. INTRN_I1MVBITS, MTYPE_I1, MTYPE_I1, MTYPE_I1,
  426. INTRN_I2MVBITS, MTYPE_I2, MTYPE_I2, MTYPE_I2,
  427. INTRN_I4MVBITS, MTYPE_I4, MTYPE_I4, MTYPE_I4,
  428. INTRN_I8MVBITS, MTYPE_I8, MTYPE_I8, MTYPE_I8,
  429. };
  430. INT32 intrinsic_parameter_type_size = sizeof(intrinsic_parameter_type) /
  431. sizeof( INTRINSIC_PARAMETER_TYPE);
  432. #define WN_has_ty(x) (OPCODE_has_1ty(WN_opcode(x)) || OPCODE_has_2ty(WN_opcode(x)))
  433. #define WN_is_pointer(x) (WN_has_ty(x) && (TY_kind(WN_ty(x)) == KIND_POINTER))
  434. #define Is_Integer_Constant(x) (WN_operator(x) == OPR_INTCONST)
  435. #define Is_Constant(x) (WN_operator(x) == OPR_CONST)
  436. #define OPCODE_is_intrinsic(op) \
  437. ((OPCODE_operator((op)) == OPR_INTRINSIC_CALL) || \
  438. (OPCODE_operator((op)) == OPR_INTRINSIC_OP))
  439. #define ABS(x) (((x)<0) ? -(x) : (x))
  440. /* ====================================================================
  441. * Exported Functions
  442. * ====================================================================
  443. */
  444. extern const char * INTR_intrinsic_name(WN *tree);
  445. extern WN * make_pointer_to_node(WN *block, WN *tree);
  446. /* ====================================================================
  447. * Imported Functions
  448. * ====================================================================
  449. */
  450. extern PREG_NUM AssignExpr(WN *block, WN *tree, TYPE_ID type);
  451. extern TY_IDX compute_alignment_type(WN *tree, TY_IDX, INT64 offset);
  452. extern INT32 compute_copy_alignment(TY_IDX, TY_IDX, INT32 offset);
  453. extern BOOL lower_is_aliased(WN *wn1, WN *wn2, INT64 size);
  454. extern TYPE_ID compute_copy_quantum(INT32 );
  455. extern WN *WN_I1const(TYPE_ID type, INT64 con);
  456. extern void WN_annotate_call_flags(WN *call, ST *sym);
  457. extern BOOL CG_bcopy_cannot_overlap;
  458. extern BOOL CG_memcpy_cannot_overlap;
  459. extern BOOL CG_memmove_cannot_overlap;
  460. extern INT32 CG_memmove_inst_count;
  461. #ifdef KEY
  462. extern INT32 CG_memmove_align_inst_count;
  463. #endif
  464. /* ====================================================================
  465. * Forward Declarations
  466. * ====================================================================
  467. */
  468. static EMULATION WN_emulation(WN *tree);
  469. static WN *em_exp_int(WN *block, WN *x, WN *pow, TYPE_ID type);
  470. static WN *em_exp_float(WN *block, WN *x, WN *pow, TYPE_ID type);
  471. static WN *em_mod_float(WN *block, WN *x, WN *y);
  472. static WN *em_complex_exp(WN *block, WN *x);
  473. static WN *em_complex_cos(WN *block, WN *x);
  474. static COERCE INTR_coerce_runtime(WN *tree, INT32 arg);
  475. static TYPE_ID INTR_parameter_type(WN *tree, INT32 arg);
  476. static TY_IDX aux_compute_alignment(WN *tree);
  477. /* ====================================================================
  478. * private variables
  479. * ====================================================================
  480. */
  481. static INT32 em_exp_int_max = 256;
  482. #define MAX_INTRINSIC_ARGS 20
  483. /* ====================================================================
  484. *
  485. * TYPE_ID INTR_return_mtype(id)
  486. *
  487. *
  488. *
  489. * ==================================================================== */
  490. TYPE_ID INTR_return_mtype(INTRINSIC id)
  491. {
  492. INTRN_RETKIND rtype = INTRN_return_kind(id);
  493. switch(rtype)
  494. {
  495. case IRETURN_I1: return MTYPE_I1;
  496. case IRETURN_I2: return MTYPE_I2;
  497. case IRETURN_I4: return MTYPE_I4;
  498. case IRETURN_I8: return MTYPE_I8;
  499. case IRETURN_U1: return MTYPE_U1;
  500. case IRETURN_U2: return MTYPE_U2;
  501. case IRETURN_U4: return MTYPE_U4;
  502. case IRETURN_U8: return MTYPE_U8;
  503. case IRETURN_F4: return MTYPE_F4;
  504. case IRETURN_F8: return MTYPE_F8;
  505. case IRETURN_FQ: return MTYPE_FQ;
  506. case IRETURN_F16: return MTYPE_F16;
  507. case IRETURN_C4: return MTYPE_C4;
  508. case IRETURN_C8: return MTYPE_C8;
  509. case IRETURN_CQ: return MTYPE_CQ;
  510. case IRETURN_C16: return MTYPE_C16;
  511. case IRETURN_V: return MTYPE_V;
  512. case IRETURN_PV:
  513. case IRETURN_PU1:
  514. case IRETURN_DA1:
  515. case IRETURN_SZT:
  516. case IRETURN_PC :
  517. case IRETURN_UNKNOWN:
  518. return MTYPE_UNKNOWN;
  519. }
  520. return MTYPE_UNKNOWN;
  521. }
  522. /* ====================================================================
  523. *
  524. * EMULATION WN_emulation(WN *tree)
  525. *
  526. * Provide the correct emulation enum for a given WN
  527. *
  528. * TODO: cache most frequently used id's
  529. *
  530. * ==================================================================== */
  531. static EMULATION WN_emulation(WN *tree)
  532. {
  533. OPCODE op = WN_opcode(tree);
  534. TYPE_ID type = OPCODE_rtype(op);
  535. switch (WN_operator(tree)) {
  536. case OPR_SQRT:
  537. switch(type) {
  538. case MTYPE_C4: return EM_C4_SQRT;
  539. case MTYPE_C8: return EM_C8_SQRT;
  540. case MTYPE_CQ: return EM_CQ_SQRT;
  541. case MTYPE_C16: return EM_CQ_SQRT;
  542. case MTYPE_FQ: return EM_Q_SQRT;
  543. case MTYPE_F16: return EM_Q_SQRT;
  544. }
  545. break;
  546. case OPR_RSQRT:
  547. switch(type) {
  548. case MTYPE_C4: return EM_C4_RSQRT;
  549. case MTYPE_C8: return EM_C8_RSQRT;
  550. case MTYPE_CQ: return EM_CQ_RSQRT;
  551. case MTYPE_C16: return EM_CQ_RSQRT;
  552. }
  553. break;
  554. case OPR_CVT:
  555. {
  556. TYPE_ID desc = WN_desc(tree);
  557. if (desc == MTYPE_FQ || desc == MTYPE_F16)
  558. {
  559. switch(type) {
  560. case MTYPE_I4: return EM_JI_QINT;
  561. case MTYPE_I8: return EM_KI_QINT;
  562. case MTYPE_F4: return EM_SNGL_Q;
  563. case MTYPE_F8: return EM_DBLE_Q;
  564. }
  565. break;
  566. }
  567. else if (type == MTYPE_FQ || type == MTYPE_F16)
  568. {
  569. switch(desc) {
  570. case MTYPE_U4: return EM_Q_FLOTJU;
  571. case MTYPE_I4: return EM_Q_FLOTJ;
  572. case MTYPE_U8: return EM_Q_FLOTKU;
  573. case MTYPE_I8: return EM_Q_FLOTK;
  574. case MTYPE_F8: return EM_Q_EXTD;
  575. case MTYPE_F4: return EM_Q_EXT;
  576. }
  577. }
  578. }
  579. break;
  580. case OPR_RND:
  581. {
  582. TYPE_ID desc = WN_desc(tree);
  583. if (desc == MTYPE_FQ || desc == MTYPE_F16)
  584. {
  585. switch(type)
  586. {
  587. case MTYPE_I4: return EM_JIQNNT;
  588. case MTYPE_I8: return EM_KIQNNT;
  589. }
  590. break;
  591. }
  592. }
  593. break;
  594. default:
  595. if (type == MTYPE_FQ || type == MTYPE_F16)
  596. {
  597. switch(WN_operator(tree)) {
  598. case OPR_ISTORE:
  599. case OPR_ISTOREX:
  600. case OPR_STID:
  601. case OPR_ILOAD:
  602. case OPR_ILOADX:
  603. case OPR_SELECT:
  604. case OPR_LDID:
  605. case OPR_CONST:
  606. case OPR_NEG:
  607. break;
  608. case OPR_ABS: return EM_Q_ABS;
  609. case OPR_ADD: return EM_Q_ADD;
  610. case OPR_SUB: return EM_Q_SUB;
  611. case OPR_MPY: return EM_Q_MPY;
  612. case OPR_DIV: return EM_Q_DIV;
  613. case OPR_MAX: return EM_Q_MAX1;
  614. case OPR_MIN: return EM_Q_MIN1;
  615. case OPR_RECIP:
  616. case OPR_RSQRT:
  617. case OPR_MADD:
  618. case OPR_MSUB:
  619. case OPR_NMADD:
  620. case OPR_NMSUB:
  621. case OPR_RND:
  622. case OPR_TRUNC:
  623. case OPR_CVT:
  624. case OPR_SQRT:
  625. Is_True(FALSE, ("WN_emulation() %s should be already processed", OPCODE_name(WN_opcode(tree))));
  626. break;
  627. case OPR_CEIL:
  628. case OPR_FLOOR:
  629. case OPR_MOD:
  630. case OPR_REM:
  631. case OPR_CVTL:
  632. case OPR_CALL:
  633. case OPR_INTRINSIC_CALL:
  634. Is_True(FALSE, ("WN_emulation() %s invalid context for op", OPCODE_name(WN_opcode(tree))));
  635. }
  636. }
  637. else if (WN_desc(tree)== MTYPE_FQ || WN_desc(tree)== MTYPE_F16)
  638. {
  639. switch(WN_operator(tree)) {
  640. case OPR_EQ: return EM_Q_EQ;
  641. case OPR_NE: return EM_Q_NE;
  642. case OPR_GT: return EM_Q_GT;
  643. case OPR_GE: return EM_Q_GE;
  644. case OPR_LT: return EM_Q_LT;
  645. case OPR_LE: return EM_Q_LE;
  646. case OPR_TRUNC:
  647. switch(type)
  648. {
  649. case MTYPE_I4: return EM_JI_QINT;
  650. case MTYPE_I8: return EM_KI_QINT;
  651. }
  652. break;
  653. case OPR_CEIL:
  654. switch(type)
  655. {
  656. case MTYPE_I4: return EM_JI_QCEIL;
  657. case MTYPE_I8: return EM_KI_QCEIL;
  658. }
  659. break;
  660. case OPR_FLOOR:
  661. switch(type)
  662. {
  663. case MTYPE_I4: return EM_JI_QFLOOR;
  664. case MTYPE_I8: return EM_KI_QFLOOR;
  665. }
  666. break;
  667. }
  668. }
  669. break;
  670. }
  671. FmtAssert(FALSE, ("WN_emulation() %s not recognized", OPCODE_name(WN_opcode(tree))));
  672. return EM_LAST;
  673. }
  674. /* ====================================================================
  675. *
  676. * WN *checkForZero(WN *block, TYPE_ID type, PREG_NUM xN, WN *if_else, WN *value)
  677. *
  678. * Create test block for zero
  679. * if (x==0)
  680. * { ret = 0; }
  681. * else
  682. * { ret = value }
  683. * return ret;
  684. *
  685. * ==================================================================== */
  686. static WN *checkForZero(WN *block, TYPE_ID type, PREG_NUM xN, WN *if_else, WN *value)
  687. {
  688. TYPE_ID rtype = WN_rtype(value);
  689. WN *if_then;
  690. PREG_NUM retN;
  691. if_then = WN_CreateBlock();
  692. retN = AssignExpr(if_then, WN_Zerocon(rtype), rtype);
  693. {
  694. WN *st;
  695. st = WN_StidIntoPreg(rtype, retN, MTYPE_To_PREG(rtype), value);
  696. WN_INSERT_BlockLast(if_else, st);
  697. }
  698. {
  699. WN *cond, *IF;
  700. Is_True(MTYPE_is_float(type), ("unexpected type"));
  701. cond = WN_EQ(type,
  702. WN_LdidPreg(type, xN),
  703. WN_Zerocon(type));
  704. IF = WN_CreateIf(cond, if_then, if_else);
  705. WN_INSERT_BlockLast(block, IF);
  706. }
  707. return WN_LdidPreg(rtype, retN);
  708. }
  709. /* ====================================================================
  710. *
  711. * WN * WN_arg(WN *tree, INT32 arg)
  712. *
  713. * return Nth kid, skipping PARM
  714. * ==================================================================== */
  715. static WN *WN_arg(WN *tree, INT32 arg)
  716. {
  717. WN *child= WN_kid(tree, arg);
  718. if (WN_operator_is(child, OPR_PARM))
  719. {
  720. return WN_kid0(child);
  721. }
  722. return child;
  723. }
  724. static WN *em_clen(WN *block, WN *len)
  725. {
  726. return len;
  727. }
  728. /*
  729. **
  730. ** Auxiliary routine to implement ( x + .5 * sign(x) )
  731. */
  732. static WN *aux_nearest(TYPE_ID rtype, PREG_NUM xN)
  733. {
  734. WN *rel, *select;
  735. rel = WN_GE(rtype, WN_LdidPreg(rtype, xN), WN_Zerocon(rtype));
  736. select = WN_Select(rtype,
  737. rel,
  738. WN_Floatconst(rtype, .5),
  739. WN_Floatconst(rtype, -.5));
  740. return WN_Add(rtype, WN_LdidPreg(rtype, xN), select);
  741. }
  742. /*
  743. ** Auxiliary routine for Convert ( {Round,Trunc}(rtype) )
  744. */
  745. static WN *aux_CvtRnd(TYPE_ID rtype, WN *x)
  746. {
  747. WN *rnd;
  748. TYPE_ID intToFloat = (Slow_CVTDL) ? MTYPE_I4 : MTYPE_I8;
  749. // Needed for correctness, no matter how slow the truncate
  750. if (WN_rtype(x) != MTYPE_F4) {
  751. intToFloat = MTYPE_I8;
  752. }
  753. rnd = WN_Rnd(rtype, intToFloat, x);
  754. return WN_Cvt(intToFloat, rtype, rnd);
  755. }
  756. static WN *aux_CvtTrunc(TYPE_ID rtype, WN *x)
  757. {
  758. WN *trunc;
  759. TYPE_ID intToFloat = (Slow_CVTDL) ? MTYPE_I4 : MTYPE_I8;
  760. /*
  761. * this is em_aint()
  762. */
  763. // Needed for correctness, no matter how slow the truncate
  764. if (WN_rtype(x) != MTYPE_F4) {
  765. intToFloat = MTYPE_I8;
  766. }
  767. trunc = WN_Trunc(rtype, intToFloat, x);
  768. return WN_Cvt(intToFloat, rtype, trunc);
  769. }
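/*
** Likely rationale for the intToFloat choice above (assuming IEEE-754
** operands): an MTYPE_F4 value that still carries a fractional part is
** necessarily smaller than 2**23 and round-trips safely through
** MTYPE_I4, while an MTYPE_F8 value can carry a fraction up to 2**52
** and must therefore go through MTYPE_I8 even when Slow_CVTDL is set;
** see also note [1] below on when a bare trunc can fail.
*/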
  770. /*
  771. ** Optimizer cannot deal with zero length mstore so return BLOCK
  772. */
  773. static WN *aux_CreateMstore(WN_OFFSET offset, TY_IDX type, WN *value, WN *addr,
  774. WN *size)
  775. {
  776. if (Is_Integer_Constant(size) && WN_const_val(size) <= 0)
  777. {
  778. /* Cannot delete these nodes, since they are used later (bug 623566)
  779. WN_Delete(value);
  780. WN_Delete(addr);
  781. WN_Delete(size);
  782. */
  783. return WN_CreateBlock();
  784. }
  785. UINT64 ty_size = TY_size(TY_pointed(type));
  786. if (ty_size != 0 && WN_const_val (size) % ty_size != 0) {
  787. // size copied is not a multiple of the size of the type, which means
  788. // that we are copying part of the type. We then change the pointer
  789. // to (void*)
  790. static TY_IDX void_star = TY_IDX_ZERO;
  791. if (void_star == TY_IDX_ZERO)
  792. void_star = Make_Pointer_Type (MTYPE_To_TY (MTYPE_V));
  793. Set_TY_IDX_index (type, TY_IDX_index (void_star));
  794. }
  795. return WN_CreateMstore(offset, type, value, addr, size);
  796. }
  797. /*
  798. **
  799. ** Notes for the following functions:
  800. **
  801. ** [1] Fast_trunc_Allowed (currently when Roundoff_Level >= ROUNDOFF_SIMPLE)
  802. ** generate trunc. This will fail when (-2**63 <= |x| < 2**63-1)
  803. **
  804. ** [2] Test x against TWO_EXP
  805. ** Floating point value is such that (x+1 == x), ie. there is no
  806. ** possible fractional value ie.
  807. ** 2**23 <= |x| return x
  808. **
  809. ** It is possible (if necessary) to special case MTYPE_F4 and generate
  810. ** a trunc to MTYPE_I4.
  811. **/
  812. #define TWO_EXP_23 8388608.0
  813. #define TWO_EXP_52 4503599627370496.0
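/*
** Worked example for the bounds above, assuming IEEE-754 formats: single
** precision has a 24-bit significand, so once |x| >= 2**23 (8388608.0)
** adjacent floats are at least 1.0 apart and every representable value
** is already an integer; the double-precision analogue with a 53-bit
** significand is 2**52 (4503599627370496.0).
*/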
  814. /*
  815. **
  816. ** INTRN_I2F4NINT:
  817. ** INTRN_I4F4NINT:
  818. ** INTRN_I8F4NINT:
  819. ** INTRN_I2F8IDNINT:
  820. ** INTRN_I4F8IDNINT:
  821. ** INTRN_I8F8IDNINT:
  822. ** INTRN_I2FQIQNINT:
  823. ** INTRN_I4FQIQNINT:
  824. ** INTRN_I8FQIQNINT:
  825. ** INTRN_I2F16IQNINT:
  826. ** INTRN_I4F16IQNINT:
  827. ** INTRN_I8F16IQNINT:
  828. **
  829. ** change into
  830. ** rnd(x) roundoff >= 3
  831. ** trunc( x + .5 * sign(x) )
  832. */
  833. static WN *em_nearest_int(WN *block, TYPE_ID rtype, WN *x)
  834. {
  835. TYPE_ID type = WN_rtype(x);
  836. if (Fast_NINT_Allowed)
  837. {
  838. return WN_Rnd(type, rtype, x);
  839. }
  840. else if ((type == MTYPE_F4) || (type == MTYPE_F8))
  841. {
  842. WN *add;
  843. PREG_NUM xN;
  844. xN = AssignExpr(block, x, type);
  845. add = aux_nearest(type, xN);
  846. if (Fast_trunc_Allowed)
  847. {
  848. return WN_Trunc(type, rtype, add);
  849. }
  850. else
  851. {
  852. WN *rel, *select;
  853. double con= (type==MTYPE_F4) ? TWO_EXP_23 : TWO_EXP_52;
  854. rel = WN_GE(type,
  855. WN_Abs(type, WN_LdidPreg(type, xN)),
  856. WN_Floatconst(type, con));
  857. select = WN_Select(type, rel, WN_LdidPreg(type, xN), add);
  858. return WN_Trunc(type, rtype, select);
  859. }
  860. }
  861. else
  862. {
  863. return NULL;
  864. }
  865. }
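/*
** Scalar sketch of the guarded path above (hypothetical helper, not in
** this file), for a double operand rounded to a 64-bit integer:
**
**   static INT64 nearest_int(double x)
**   {
**     double add = (x >= 0.0) ? x + 0.5 : x - 0.5;
**     return (INT64) (fabs(x) >= TWO_EXP_52 ? x : add);
**   }
*/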
  866. /*
  867. **
  868. ** INTRN_F4ANINT:
  869. ** INTRN_F8ANINT:
  870. ** INTRN_FQANINT:
  871. ** INTRN_F16ANINT:
  872. **
  873. ** change into
  874. ** cvt (float, trunc( x + .5 * sign(x) )) roundoff>= 3
  875. */
  876. static WN *em_nearest_aint(WN *block, TYPE_ID rtype, WN *x)
  877. {
  878. if (Fast_NINT_Allowed)
  879. {
  880. return aux_CvtRnd(rtype, x);
  881. }
  882. else if ((rtype == MTYPE_F4) || (rtype == MTYPE_F8))
  883. {
  884. PREG_NUM xN;
  885. WN *add, *cvt;
  886. xN = AssignExpr(block, x, rtype);
  887. add = aux_nearest(rtype, xN);
  888. /*
  889. * this is em_aint()
  890. */
  891. cvt = aux_CvtTrunc(rtype, add);
  892. if (Fast_trunc_Allowed)
  893. {
  894. return cvt;
  895. }
  896. else
  897. {
  898. WN *rel;
  899. double con= (rtype==MTYPE_F4) ? TWO_EXP_23 : TWO_EXP_52;
  900. rel = WN_GE(rtype,
  901. WN_Abs(rtype, WN_LdidPreg(rtype, xN)),
  902. WN_Floatconst(rtype, con));
  903. return WN_Select(rtype, rel, WN_LdidPreg(rtype, xN), cvt);
  904. }
  905. }
  906. return NULL;
  907. }
  908. /*
  909. **
  910. ** INTRN_F4AINT
  911. ** INTRN_F8AINT
  912. ** INTRN_FQAINT
  913. ** INTRN_F16AINT
  914. **
  915. ** change into
  916. ** cvt (float, trunc(x))
  917. */
  918. static WN *em_aint(WN *block, TYPE_ID rtype, WN *x)
  919. {
  920. if (Fast_trunc_Allowed)
  921. {
  922. return aux_CvtTrunc(rtype, x);
  923. }
  924. else if ((rtype == MTYPE_F4) || (rtype == MTYPE_F8))
  925. {
  926. PREG_NUM xN;
  927. WN *rel, *cvt;
  928. double con= (rtype==MTYPE_F4) ? TWO_EXP_23 : TWO_EXP_52;
  929. xN = AssignExpr(block, x, rtype);
  930. rel = WN_GE(rtype,
  931. WN_Abs(rtype, WN_LdidPreg(rtype, xN)),
  932. WN_Floatconst(rtype, con));
  933. cvt = aux_CvtTrunc(rtype, WN_LdidPreg(rtype, xN));
  934. return WN_Select(rtype, rel, WN_LdidPreg(rtype, xN), cvt);
  935. }
  936. else
  937. {
  938. return NULL;
  939. }
  940. }
  941. /*
  942. **
  943. ** change into
  944. ** | x | if y >= 0
  945. ** - | x | if y < 0
  946. **
  947. ** --> absN = | x |;
  948. ** --> (y>=0) ? absN : -absN;
  949. */
  950. static WN *em_sign(WN *block, WN *x, WN *y)
  951. {
  952. PREG_NUM absN;
  953. TYPE_ID type = WN_rtype(x);
  954. WN *abs, *select;
  955. #ifdef KEY // bug 9660
  956. if (MTYPE_is_integral(type) && ! MTYPE_signed(type))
  957. type = Mtype_TransferSign(MTYPE_I4, type);
  958. #endif
  959. #ifdef KEY // bug 12052
  960. if (MTYPE_is_integral(type) &&
  961. MTYPE_byte_size(type) < MTYPE_byte_size(WN_rtype(y)))
  962. type = Mtype_TransferSize(WN_rtype(y), type);
  963. #endif
  964. abs = WN_Abs(type, x);
  965. absN = AssignExpr(block, abs, type);
  966. select = WN_Select(type,
  967. WN_GE(type, y, WN_Zerocon(type)),
  968. WN_LdidPreg(type, absN),
  969. WN_Neg(type, WN_LdidPreg(type, absN)));
  970. return select;
  971. }
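/*
** Scalar equivalent of the expansion above (Fortran SIGN(x,y) transfer),
** illustrative only:
**
**   static double sign_xfer(double x, double y)
**   {
**     double a = fabs(x);
**     return (y >= 0.0) ? a : -a;
**   }
*/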
  972. /*
  973. **
  974. ** change into
  975. ** cvt (x) * cvt(y)
  976. */
  977. static WN *em_prod(WN *block, TYPE_ID rtype, WN *x, WN *y)
  978. {
  979. TYPE_ID type = WN_rtype(x);
  980. WN *mpy;
  981. mpy = WN_Mpy(rtype,
  982. WN_Cvt(type, rtype, x),
  983. WN_Cvt(type, rtype, y));
  984. return mpy;
  985. }
  986. /*
  987. **
  988. ** change into
  989. ** (x>y) ? (x-y) : 0
  990. */
  991. static WN *em_dim(WN *block, WN *x, WN *y)
  992. {
  993. PREG_NUM xN, yN;
  994. TYPE_ID type = WN_rtype(x);
  995. WN *rel, *sub, *select;
  996. xN = AssignExpr(block, x, type);
  997. yN = AssignExpr(block, y, type);
  998. rel = WN_GT(type,
  999. WN_LdidPreg(type, xN),
  1000. WN_LdidPreg(type, yN));
  1001. sub = WN_Sub(type,
  1002. WN_LdidPreg(type, xN),
  1003. WN_LdidPreg(type, yN));
  1004. select = WN_Select(type,
  1005. rel,
  1006. sub,
  1007. WN_Zerocon(type));
  1008. return select;
  1009. }
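/*
** Fortran DIM semantics for reference: DIM(x,y) = (x > y) ? x - y : 0,
** so DIM(7,3) == 4 and DIM(3,7) == 0.
*/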
  1010. /*
  1011. **
  1012. ** change into
  1013. ** x - y * ( FLOAT ( |(x / y)| ))
  1014. */
  1015. static WN *em_mod_float(WN *block, WN *x, WN *y)
  1016. {
  1017. PREG_NUM xN, yN;
  1018. TYPE_ID type = WN_rtype(x);
  1019. WN *div, *cvt, *mpy, *sub;
  1020. if ((type == MTYPE_F4) || (type == MTYPE_F8)) {
  1021. xN = AssignExpr(block, x, type);
  1022. yN = AssignExpr(block, y, type);
  1023. div = WN_Div(type,
  1024. WN_LdidPreg(type, xN),
  1025. WN_LdidPreg(type, yN));
  1026. cvt = em_aint(block, type, div);
  1027. mpy = WN_Mpy(type,
  1028. WN_LdidPreg(type, yN),
  1029. cvt);
  1030. sub = WN_Sub(type,
  1031. WN_LdidPreg(type, xN),
  1032. mpy);
  1033. return sub;
  1034. } else {
  1035. return NULL;
  1036. }
  1037. }
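/*
** Worked example of the expansion above (Fortran MOD, truncation toward
** zero): MOD(7.5, 2.0) = 7.5 - 2.0 * AINT(7.5 / 2.0)
**                      = 7.5 - 2.0 * 3.0 = 1.5,
** and MOD(-7.5, 2.0) = -7.5 - 2.0 * (-3.0) = -1.5.
*/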
  1038. /*
  1039. ** WN *build_mult_tree(block, TYPE_ID type, PREG_NUM xN, int pow)
  1040. **
  1041. ** Build a multiply tree to make shapiro happy.
  1042. **
  1043. ** Actually, create a series of temporaries to hold the powers that be.
  1044. **
  1045. ** ex. x ** 9 (= 1001)
  1046. ** t0= x;
  1047. ** t1= t0*t0; (x**2)
  1048. ** t2= t1*t1; (x**4)
  1049. ** t3= t2*t2; (x**8)
  1050. ** ans = t3 * t0;
  1051. **
  1052. */
  1053. #define BIT_IS_ON(x,i) ((x) & (1<<(i)))
  1054. static WN *build_mult_tree(WN *block, TYPE_ID type, PREG_NUM xN, INT32 pow)
  1055. {
  1056. PREG_NUM powers[16]; /* could handle pow = 64k */
  1057. INT32 i, n = 0;
  1058. PREG_NUM xNm1;
  1059. WN *tree = NULL;
  1060. Is_True((pow>0), ("expected pow>0"));
  1061. powers[n++] = xN;
  1062. xNm1 = xN;
  1063. for(i= 1; ((1<<i) <= pow); i++)
  1064. {
  1065. WN *mpy;
  1066. mpy = WN_Mpy(type, WN_LdidPreg(type, xNm1), WN_LdidPreg(type, xNm1));
  1067. xNm1 = AssignExpr(block, mpy, type);
  1068. powers[n++] = xNm1;
  1069. }
  1070. for(i= 0; ((1<<i) <= pow); i++)
  1071. {
  1072. if (BIT_IS_ON(pow, i))
  1073. {
  1074. PREG_NUM powerN = powers[i];
  1075. if (tree)
  1076. {
  1077. tree = WN_Mpy(type, tree, WN_LdidPreg(type, powerN));
  1078. }
  1079. else
  1080. {
  1081. tree = WN_LdidPreg(type, powerN);
  1082. }
  1083. }
  1084. }
  1085. return tree;
  1086. }
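/*
** The same exponentiation-by-squaring idea in plain C (illustrative
** sketch, assuming pow > 0, not part of this file):
**
**   static double pow_uint(double x, INT32 pow)
**   {
**     double result = 1.0;
**     for (double p = x; pow != 0; pow >>= 1, p *= p)
**       if (pow & 1)
**         result *= p;
**     return result;
**   }
*/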
  1087. static WN *em_exp_float(WN *block, WN *x, WN *pow, TYPE_ID type)
  1088. {
  1089. if (Is_Constant(pow))
  1090. {
  1091. TCON con = Const_Val(pow);
  1092. BOOL sqrt, rsqrt;
  1093. #ifdef KEY
  1094. BOOL sqrt_25, rsqrt_25, sqrt_75, rsqrt_75;
  1095. BOOL cbrt_33, cbrt_66;
  1096. #endif
  1097. WN *tree, *x_copy;
  1098. double n;
  1099. /*
  1100. * for complex x verify the power is a real number
  1101. * (TODO) general complex ** complex
  1102. */
  1103. if (MTYPE_is_complex(type))
  1104. {
  1105. TCON Ipow;
  1106. Ipow = Extract_Complex_Imag(con);
  1107. if (Targ_To_Host_Float(Ipow) == 0.0)
  1108. {
  1109. con = Extract_Complex_Real(con);
  1110. }
  1111. else
  1112. {
  1113. return NULL;
  1114. }
  1115. }
  1116. n = Targ_To_Host_Float(con);
  1117. sqrt = rsqrt = FALSE;
  1118. #ifdef KEY
  1119. cbrt_33 = cbrt_66 = FALSE;
  1120. sqrt_25 = rsqrt_25 = sqrt_75 = rsqrt_75 = FALSE;
  1121. #endif
  1122. if (trunc(n) == n)
  1123. {
  1124. ;
  1125. }
  1126. else if ((trunc(ABS(n))+.5) == ABS(n))
  1127. {
  1128. /*
  1129. * if we need to multiply by sqrt we need a copy of x
  1130. * as it might get changed underneath us.
  1131. */
  1132. if (n<0)
  1133. rsqrt = TRUE;
  1134. else
  1135. sqrt = TRUE;
  1136. x_copy = WN_COPY_Tree(x);
  1137. }
  1138. #ifdef KEY
  1139. else if ((trunc(ABS(n))+.25) == ABS(n))
  1140. {
  1141. /*
  1142. * if we need to multiply by sqrt we need a copy of x
  1143. * as it might get changed underneath us.
  1144. */
  1145. if (n<0)
  1146. rsqrt_25 = TRUE;
  1147. else
  1148. sqrt_25 = TRUE;
  1149. x_copy = WN_COPY_Tree(x);
  1150. }
  1151. else if ((trunc(ABS(n))+.75) == ABS(n))
  1152. {
  1153. /*
  1154. * if we need to multiply by sqrt we need a copy of x
  1155. * as it might get changed underneath us.
  1156. */
  1157. if (n<0)
  1158. rsqrt_75 = TRUE;
  1159. else
  1160. sqrt_75 = TRUE;
  1161. x_copy = WN_COPY_Tree(x);
  1162. }
  1163. #ifdef TARG_X8664
  1164. else if (ABS((trunc(n)+1.0/3) - n) < .0000001 &&
  1165. ! (Is_Target_64bit() && !Is_Target_Anyx86() && OPT_Fast_Math))
  1166. { // the pow in fast_math is faster than cbrt, so no point converting
  1167. cbrt_33 = TRUE;
  1168. x_copy = WN_COPY_Tree(x);
  1169. }
  1170. else if (ABS((trunc(n)+2.0/3) - n) < .0000001 &&
  1171. ! (Is_Target_64bit() && !Is_Target_Anyx86() && OPT_Fast_Math))
  1172. { // the pow in fast_math is faster than cbrt, so no point converting
  1173. cbrt_66 = TRUE;
  1174. x_copy = WN_COPY_Tree(x);
  1175. }
  1176. #endif
  1177. #endif
  1178. else
  1179. {
  1180. return NULL;
  1181. }
  1182. {
  1183. WN *ipow = WN_Intconst(MTYPE_I4, (INT64) trunc(n));
  1184. tree = em_exp_int(block, x, ipow, type);
  1185. }
  1186. if (sqrt || rsqrt)
  1187. {
  1188. #ifdef KEY
  1189. // bug 4824: non-constant float x could be negative
  1190. // bug 4990: Do the check only for C/C++ and if
  1191. // -fmath-errno (-LANG:math_errno=on)
  1192. if (!PU_f77_lang (Get_Current_PU()) &&
  1193. !PU_f90_lang (Get_Current_PU()) && // ! Fortran
  1194. LANG_Math_Errno && // -fmath-errno
  1195. MTYPE_is_float (WN_rtype (x_copy)) &&
  1196. (!Is_Constant (x_copy) ||
  1197. Targ_To_Host_Float (Const_Val (x_copy)) < 0))
  1198. return NULL;
  1199. #endif // KEY
  1200. #ifdef TARG_X8664
  1201. // Bug 5935 - rsqrtsd or rsqrtpd is absent.
  1202. if (rsqrt && (type == MTYPE_F8 || type == MTYPE_V16F8))
  1203. return NULL;
  1204. #endif
  1205. if (tree)
  1206. {
  1207. /*
  1208. * x ** n+.5 -> (x**n) * (x**.5)
  1209. * where the function em_exp_int has already evaluated
  1210. */
  1211. PREG_NUM xN, treeN;
  1212. WN *fractional;
  1213. xN = AssignExpr(block, x_copy, type);
  1214. treeN = AssignExpr(block, tree, type);
  1215. fractional = (sqrt) ? WN_Sqrt(type, WN_LdidPreg(type, xN)) :
  1216. WN_Rsqrt(type, WN_LdidPreg(type, xN));
  1217. tree = WN_Mpy(type,
  1218. WN_LdidPreg(type, treeN),
  1219. fractional);
  1220. }
  1221. }
  1222. #ifdef KEY // bug 6932
  1223. // evaluate (x**0.25) as sqrt(sqrt(x))
  1224. if (sqrt_25 || rsqrt_25)
  1225. {
  1226. if (!PU_f77_lang (Get_Current_PU()) &&
  1227. !PU_f90_lang (Get_Current_PU()) && // ! Fortran
  1228. LANG_Math_Errno && // -fmath-errno
  1229. MTYPE_is_float (WN_rtype (x_copy)) &&
  1230. (!Is_Constant (x_copy) ||
  1231. Targ_To_Host_Float (Const_Val (x_copy)) < 0))
  1232. return NULL;
  1233. #ifdef TARG_X8664
  1234. // rsqrtsd or rsqrtpd is absent.
  1235. if (rsqrt_25 && (type == MTYPE_F8 || type == MTYPE_V16F8))
  1236. return NULL;
  1237. #endif
  1238. if (tree)
  1239. {
  1240. /*
  1241. * x ** n+.25 -> (x**n) * (x**.25)
  1242. * where the function em_exp_int has already evaluated
  1243. */
  1244. PREG_NUM xN, treeN;
  1245. WN *fractional;
  1246. xN = AssignExpr(block, x_copy, type);
  1247. treeN = AssignExpr(block, tree, type);
  1248. if (sqrt_25)
  1249. fractional = WN_Sqrt(type, WN_Sqrt(type, WN_LdidPreg(type, xN)));
  1250. else
  1251. fractional = WN_Sqrt(type, WN_Rsqrt(type, WN_LdidPreg(type, xN)));
  1252. tree = WN_Mpy(type,
  1253. WN_LdidPreg(type, treeN),
  1254. fractional);
  1255. }
  1256. }
  1257. // evaluate (x**0.75) as sqrt(x)*sqrt(sqrt(x))
  1258. if (sqrt_75 || rsqrt_75)
  1259. {
  1260. if (!PU_f77_lang (Get_Current_PU()) &&
  1261. !PU_f90_lang (Get_Current_PU()) && // ! Fortran
  1262. LANG_Math_Errno && // -fmath-errno
  1263. MTYPE_is_float (WN_rtype (x_copy)) &&
  1264. (!Is_Constant (x_copy) ||
  1265. Targ_To_Host_Float (Const_Val (x_copy)) < 0))
  1266. return NULL;
  1267. #ifdef TARG_X8664
  1268. // rsqrtsd or rsqrtpd is absent.
  1269. if (rsqrt_75 && (type == MTYPE_F8 || type == MTYPE_V16F8))
  1270. return NULL;
  1271. #endif
  1272. if (tree)
  1273. {
  1274. /*
  1275. * x ** n+.75 -> (x**n) * (x**.75)
  1276. * where the function em_exp_int has already evaluated
  1277. */
  1278. PREG_NUM xN, treeN;
  1279. WN *fractional;
  1280. xN = AssignExpr(block, x_copy, type);
  1281. treeN = AssignExpr(block, tree, type);
  1282. if (sqrt_75)
  1283. fractional = WN_Mpy(type,
  1284. WN_Sqrt(type, WN_LdidPreg(type, xN)),
  1285. WN_Sqrt(type,
  1286. WN_Sqrt(type, WN_LdidPreg(type, xN))));
  1287. else
  1288. fractional = WN_Mpy(type,
  1289. WN_Rsqrt(type, WN_LdidPreg(type, xN)),
  1290. WN_Rsqrt(type,
  1291. WN_Sqrt(type, WN_LdidPreg(type, xN))));
  1292. tree = WN_Mpy(type,
  1293. WN_LdidPreg(type, treeN),
  1294. fractional);
  1295. }
  1296. }
  1297. // evaluate (x**0.333333) by calling cbrt()/cbrtf()
  1298. if (cbrt_33 || cbrt_66)
  1299. {
  1300. if (type != MTYPE_F4 && type != MTYPE_F8)
  1301. return NULL;
  1302. if (tree)
  1303. {
  1304. /*
  1305. * x ** n+1/3 -> (x**n) * (x**1/3)
  1306. * where the function em_exp_int has already evaluated
  1307. */
  1308. PREG_NUM xN = AssignExpr(block, x_copy, type);
  1309. WN *kid = WN_CreateParm(type, WN_LdidPreg(type, xN), Be_Type_Tbl(type),
  1310. WN_PARM_BY_VALUE | WN_PARM_READ_ONLY);
  1311. WN* fraction = WN_Create_Intrinsic(
  1312. OPCODE_make_op(OPR_INTRINSIC_OP, type, MTYPE_V),
  1313. type == MTYPE_F4 ? INTRN_F4CBRT : INTRN_F8CBRT,
  1314. 1, &kid);
  1315. if (cbrt_66) {
  1316. PREG_NUM x13 = AssignExpr(block, fraction, type);
  1317. fraction = WN_Mpy(type, WN_LdidPreg(type, x13),
  1318. WN_LdidPreg(type, x13));
  1319. }
  1320. tree = WN_Mpy(type, tree, fraction);
  1321. }
  1322. }
  1323. #endif
  1324. return tree;
  1325. }
  1326. return NULL;
  1327. }
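/*
** The fractional-exponent cases handled above reduce to these identities
** (valid for x >= 0), which is what the expansions compute:
**   x**(n + 0.50) = x**n * sqrt(x)
**   x**(n + 0.25) = x**n * sqrt(sqrt(x))
**   x**(n + 0.75) = x**n * sqrt(x) * sqrt(sqrt(x))
**   x**(n + 1/3)  = x**n * cbrt(x)
**   x**(n + 2/3)  = x**n * cbrt(x) * cbrt(x)
** with the negative-exponent variants using reciprocal square roots.
*/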
  1328. static WN *em_exp_int(WN *block, WN *x, WN *pow, TYPE_ID type)
  1329. {
  1330. if (Is_Integer_Constant(pow))
  1331. {
  1332. INT32 n = WN_const_val(pow);
  1333. INT32 absN = ABS(n);
  1334. WN *exp= NULL;
  1335. if (em_exp_int_max < absN)
  1336. return NULL;
  1337. switch(n) {
  1338. case 1:
  1339. exp = x;
  1340. break;
  1341. case -1:
  1342. exp = WN_Inverse(type, x);
  1343. break;
  1344. case 0:
  1345. if (MTYPE_type_class(type) & MTYPE_CLASS_INTEGER)
  1346. exp = WN_Intconst(type, 1);
  1347. else
  1348. exp = WN_Floatconst(type, 1.0);
  1349. break;
  1350. case 2:
  1351. {
  1352. PREG_NUM xN;
  1353. xN = AssignExpr(block, x, type);
  1354. exp = WN_Mpy(type,
  1355. WN_LdidPreg(type, xN),
  1356. WN_LdidPreg(type, xN));
  1357. break;
  1358. }
  1359. default:
  1360. {
  1361. PREG_NUM xN;
  1362. if (Fast_Exp_Allowed)
  1363. {
  1364. xN = AssignExpr(block, x, type);
  1365. exp = build_mult_tree(block, type, xN, absN);
  1366. WN_Delete(pow);
  1367. if (n < 0)
  1368. exp = WN_Inverse(type, exp);
  1369. }
  1370. }
  1371. }
  1372. return exp;
  1373. }
  1374. else if (Is_Integer_Constant(x))
  1375. {
  1376. /*
  1377. * Optimize {-2,-1,0,1,2} ** n
  1378. */
  1379. INT32 val = WN_const_val(x);
  1380. switch(val)
  1381. {
  1382. case -2:
  1383. {
  1384. /*
  1385. * (n>=0) ? ( (n&1) ? - (1<<n) : 1<<n ) : 0
  1386. */
  1387. PREG_NUM powN, shlN;
  1388. WN *shl, *band, *cond, *select, *ge;
  1389. powN = AssignExpr(block, pow, type);
  1390. shl = WN_Shl(type,
  1391. WN_Intconst(type, 1),
  1392. WN_LdidPreg(type, powN));
  1393. shlN = AssignExpr(block, shl, type);
  1394. band = WN_Band(type,
  1395. WN_LdidPreg(type, powN),
  1396. WN_Intconst(type, 1));
  1397. cond = WN_EQ(type, band, WN_Zerocon(type));
  1398. select = WN_Select(type,
  1399. cond,
  1400. WN_LdidPreg(type, shlN),
  1401. WN_Neg(type, WN_LdidPreg(type, shlN)));
  1402. ge = WN_GE(type,
  1403. WN_LdidPreg(type, powN),
  1404. WN_Zerocon(type));
  1405. return WN_Select(type,
  1406. ge,
  1407. select,
  1408. WN_Zerocon(type));
  1409. }
  1410. case -1:
  1411. {
  1412. /*
  1413. * (n&1) ? -1 : 1;
  1414. */
  1415. WN *band;
  1416. band = WN_Band(type, pow, WN_Intconst(type, 1));
  1417. return WN_Select(type,
  1418. WN_EQ(type, band, WN_Zerocon(type)),
  1419. WN_Intconst(type, 1),
  1420. WN_Intconst(type, -1));
  1421. }
  1422. case 0:
  1423. /*
  1424. * (n==0) ? 1 : 0
  1425. * simpler is (n==0)
  1426. */
  1427. return WN_EQ(type, pow, WN_Zerocon(type));
  1428. case 1:
  1429. /*
  1430. * always and forever 1
  1431. */
  1432. return WN_Intconst(type, 1);
  1433. case 2:
  1434. {
  1435. /*
  1436. * (n>=0) ? 1<<n : 0
  1437. * simpler is (n>=0) << n
  1438. */
  1439. WN *ge;
  1440. PREG_NUM powN;
  1441. powN = AssignExpr(block, pow, type);
  1442. ge = WN_GE(type,
  1443. WN_LdidPreg(type, powN),
  1444. WN_Zerocon(type));
  1445. return WN_Shl(type,
  1446. ge,
  1447. WN_LdidPreg(type, powN));
  1448. }
  1449. }
  1450. }
  1451. return NULL;
  1452. }
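/*
** Worked example for the constant-base cases above: with x == -2 and
** n >= 0 the select computes (n & 1) ? -(1 << n) : (1 << n), so
** (-2)**3 == -8 and (-2)**4 == 16; for n < 0 the integer result is 0,
** which is what the outer (n >= 0) select returns.
*/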
  1453. /*
  1454. ** quad negate looks like complex negate
  1455. **
  1456. ** if q = (x,y) then
  1457. ** -q = (-x, -y)
  1458. **
  1459. ** TODO nail down preg offset interface
  1460. ** Bug 12895: MIPS quad represents ieee 128, so -q = (-x, y)
  1461. */
  1462. static WN *em_quad_neg(WN *block, WN *tree)
  1463. {
  1464. TYPE_ID newType;
  1465. TYPE_ID type = WN_rtype(tree);
  1466. PREG_NUM qN, qNlo;
  1467. /*
  1468. * assign a quad preg temp as we will be referencing twice (sortof)
  1469. */
  1470. qN = AssignExpr(block, WN_kid0(tree), type);
  1471. if (MTYPE_is_complex(type))
  1472. {
  1473. newType = MTYPE_C8;
  1474. qNlo = qN+2;
  1475. }
  1476. else /* assume MTYPE_FQ or MTYPE_F16 */
  1477. {
  1478. newType = MTYPE_F8;
  1479. qNlo = qN+1;
  1480. }
  1481. {
  1482. WN *wn, *st;
  1483. ST *npreg = MTYPE_To_PREG(newType);
  1484. #ifdef TARG_MIPS
  1485. wn = WN_LdidPreg(newType, qN); // Bug 12895
  1486. #else
  1487. wn = WN_Neg(newType, WN_LdidPreg(newType, qN));
  1488. #endif
  1489. st = WN_StidIntoPreg(newType, qN, npreg, wn);
  1490. WN_INSERT_BlockLast(block, st);
  1491. wn = WN_Neg(newType, WN_LdidPreg(newType, qNlo));
  1492. st = WN_StidIntoPreg(newType, qNlo, npreg, wn);
  1493. WN_INSERT_BlockLast(block, st);
  1494. }
  1495. WN_Delete(tree);
  1496. return WN_LdidPreg(type, qN);
  1497. }
  1498. static WN *em_quad_abs(WN *block, WN *tree)
  1499. {
  1500. TYPE_ID newType;
  1501. TYPE_ID type = WN_rtype(tree);
  1502. PREG_NUM qN, qNlo;
  1503. /*
  1504. * assign a quad preg temp as we will be referencing twice (sortof)
  1505. */
  1506. qN = AssignExpr(block, WN_kid0(tree), type);
  1507. Is_True(! MTYPE_is_complex(type), ("em_quad_abs emulates FQ not CQ"));
  1508. newType = MTYPE_F8;
  1509. qNlo = qN+1;
  1510. {
  1511. WN *wn, *st;
  1512. ST *npreg = MTYPE_To_PREG(newType);
  1513. #ifdef TARG_MIPS
  1514. wn = WN_LdidPreg(newType, qN); // Bug 12895
  1515. #else
  1516. wn = WN_Abs(newType, WN_LdidPreg(newType, qN));
  1517. #endif
  1518. st = WN_StidIntoPreg(newType, qN, npreg, wn);
  1519. WN_INSERT_BlockLast(block, st);
  1520. wn = WN_Abs(newType, WN_LdidPreg(newType, qNlo));
  1521. st = WN_StidIntoPreg(newType, qNlo, npreg, wn);
  1522. WN_INSERT_BlockLast(block, st);
  1523. }
  1524. WN_Delete(tree);
  1525. return WN_LdidPreg(type, qN);
  1526. }
  1527. /*
  1528. ** There is no native quad select, so we must turn the
  1529. ** expression back into an if/else block
  1530. **
  1531. ** select: (cond) ? exp1 : exp2
  1532. **
  1533. ** --> if (cond) qN = exp1;
  1534. ** else qN = exp2;
  1535. ** return qN
  1536. **
  1537. */
  1538. static WN *em_split_select(WN *block, WN *tree)
  1539. {
  1540. TYPE_ID rtype = WN_rtype(tree);
  1541. PREG_NUM qN;
  1542. WN *if_then, *if_else;
  1543. if_then = WN_CreateBlock();
  1544. if_else = WN_CreateBlock();
  1545. {
  1546. WN *exp1 = WN_kid1(tree);
  1547. qN = AssignExpr(if_then, exp1, rtype);
  1548. }
  1549. {
  1550. WN *wn;
  1551. WN *exp2 = WN_kid2(tree);
  1552. ST *preg = MTYPE_To_PREG(rtype);
  1553. wn = WN_StidIntoPreg(rtype, qN, preg, exp2);
  1554. WN_INSERT_BlockLast(if_else, wn);
  1555. }
  1556. {
  1557. WN *IF;
  1558. WN *cond = WN_kid0(tree);
  1559. IF = WN_CreateIf(cond, if_then, if_else);
  1560. WN_INSERT_BlockLast(block, IF);
  1561. }
  1562. WN_Delete(tree);
  1563. return WN_LdidPreg(rtype, qN);
  1564. }
  1565. /*
  1566. ** Evaluate the following function
  1567. **
  1568. ** Definition
  1569. ** x y INTRN_DIVFLOOR INTRN_DIVCEIL
  1570. ** --- -------------- -------------
  1571. ** + + x / y (x+y-1) / y
  1572. **
  1573. ** - - x / y (x+y+1) / y
  1574. **
  1575. ** + - (x+ -1-y)/y x / y
  1576. **
  1577. ** - + (x+ 1-y)/y x / y
  1578. **
  1579. **
  1580. ** The issue was to evaluate (divfloor) without branch code.
  1581. **
  1582. ** Tricks
  1583. ** f(x) = -1 (x<0)
  1584. ** +1 (x>=0)
  1585. ** {
  1586. ** t= x>>31;
  1587. ** f= t+t+1
  1588. ** }
  1589. ** MASK(x,y,v)= 0 (x>=0, y>=0), (x<0, y<0) ++, --
  1590. ** v (x>=0, y<0), (x<0, y>=0) +-, -+
  1591. ** {
  1592. ** t= (x^y)>>31
  1593. ** MASK= t & v
  1594. ** }
  1595. **
  1596. ** The cleverness (shapiro's) was the composition of these functions
  1597. ** to evaluate divfloor.
  1598. **
  1599. ** DIVFLOOR(x,y)=
  1600. ** v = f(y) - y; (-1-y) [+-], (+1-y) [-+]
  1601. ** (x + MASK(x,y,v)) / y
  1602. **
  1603. ** DIVCEIL(x,y) = -DIVFLOOR(-x,y)
  1604. **
  1605. ** x,y are assumed integral or we could just do a divide/floor
  1606. **
  1607. **
  1608. */
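/*
** A minimal scalar sketch of the branch-free DIVFLOOR described above,
** assuming 32-bit two's-complement operands and an arithmetic right
** shift (hypothetical helper, not part of this file):
**
**   static INT32 divfloor(INT32 x, INT32 y)
**   {
**     INT32 t    = y >> 31;          // -1 if y < 0, else 0
**     INT32 one  = t + t + 1;        // f(y): -1 or +1
**     INT32 v    = one - y;          // (-1-y) or (+1-y)
**     INT32 mask = (x ^ y) >> 31;    // -1 if signs differ, else 0
**     return (x + (mask & v)) / y;   // floor division
**   }
**
** DIVCEIL(x, y) then follows as -divfloor(-x, y).
*/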
  1609. static WN *em_divfloor(WN *block, TYPE_ID type, WN *x, WN *y)
  1610. {
  1611. PREG_NUM xN, yN;
  1612. WN *numer, *div;
  1613. Is_True((MTYPE_is_integral(WN_rtype(x)) &&
  1614. MTYPE_is_integral(WN_rtype(y))),
  1615. ("em_divfloor() arguments should be type integral"));
  1616. xN = AssignExpr(block, x, type);
  1617. yN = AssignExpr(block, y, type);
  1618. {
  1619. /*
  1620. * one = 1 (y >= 0)
  1621. * -1 (y < 0)
  1622. */
  1623. TYPE_ID ytype = WN_rtype(y);
  1624. WN *sra, *add, *one, *bxor, *mask, *sub, *band;
  1625. #ifdef TARG_X8664
  1626. // Bug 3264 - This algorithm requires that byte size be identical for
  1627. // ytype and type, for zero-extended 64-bit target ISA.
  1628. if (MTYPE_is_unsigned(ytype) &&
  1629. MTYPE_byte_size(ytype) < MTYPE_byte_size(type))
  1630. ytype = type;
  1631. #endif
  1632. sra = WN_Ashr(type,
  1633. WN_LdidPreg(type, yN),
  1634. WN_Intconst(type, MTYPE_size_reg(ytype)-1));
  1635. add = WN_Add(type,
  1636. sra,
  1637. WN_COPY_Tree(sra));
  1638. one = WN_Add(ytype,
  1639. add,
  1640. WN_Intconst(ytype, 1));
  1641. /*
  1642. * mask = 0 (x,y)= ++ --
  1643. * mask = -1 (x,y)= +- +-
  1644. */
  1645. bxor = WN_Bxor(ytype,
  1646. WN_LdidPreg(type, xN),
  1647. WN_LdidPreg(type, yN));
  1648. mask = WN_Ashr(type,
  1649. bxor,
  1650. WN_Intconst(type, MTYPE_size_reg(type)-1));
  1651. /*
  1652. * sub = 1 - y (y >= 0)
  1653. * -1 - y (y < 0)
  1654. */
  1655. sub = WN_Sub(type, one, WN_LdidPreg

Listing truncated here; see the repository for the remainder of the file.