
/pathscale/be/com/emulate.cxx

https://github.com/somian/Path64
C++ | 5500 lines | 3562 code | 761 blank | 1177 comment | 398 complexity
Possible License(s): GPL-2.0, LGPL-2.0
  1. /*
  2. * Copyright (C) 2007, 2008, 2009 PathScale, LLC. All Rights Reserved.
  3. */
  4. /*
  5. * Copyright (C) 2006, 2007. QLogic Corporation. All Rights Reserved.
  6. */
  7. /*
  8. * Copyright 2003, 2004, 2005, 2006 PathScale, Inc. All Rights Reserved.
  9. */
  10. /*
  11. Copyright (C) 2000, 2001 Silicon Graphics, Inc. All Rights Reserved.
  12. Path64 is free software; you can redistribute it and/or modify it
  13. under the terms of the GNU General Public License as published by
  14. the Free Software Foundation; either version 3, or (at your option)
  15. any later version.
  16. Path64 is distributed in the hope that it will be useful, but WITHOUT
  17. ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  18. or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
  19. License for more details.
  20. You should have received a copy of the GNU General Public License
  21. along with Path64; see the file COPYING. If not, write to the Free
  22. Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  23. 02110-1301, USA.
  24. Special thanks goes to SGI for their continued support to open source
  25. */
  26. #ifdef USE_PCH
  27. #include "be_com_pch.h"
  28. #endif /* USE_PCH */
  29. #pragma hdrstop
  30. #include <math.h>
  31. #if defined(BUILD_OS_DARWIN)
  32. #include <limits.h>
  33. #else /* defined(BUILD_OS_DARWIN) */
  34. #include <values.h>
  35. #endif /* defined(BUILD_OS_DARWIN) */
  36. #include <alloca.h>
  37. #include "defs.h"
  38. #include "config.h"
  39. #include "config_debug.h"
  40. #include "config_opt.h"
  41. #include "config_targ_opt.h"
  42. #include "errors.h"
  43. #include "erglob.h"
  44. #include "tracing.h"
  45. #include "stab.h"
  46. #include "data_layout.h"
  47. #include "wn.h"
  48. #include "wn_util.h"
  49. #include "const.h"
  50. #include "targ_const.h"
  51. #include "targ_sim.h"
  52. #include "fb_whirl.h"
  53. #include "be_symtab.h"
  54. #include "intrn_info.h"
  55. #if (__GNUC__ == 2)
  56. //
  57. // Provide trunc(), which doesn't exist in the GNU library. This is a
  58. // quick and dirty hack, and should be handled elsehow.
  59. //
  60. static inline double trunc(double d)
  61. {
  62. if (d < 0.0) {
  63. return ceil(d); /* 1.0 + floor(d) mishandles exact negative integers, e.g. maps -2.0 to -1.0 */
  64. }
  65. else {
  66. return floor(d);
  67. }
  68. }
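/*
** For reference: trunc rounds toward zero, floor toward -inf, ceil
** toward +inf, e.g.
**   trunc(2.7) == 2.0    floor(2.7) == 2.0    ceil(2.7) == 3.0
**   trunc(-2.7) == -2.0  floor(-2.7) == -3.0  ceil(-2.7) == -2.0
** so for d < 0, trunc(d) == ceil(d).
*/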
  69. #endif
  70. /*
  71. ** For lack of a better word, these emulations are run time
  72. ** routines that supply functionality to whirl expression nodes
  73. **
  74. ** The list was ripped off from ragnarok and may be
  75. ** incomplete/NYI
  76. */
  77. typedef enum
  78. {
  79. EM_TRAPUV, /* sets fpc_csr to interrupt on NaN */
  80. EM_RTS_CHECKSTACK, /* checks for stack overflow */
  81. EM_LL_MUL, /* double-word multiply */
  82. EM_LL_DIV, /* double-word divide */
  83. EM_ULL_DIV, /* unsigned double-word divide */
  84. EM_LL_MOD, /* double-word mod */
  85. EM_LL_REM, /* double-word remainder */
  86. EM_ULL_REM, /* unsigned double-word remainder */
  87. EM_LL_LSHIFT, /* double-word left shift */
  88. EM_LL_RSHIFT, /* double-word right shift */
  89. EM_ULL_RSHIFT, /* unsigned double-word right shift */
  90. EM_LL_M3_DSLLV, /* mips 3 simulation of dsllv */
  91. EM_LL_M3_DSRAV, /* mips 3 simulation of dsrav */
  92. EM_LL_M3_DSRLV, /* mips 3 simulation of dsrlv */
  93. EM_LL_TO_F, /* cvt double-word to float */
  94. EM_ULL_TO_F, /* cvt unsigned double-word to float */
  95. EM_LL_TO_D, /* cvt double-word to double float */
  96. EM_ULL_TO_D, /* cvt unsigned double-word to double float */
  97. EM_F_TO_LL, /* cvt float to double-word */
  98. EM_F_TO_ULL, /* cvt float to unsigned double-word */
  99. EM_F_ROUND_LL_F, /* round float to float */
  100. EM_F_TRUNC_LL_F, /* trunc float to float */
  101. EM_D_TO_LL, /* cvt double float to double-word */
  102. EM_D_TO_ULL, /* cvt double float to unsigned double-word */
  103. EM_D_ROUND_LL_D, /* round double to double */
  104. EM_D_TRUNC_LL_D, /* trunc double to double */
  105. EM_LL_BIT_EXTRACT , /* double-word bit-field extraction */
  106. EM_LL_BIT_INSERT , /* double-word bit-field insertion */
  107. EM_Q_ABS, /* quad absolute value */
  108. EM_Q_SQRT, /* quad square root */
  109. EM_Q_ADD, /* quad plus */
  110. EM_Q_SUB, /* quad minus */
  111. EM_Q_MPY, /* quad multiply */
  112. EM_Q_DIV, /* quad divide */
  113. EM_Q_MAX1, /* quad max */
  114. EM_Q_MIN1, /* quad min */
  115. EM_Q_EQ, /* quad equal */
  116. EM_Q_NE, /* quad not equal */
  117. EM_Q_GE, /* quad greater equal */
  118. EM_Q_GT, /* quad greater than */
  119. EM_Q_LE, /* quad less equal */
  120. EM_Q_LT, /* quad less than */
  121. EM_SNGL_Q, /* convert quad to single */
  122. EM_DBLE_Q, /* convert quad to double */
  123. EM_KI_QINT, /* convert quad to 64 bits int */
  124. EM_JI_QINT, /* convert quad to 32 bits int */
  125. EM_Q_EXT, /* convert float to quad */
  126. EM_Q_EXTD, /* convert double to quad */
  127. EM_Q_FLOTK, /* convert to quad from 64 bits int */
  128. EM_Q_FLOTKU, /* convert to quad from unsigned 64 bits int */
  129. EM_Q_FLOTJ, /* convert to quad from 32 bits int */
  130. EM_Q_FLOTJU, /* convert to quad from unsigned 32 bits int */
  131. EM_KIQNNT, /* round quad to closest 64 bits int value */
  132. EM_JIQNNT, /* round quad to closest 32 bits int value */
  133. EM_C4_SQRT, /* float complex sqrt */
  134. EM_C8_SQRT, /* double complex sqrt */
  135. EM_CQ_SQRT, /* quad complex sqrt */
  136. EM_C4_RSQRT, /* float complex reciprocal sqrt */
  137. EM_C8_RSQRT, /* double complex reciprocal sqrt */
  138. EM_CQ_RSQRT, /* quad complex reciprocal sqrt */
  139. EM_C4_ABS, /* float complex abs */
  140. EM_C8_ABS, /* double complex abs */
  141. EM_CQ_ABS, /* quad complex abs */
  142. EM_KI_QCEIL, /* ceil quad to 64 bits int (f90 only) */
  143. EM_JI_QCEIL, /* ceil quad to 32 bits int (f90 only) */
  144. EM_KI_QFLOOR, /* floor quad to 64 bits int (f90 only) */
  145. EM_JI_QFLOOR, /* floor quad to 32 bits int (f90 only) */
  146. EM_LAST /* sentinel */
  147. } EMULATION;
  148. /*
  149. ** describe calling semantics for FE and runtime
  150. ** intrinsics and expression
  151. */
  152. typedef enum
  153. {
  154. COERCE_none,
  155. COERCE_by_reference,
  156. COERCE_by_value,
  157. COERCE_struct_by_value,
  158. COERCE_struct_by_reference,
  159. COERCE_split_complex
  160. } COERCE, *COERCEp;
  161. typedef struct EM_ROUTINES
  162. {
  163. EMULATION id;
  164. const char *functionName;
  165. INT32 functionAttributes;
  166. COERCE runtimeArg0coercion;
  167. } EM_ROUTINES, *EM_ROUTINESp;
  168. #define EM_id(x) em_routines[x].id
  169. #define EM_rt_name(x) em_routines[x].functionName
  170. #define EM_attributes(x) em_routines[x].functionAttributes
  171. #define EM_coerce0(x) em_routines[x].runtimeArg0coercion
  172. /*
  173. ** Keep track of intrinsic/emulation arguments
  174. ** Problems we are trying to solve
  175. **
  176. ** COERCE_by_reference
  177. ** are (unfortunately) provided by the FE to match the
  178. ** run time routine. When we get the argument we might have
  179. ** an address (anonymous pointer) and hence, lost the
  180. ** type to dereference (if we are trying to inline it)
  181. **
  182. ** COERCE_split_complex
  183. ** complex are split into real/imaginary pairs doubling
  184. the number of arguments
  185. **
  186. ** This entire mechanism should be provided by the FE
  187. ** as part of wtable.h
  188. */
  189. #define NSE PU_NO_SIDE_EFFECTS
  190. #define PURE_NSE (PU_IS_PURE | NSE)
  191. #define INVALID NULL
  192. /*
  193. ** The emulation table may not yet be complete (or used)
  194. ** The fields are
  195. **
  196. ** EMULATION id;
  197. ** The table must be kept in order with the enumeration
  198. ** as it is a direct lookup
  199. **
  200. ** char *functionName;
  201. ** The exact external name, no underbars
  202. **
  203. ** INT32 functionAttributes;
  204. **
  205. ** COERCEp functionArgCoercion;
  206. ** Actual to runtime formal conversion
  207. ** The child of an expression/intrinsic WN needs to be
  208. converted to call its runtime function.
  209. ** ex.
  210. ** complex routines are now split-by_value
  211. **
  212. ** These routines are all by value so we already know the
  213. ** argument type
  214. */
  215. #define NONE 0
  216. const EM_ROUTINES em_routines[]=
  217. {
  218. EM_TRAPUV, "__trapuv", PURE_NSE, COERCE_none,
  219. EM_RTS_CHECKSTACK,"_RtlCheckStack",PURE_NSE, COERCE_none,
  220. EM_LL_MUL, "__ll_mul", PURE_NSE, COERCE_none,
  221. EM_LL_DIV, "__ll_div", PURE_NSE, COERCE_none,
  222. EM_ULL_DIV, "__ull_div", PURE_NSE, COERCE_none,
  223. EM_LL_MOD, "__ll_mod", PURE_NSE, COERCE_none,
  224. EM_LL_REM, "__ll_rem", PURE_NSE, COERCE_none,
  225. EM_ULL_REM, "__ull_rem", PURE_NSE, COERCE_none,
  226. EM_LL_LSHIFT, "__ll_lshift", PURE_NSE, COERCE_none,
  227. EM_LL_RSHIFT, "__ll_rshift", PURE_NSE, COERCE_none,
  228. EM_ULL_RSHIFT, "__ull_rshift", PURE_NSE, COERCE_none,
  229. EM_LL_M3_DSLLV, "__dsllv", PURE_NSE, COERCE_none,
  230. EM_LL_M3_DSRAV, "__dsrav", PURE_NSE, COERCE_none,
  231. EM_LL_M3_DSRLV, "__dsrlv", PURE_NSE, COERCE_none,
  232. EM_LL_TO_F, "__ll_to_f", PURE_NSE, COERCE_none,
  233. EM_ULL_TO_F, "__ull_to_f", PURE_NSE, COERCE_none,
  234. EM_LL_TO_D, "__ll_to_d", PURE_NSE, COERCE_none,
  235. EM_ULL_TO_D, "__ull_to_d", PURE_NSE, COERCE_none,
  236. EM_F_TO_LL, "__f_to_ll", PURE_NSE, COERCE_none,
  237. EM_F_TO_ULL, "__f_to_ull", PURE_NSE, COERCE_none,
  238. EM_F_ROUND_LL_F, "__f_round_ll_f",PURE_NSE, COERCE_none,
  239. EM_F_TRUNC_LL_F, "__f_trunc_ll_f",PURE_NSE, COERCE_none,
  240. EM_D_TO_LL, "__d_to_ll", PURE_NSE, COERCE_none,
  241. EM_D_TO_ULL, "__d_to_ull", PURE_NSE, COERCE_none,
  242. EM_D_ROUND_LL_D, "__d_round_ll_d",PURE_NSE, COERCE_none,
  243. EM_D_TRUNC_LL_D, "__d_trunc_ll_d",PURE_NSE, COERCE_none,
  244. EM_LL_BIT_EXTRACT,"__ll_bit_extract",PURE_NSE,COERCE_none,
  245. EM_LL_BIT_INSERT, "__ll_bit_insert",PURE_NSE, COERCE_none,
  246. EM_Q_ABS, "__qabs", PURE_NSE, COERCE_none,
  247. EM_Q_SQRT, "__qsqrt", PURE_NSE, COERCE_none,
  248. EM_Q_ADD, "__q_add", PURE_NSE, COERCE_none,
  249. EM_Q_SUB, "__q_sub", PURE_NSE, COERCE_none,
  250. EM_Q_MPY, "__q_mul", PURE_NSE, COERCE_none,
  251. EM_Q_DIV, "__q_div", PURE_NSE, COERCE_none,
  252. EM_Q_MAX1, "__q_max1", PURE_NSE, COERCE_none,
  253. EM_Q_MIN1, "__q_min1", PURE_NSE, COERCE_none,
  254. EM_Q_EQ, "__q_eq", PURE_NSE, COERCE_none,
  255. EM_Q_NE, "__q_ne", PURE_NSE, COERCE_none,
  256. EM_Q_GE, "__q_ge", PURE_NSE, COERCE_none,
  257. EM_Q_GT, "__q_gt", PURE_NSE, COERCE_none,
  258. EM_Q_LE, "__q_le", PURE_NSE, COERCE_none,
  259. EM_Q_LT, "__q_lt", PURE_NSE, COERCE_none,
  260. EM_SNGL_Q, "__sngl_q", PURE_NSE, COERCE_none,
  261. EM_DBLE_Q, "__dble_q", PURE_NSE, COERCE_none,
  262. EM_KI_QINT, "__ki_qint", PURE_NSE, COERCE_none,
  263. EM_JI_QINT, "__ji_qint", PURE_NSE, COERCE_none,
  264. EM_Q_EXT, "__q_ext", PURE_NSE, COERCE_none,
  265. EM_Q_EXTD, "__q_extd", PURE_NSE, COERCE_none,
  266. EM_Q_FLOTK, "__q_flotk", PURE_NSE, COERCE_none,
  267. EM_Q_FLOTKU, "__q_flotku", PURE_NSE, COERCE_none,
  268. EM_Q_FLOTJ, "__q_flotj", PURE_NSE, COERCE_none,
  269. EM_Q_FLOTJU, "__q_flotju", PURE_NSE, COERCE_none,
  270. EM_KIQNNT, "__kiqnnt", PURE_NSE, COERCE_none,
  271. EM_JIQNNT, "__jiqnnt", PURE_NSE, COERCE_none,
  272. EM_C4_SQRT, "__csqrt", PURE_NSE, COERCE_split_complex,
  273. EM_C8_SQRT, "__zsqrt", PURE_NSE, COERCE_split_complex,
  274. EM_CQ_SQRT, "__cqsqrt", PURE_NSE, COERCE_split_complex,
  275. EM_C4_RSQRT, INVALID, NONE, COERCE_none,
  276. EM_C8_RSQRT, INVALID, NONE, COERCE_none,
  277. EM_CQ_RSQRT, INVALID, NONE, COERCE_none,
  278. EM_C4_ABS, INVALID, NONE, COERCE_none,
  279. EM_C8_ABS, INVALID, NONE, COERCE_none,
  280. EM_CQ_ABS, INVALID, NONE, COERCE_none,
  281. EM_KI_QCEIL, "_CEILING_16_8", PURE_NSE, COERCE_none,
  282. EM_JI_QCEIL, "_CEILING_16_4", PURE_NSE, COERCE_none,
  283. EM_KI_QFLOOR, "_FLOOR_16_8", PURE_NSE, COERCE_none,
  284. EM_JI_QFLOOR, "_FLOOR_16_4", PURE_NSE, COERCE_none,
  285. };
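/*
** Since the table is a direct lookup indexed by the EMULATION enum, each
** row's id must equal its own index. A minimal debug-time sketch of that
** invariant (not part of the original table), using the EM_id accessor
** defined above:
**
**   for (INT32 i = 0; i < EM_LAST; i++)
**     Is_True(EM_id(i) == (EMULATION) i,
**             ("em_routines[] out of order at entry %d", i));
*/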
  286. typedef struct
  287. {
  288. INTRINSIC id;
  289. COERCE runtimeArg0;
  290. COERCE runtimeArg1;
  291. } INTRINSIC_RUNTIME_FORMALS;
  292. #define INTR_id(x) intrinsic_runtime_formals[(x)].id
  293. #define INTR_coerce0(x) intrinsic_runtime_formals[(x)].runtimeArg0
  294. #define INTR_coerce1(x) intrinsic_runtime_formals[(x)].runtimeArg1
  295. /*
  296. ** TODO
  297. ** eventually the FE will supply this information
  298. ** from the intrinsic table, when we finish the implementation
  299. */
  300. INTRINSIC_RUNTIME_FORMALS intrinsic_runtime_formals[]=
  301. {
  302. INTRN_C4I4EXPEXPR, COERCE_split_complex, COERCE_none,
  303. INTRN_C4I8EXPEXPR, COERCE_split_complex, COERCE_none,
  304. INTRN_C8I4EXPEXPR, COERCE_split_complex, COERCE_none,
  305. INTRN_C8I8EXPEXPR, COERCE_split_complex, COERCE_none,
  306. INTRN_CQI4EXPEXPR, COERCE_split_complex, COERCE_none,
  307. INTRN_CQI8EXPEXPR, COERCE_split_complex, COERCE_none,
  308. INTRN_C16I4EXPEXPR, COERCE_split_complex, COERCE_none,
  309. INTRN_C16I8EXPEXPR, COERCE_split_complex, COERCE_none,
  310. INTRN_C4EXPEXPR, COERCE_split_complex, COERCE_split_complex,
  311. INTRN_C8EXPEXPR, COERCE_split_complex, COERCE_split_complex,
  312. INTRN_CQEXPEXPR, COERCE_split_complex, COERCE_split_complex,
  313. INTRN_C16EXPEXPR, COERCE_split_complex, COERCE_split_complex,
  314. INTRN_F4C4ABS, COERCE_split_complex, COERCE_none,
  315. INTRN_F8C8ABS, COERCE_split_complex, COERCE_none,
  316. INTRN_FQCQABS, COERCE_split_complex, COERCE_none,
  317. INTRN_F16C16ABS, COERCE_split_complex, COERCE_none,
  318. INTRN_C4EXP, COERCE_split_complex, COERCE_none,
  319. INTRN_C8EXP, COERCE_split_complex, COERCE_none,
  320. INTRN_CQEXP, COERCE_split_complex, COERCE_none,
  321. INTRN_C16EXP, COERCE_split_complex, COERCE_none,
  322. INTRN_C4LOG, COERCE_split_complex, COERCE_none,
  323. INTRN_C8LOG, COERCE_split_complex, COERCE_none,
  324. INTRN_CQLOG, COERCE_split_complex, COERCE_none,
  325. INTRN_C16LOG, COERCE_split_complex, COERCE_none,
  326. INTRN_C4COS, COERCE_split_complex, COERCE_none,
  327. INTRN_C8COS, COERCE_split_complex, COERCE_none,
  328. INTRN_CQCOS, COERCE_split_complex, COERCE_none,
  329. INTRN_C16COS, COERCE_split_complex, COERCE_none,
  330. INTRN_C4SIN, COERCE_split_complex, COERCE_none,
  331. INTRN_C8SIN, COERCE_split_complex, COERCE_none,
  332. INTRN_CQSIN, COERCE_split_complex, COERCE_none,
  333. INTRN_C16SIN, COERCE_split_complex, COERCE_none
  334. };
  335. INT32 intrinsic_runtime_formals_size = sizeof(intrinsic_runtime_formals) /
  336. sizeof( INTRINSIC_RUNTIME_FORMALS);
  337. typedef struct
  338. {
  339. INTRINSIC id;
  340. TYPE_ID parameterType0;
  341. TYPE_ID parameterType1;
  342. TYPE_ID parameterType2;
  343. } INTRINSIC_PARAMETER_TYPE;
  344. #define INTR_parm_id(x) intrinsic_parameter_type[(x)].id
  345. #define INTR_parmtype0(x) intrinsic_parameter_type[(x)].parameterType0
  346. #define INTR_parmtype1(x) intrinsic_parameter_type[(x)].parameterType1
  347. #define INTR_parmtype2(x) intrinsic_parameter_type[(x)].parameterType2
  348. INTRINSIC_PARAMETER_TYPE intrinsic_parameter_type[]=
  349. {
  350. INTRN_I1DIM, MTYPE_I1, MTYPE_I1, MTYPE_V,
  351. INTRN_I2DIM, MTYPE_I2, MTYPE_I2, MTYPE_V,
  352. INTRN_I4DIM, MTYPE_I4, MTYPE_I4, MTYPE_V,
  353. INTRN_I8DIM, MTYPE_I8, MTYPE_I8, MTYPE_V,
  354. INTRN_F4DIM, MTYPE_F4, MTYPE_F4, MTYPE_V,
  355. INTRN_F8DIM, MTYPE_F8, MTYPE_F8, MTYPE_V,
  356. INTRN_FQDIM, MTYPE_FQ, MTYPE_FQ, MTYPE_V,
  357. INTRN_F16DIM, MTYPE_F16, MTYPE_F16, MTYPE_V,
  358. INTRN_F4MOD, MTYPE_F4, MTYPE_F4, MTYPE_V,
  359. INTRN_F8MOD, MTYPE_F8, MTYPE_F8, MTYPE_V,
  360. INTRN_FQMOD, MTYPE_FQ, MTYPE_FQ, MTYPE_V,
  361. INTRN_F16MOD, MTYPE_F16, MTYPE_F16, MTYPE_V,
  362. INTRN_F8F4PROD, MTYPE_F4, MTYPE_F4, MTYPE_V,
  363. INTRN_FQF8PROD, MTYPE_F8, MTYPE_F8, MTYPE_V,
  364. INTRN_F16F8PROD, MTYPE_F8, MTYPE_F8, MTYPE_V,
  365. INTRN_I1SIGN, MTYPE_I1, MTYPE_I1, MTYPE_V,
  366. INTRN_I2SIGN, MTYPE_I2, MTYPE_I2, MTYPE_V,
  367. INTRN_I4SIGN, MTYPE_I4, MTYPE_I4, MTYPE_V,
  368. INTRN_I8SIGN, MTYPE_I8, MTYPE_I8, MTYPE_V,
  369. INTRN_F4SIGN, MTYPE_F4, MTYPE_F4, MTYPE_V,
  370. INTRN_F8SIGN, MTYPE_F8, MTYPE_F8, MTYPE_V,
  371. INTRN_FQSIGN, MTYPE_FQ, MTYPE_FQ, MTYPE_V,
  372. INTRN_F16SIGN, MTYPE_F16, MTYPE_F16, MTYPE_V,
  373. INTRN_F4AINT, MTYPE_F4, MTYPE_V, MTYPE_V,
  374. INTRN_F8AINT, MTYPE_F8, MTYPE_V, MTYPE_V,
  375. INTRN_FQAINT, MTYPE_FQ, MTYPE_V, MTYPE_V,
  376. INTRN_F16AINT, MTYPE_F16, MTYPE_V, MTYPE_V,
  377. INTRN_I2F4NINT, MTYPE_F4, MTYPE_V, MTYPE_V,
  378. INTRN_I4F4NINT, MTYPE_F4, MTYPE_V, MTYPE_V,
  379. INTRN_I8F4NINT, MTYPE_F4, MTYPE_V, MTYPE_V,
  380. INTRN_I2F8IDNINT, MTYPE_F8, MTYPE_V, MTYPE_V,
  381. INTRN_I4F8IDNINT, MTYPE_F8, MTYPE_V, MTYPE_V,
  382. INTRN_I8F8IDNINT, MTYPE_F8, MTYPE_V, MTYPE_V,
  383. INTRN_I2FQIQNINT, MTYPE_FQ, MTYPE_V, MTYPE_V,
  384. INTRN_I4FQIQNINT, MTYPE_FQ, MTYPE_V, MTYPE_V,
  385. INTRN_I8FQIQNINT, MTYPE_FQ, MTYPE_V, MTYPE_V,
  386. INTRN_I2F16IQNINT, MTYPE_F16, MTYPE_V, MTYPE_V,
  387. INTRN_I4F16IQNINT, MTYPE_F16, MTYPE_V, MTYPE_V,
  388. INTRN_I8F16IQNINT, MTYPE_F16, MTYPE_V, MTYPE_V,
  389. INTRN_F4ANINT, MTYPE_F4, MTYPE_V, MTYPE_V,
  390. INTRN_F8ANINT, MTYPE_F8, MTYPE_V, MTYPE_V,
  391. INTRN_FQANINT, MTYPE_FQ, MTYPE_V, MTYPE_V,
  392. INTRN_F16ANINT, MTYPE_F16, MTYPE_V, MTYPE_V,
  393. INTRN_F4LOG10, MTYPE_F4, MTYPE_V, MTYPE_V,
  394. INTRN_F8LOG10, MTYPE_F8, MTYPE_V, MTYPE_V,
  395. INTRN_FQLOG10, MTYPE_FQ, MTYPE_V, MTYPE_V,
  396. INTRN_F16LOG10, MTYPE_F16, MTYPE_V, MTYPE_V,
  397. INTRN_I1BTEST, MTYPE_I1, MTYPE_I1, MTYPE_V,
  398. INTRN_I2BTEST, MTYPE_I2, MTYPE_I2, MTYPE_V,
  399. INTRN_I4BTEST, MTYPE_I4, MTYPE_I4, MTYPE_V,
  400. INTRN_I8BTEST, MTYPE_I8, MTYPE_I8, MTYPE_V,
  401. INTRN_I1BSET, MTYPE_I1, MTYPE_I1, MTYPE_V,
  402. INTRN_I2BSET, MTYPE_I2, MTYPE_I2, MTYPE_V,
  403. INTRN_I4BSET, MTYPE_I4, MTYPE_I4, MTYPE_V,
  404. INTRN_I8BSET, MTYPE_I8, MTYPE_I8, MTYPE_V,
  405. INTRN_I1BCLR, MTYPE_I1, MTYPE_I1, MTYPE_V,
  406. INTRN_I2BCLR, MTYPE_I2, MTYPE_I2, MTYPE_V,
  407. INTRN_I4BCLR, MTYPE_I4, MTYPE_I4, MTYPE_V,
  408. INTRN_I8BCLR, MTYPE_I8, MTYPE_I8, MTYPE_V,
  409. INTRN_I1BITS, MTYPE_I1, MTYPE_I1, MTYPE_I1,
  410. INTRN_I2BITS, MTYPE_I2, MTYPE_I2, MTYPE_I2,
  411. INTRN_I4BITS, MTYPE_I4, MTYPE_I4, MTYPE_I4,
  412. INTRN_I8BITS, MTYPE_I8, MTYPE_I8, MTYPE_I8,
  413. INTRN_I1SHL, MTYPE_I1, MTYPE_I1, MTYPE_V,
  414. INTRN_I2SHL, MTYPE_I2, MTYPE_I2, MTYPE_V,
  415. INTRN_I1SHR, MTYPE_I1, MTYPE_I1, MTYPE_V,
  416. INTRN_I2SHR, MTYPE_I2, MTYPE_I2, MTYPE_V,
  417. INTRN_I1SHFT, MTYPE_I1, MTYPE_I1, MTYPE_V,
  418. INTRN_I2SHFT, MTYPE_I2, MTYPE_I2, MTYPE_V,
  419. INTRN_I4SHFT, MTYPE_I4, MTYPE_I4, MTYPE_V,
  420. INTRN_I8SHFT, MTYPE_I8, MTYPE_I8, MTYPE_V,
  421. INTRN_I1SHFTC, MTYPE_I1, MTYPE_I1, MTYPE_I1,
  422. INTRN_I2SHFTC, MTYPE_I2, MTYPE_I2, MTYPE_I2,
  423. INTRN_I4SHFTC, MTYPE_I4, MTYPE_I4, MTYPE_I4,
  424. INTRN_I8SHFTC, MTYPE_I8, MTYPE_I8, MTYPE_I8,
  425. INTRN_I1MVBITS, MTYPE_I1, MTYPE_I1, MTYPE_I1,
  426. INTRN_I2MVBITS, MTYPE_I2, MTYPE_I2, MTYPE_I2,
  427. INTRN_I4MVBITS, MTYPE_I4, MTYPE_I4, MTYPE_I4,
  428. INTRN_I8MVBITS, MTYPE_I8, MTYPE_I8, MTYPE_I8,
  429. };
  430. INT32 intrinsic_parameter_type_size = sizeof(intrinsic_parameter_type) /
  431. sizeof( INTRINSIC_PARAMETER_TYPE);
  432. #define WN_has_ty(x) (OPCODE_has_1ty(WN_opcode(x)) || OPCODE_has_2ty(WN_opcode(x)))
  433. #define WN_is_pointer(x) (WN_has_ty(x) && (TY_kind(WN_ty(x)) == KIND_POINTER))
  434. #define Is_Integer_Constant(x) (WN_operator(x) == OPR_INTCONST)
  435. #define Is_Constant(x) (WN_operator(x) == OPR_CONST)
  436. #define OPCODE_is_intrinsic(op) \
  437. ((OPCODE_operator((op)) == OPR_INTRINSIC_CALL) || \
  438. (OPCODE_operator((op)) == OPR_INTRINSIC_OP))
  439. #define ABS(x) (((x)<0) ? -(x) : (x))
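/*
** Note: as a function-style macro, ABS evaluates its argument twice, so
** it must only be given side-effect-free expressions (as the uses in
** this file are); ABS(x++) would be undefined behavior.
*/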
  440. /* ====================================================================
  441. * Exported Functions
  442. * ====================================================================
  443. */
  444. extern const char * INTR_intrinsic_name(WN *tree);
  445. extern WN * make_pointer_to_node(WN *block, WN *tree);
  446. /* ====================================================================
  447. * Imported Functions
  448. * ====================================================================
  449. */
  450. extern PREG_NUM AssignExpr(WN *block, WN *tree, TYPE_ID type);
  451. extern TY_IDX compute_alignment_type(WN *tree, TY_IDX, INT64 offset);
  452. extern INT32 compute_copy_alignment(TY_IDX, TY_IDX, INT32 offset);
  453. extern BOOL lower_is_aliased(WN *wn1, WN *wn2, INT64 size);
  454. extern TYPE_ID compute_copy_quantum(INT32 );
  455. extern WN *WN_I1const(TYPE_ID type, INT64 con);
  456. extern void WN_annotate_call_flags(WN *call, ST *sym);
  457. extern BOOL CG_bcopy_cannot_overlap;
  458. extern BOOL CG_memcpy_cannot_overlap;
  459. extern BOOL CG_memmove_cannot_overlap;
  460. extern INT32 CG_memmove_inst_count;
  461. #ifdef KEY
  462. extern INT32 CG_memmove_align_inst_count;
  463. #endif
  464. /* ====================================================================
  465. * Forward Declarations
  466. * ====================================================================
  467. */
  468. static EMULATION WN_emulation(WN *tree);
  469. static WN *em_exp_int(WN *block, WN *x, WN *pow, TYPE_ID type);
  470. static WN *em_exp_float(WN *block, WN *x, WN *pow, TYPE_ID type);
  471. static WN *em_mod_float(WN *block, WN *x, WN *y);
  472. static WN *em_complex_exp(WN *block, WN *x);
  473. static WN *em_complex_cos(WN *block, WN *x);
  474. static COERCE INTR_coerce_runtime(WN *tree, INT32 arg);
  475. static TYPE_ID INTR_parameter_type(WN *tree, INT32 arg);
  476. static TY_IDX aux_compute_alignment(WN *tree);
  477. /* ====================================================================
  478. * private variables
  479. * ====================================================================
  480. */
  481. static INT32 em_exp_int_max = 256;
  482. #define MAX_INTRINSIC_ARGS 20
  483. /* ====================================================================
  484. *
  485. * TYPE_ID INTR_return_mtype(id)
  486. *
  487. *
  488. *
  489. * ==================================================================== */
  490. TYPE_ID INTR_return_mtype(INTRINSIC id)
  491. {
  492. INTRN_RETKIND rtype = INTRN_return_kind(id);
  493. switch(rtype)
  494. {
  495. case IRETURN_I1: return MTYPE_I1;
  496. case IRETURN_I2: return MTYPE_I2;
  497. case IRETURN_I4: return MTYPE_I4;
  498. case IRETURN_I8: return MTYPE_I8;
  499. case IRETURN_U1: return MTYPE_U1;
  500. case IRETURN_U2: return MTYPE_U2;
  501. case IRETURN_U4: return MTYPE_U4;
  502. case IRETURN_U8: return MTYPE_U8;
  503. case IRETURN_F4: return MTYPE_F4;
  504. case IRETURN_F8: return MTYPE_F8;
  505. case IRETURN_FQ: return MTYPE_FQ;
  506. case IRETURN_F16: return MTYPE_F16;
  507. case IRETURN_C4: return MTYPE_C4;
  508. case IRETURN_C8: return MTYPE_C8;
  509. case IRETURN_CQ: return MTYPE_CQ;
  510. case IRETURN_C16: return MTYPE_C16;
  511. case IRETURN_V: return MTYPE_V;
  512. case IRETURN_PV:
  513. case IRETURN_PU1:
  514. case IRETURN_DA1:
  515. case IRETURN_SZT:
  516. case IRETURN_PC :
  517. case IRETURN_UNKNOWN:
  518. return MTYPE_UNKNOWN;
  519. }
  520. return MTYPE_UNKNOWN;
  521. }
  522. /* ====================================================================
  523. *
  524. * EMULATION WN_emulation(WN *tree)
  525. *
  526. * Provide the correct emulation enum for a given WN
  527. *
  528. * TODO: cache most frequently used id's
  529. *
  530. * ==================================================================== */
  531. static EMULATION WN_emulation(WN *tree)
  532. {
  533. OPCODE op = WN_opcode(tree);
  534. TYPE_ID type = OPCODE_rtype(op);
  535. switch (WN_operator(tree)) {
  536. case OPR_SQRT:
  537. switch(type) {
  538. case MTYPE_C4: return EM_C4_SQRT;
  539. case MTYPE_C8: return EM_C8_SQRT;
  540. case MTYPE_CQ: return EM_CQ_SQRT;
  541. case MTYPE_C16: return EM_CQ_SQRT;
  542. case MTYPE_FQ: return EM_Q_SQRT;
  543. case MTYPE_F16: return EM_Q_SQRT;
  544. }
  545. break;
  546. case OPR_RSQRT:
  547. switch(type) {
  548. case MTYPE_C4: return EM_C4_RSQRT;
  549. case MTYPE_C8: return EM_C8_RSQRT;
  550. case MTYPE_CQ: return EM_CQ_RSQRT;
  551. case MTYPE_C16: return EM_CQ_RSQRT;
  552. }
  553. break;
  554. case OPR_CVT:
  555. {
  556. TYPE_ID desc = WN_desc(tree);
  557. if (desc == MTYPE_FQ || desc == MTYPE_F16)
  558. {
  559. switch(type) {
  560. case MTYPE_I4: return EM_JI_QINT;
  561. case MTYPE_I8: return EM_KI_QINT;
  562. case MTYPE_F4: return EM_SNGL_Q;
  563. case MTYPE_F8: return EM_DBLE_Q;
  564. }
  565. break;
  566. }
  567. else if (type == MTYPE_FQ || type == MTYPE_F16)
  568. {
  569. switch(desc) {
  570. case MTYPE_U4: return EM_Q_FLOTJU;
  571. case MTYPE_I4: return EM_Q_FLOTJ;
  572. case MTYPE_U8: return EM_Q_FLOTKU;
  573. case MTYPE_I8: return EM_Q_FLOTK;
  574. case MTYPE_F8: return EM_Q_EXTD;
  575. case MTYPE_F4: return EM_Q_EXT;
  576. }
  577. }
  578. }
  579. break;
  580. case OPR_RND:
  581. {
  582. TYPE_ID desc = WN_desc(tree);
  583. if (desc == MTYPE_FQ || desc == MTYPE_F16)
  584. {
  585. switch(type)
  586. {
  587. case MTYPE_I4: return EM_JIQNNT;
  588. case MTYPE_I8: return EM_KIQNNT;
  589. }
  590. break;
  591. }
  592. }
  593. break;
  594. default:
  595. if (type == MTYPE_FQ || type == MTYPE_F16)
  596. {
  597. switch(WN_operator(tree)) {
  598. case OPR_ISTORE:
  599. case OPR_ISTOREX:
  600. case OPR_STID:
  601. case OPR_ILOAD:
  602. case OPR_ILOADX:
  603. case OPR_SELECT:
  604. case OPR_LDID:
  605. case OPR_CONST:
  606. case OPR_NEG:
  607. break;
  608. case OPR_ABS: return EM_Q_ABS;
  609. case OPR_ADD: return EM_Q_ADD;
  610. case OPR_SUB: return EM_Q_SUB;
  611. case OPR_MPY: return EM_Q_MPY;
  612. case OPR_DIV: return EM_Q_DIV;
  613. case OPR_MAX: return EM_Q_MAX1;
  614. case OPR_MIN: return EM_Q_MIN1;
  615. case OPR_RECIP:
  616. case OPR_RSQRT:
  617. case OPR_MADD:
  618. case OPR_MSUB:
  619. case OPR_NMADD:
  620. case OPR_NMSUB:
  621. case OPR_RND:
  622. case OPR_TRUNC:
  623. case OPR_CVT:
  624. case OPR_SQRT:
  625. Is_True(FALSE, ("WN_emulation() %s should be already processed", OPCODE_name(WN_opcode(tree))));
  626. break;
  627. case OPR_CEIL:
  628. case OPR_FLOOR:
  629. case OPR_MOD:
  630. case OPR_REM:
  631. case OPR_CVTL:
  632. case OPR_CALL:
  633. case OPR_INTRINSIC_CALL:
  634. Is_True(FALSE, ("WN_emulation() %s invalid context for op", OPCODE_name(WN_opcode(tree))));
  635. }
  636. }
  637. else if (WN_desc(tree)== MTYPE_FQ || WN_desc(tree)== MTYPE_F16)
  638. {
  639. switch(WN_operator(tree)) {
  640. case OPR_EQ: return EM_Q_EQ;
  641. case OPR_NE: return EM_Q_NE;
  642. case OPR_GT: return EM_Q_GT;
  643. case OPR_GE: return EM_Q_GE;
  644. case OPR_LT: return EM_Q_LT;
  645. case OPR_LE: return EM_Q_LE;
  646. case OPR_TRUNC:
  647. switch(type)
  648. {
  649. case MTYPE_I4: return EM_JI_QINT;
  650. case MTYPE_I8: return EM_KI_QINT;
  651. }
  652. break;
  653. case OPR_CEIL:
  654. switch(type)
  655. {
  656. case MTYPE_I4: return EM_JI_QCEIL;
  657. case MTYPE_I8: return EM_KI_QCEIL;
  658. }
  659. break;
  660. case OPR_FLOOR:
  661. switch(type)
  662. {
  663. case MTYPE_I4: return EM_JI_QFLOOR;
  664. case MTYPE_I8: return EM_KI_QFLOOR;
  665. }
  666. break;
  667. }
  668. }
  669. break;
  670. }
  671. FmtAssert(FALSE, ("WN_emulation() %s not recognized", OPCODE_name(WN_opcode(tree))));
  672. return EM_LAST;
  673. }
  674. /* ====================================================================
  675. *
  676. * WN *checkForZero(WN *block, TYPE_ID type, PREG_NUM xN, WN *if_else, WN *value)
  677. *
  678. * Create test block for zero
  679. * if (x==0)
  680. * { ret = 0; }
  681. * else
  682. * { ret = value }
  683. * return ret;
  684. *
  685. * ==================================================================== */
  686. static WN *checkForZero(WN *block, TYPE_ID type, PREG_NUM xN, WN *if_else, WN *value)
  687. {
  688. TYPE_ID rtype = WN_rtype(value);
  689. WN *if_then;
  690. PREG_NUM retN;
  691. if_then = WN_CreateBlock();
  692. retN = AssignExpr(if_then, WN_Zerocon(rtype), rtype);
  693. {
  694. WN *st;
  695. st = WN_StidIntoPreg(rtype, retN, MTYPE_To_PREG(rtype), value);
  696. WN_INSERT_BlockLast(if_else, st);
  697. }
  698. {
  699. WN *cond, *IF;
  700. Is_True(MTYPE_is_float(type), ("unexpected type"));
  701. cond = WN_EQ(type,
  702. WN_LdidPreg(type, xN),
  703. WN_Zerocon(type));
  704. IF = WN_CreateIf(cond, if_then, if_else);
  705. WN_INSERT_BlockLast(block, IF);
  706. }
  707. return WN_LdidPreg(rtype, retN);
  708. }
  709. /* ====================================================================
  710. *
  711. * WN * WN_arg(WN *tree, INT32 arg)
  712. *
  713. * return Nth kid, skipping PARM
  714. * ==================================================================== */
  715. static WN *WN_arg(WN *tree, INT32 arg)
  716. {
  717. WN *child= WN_kid(tree, arg);
  718. if (WN_operator_is(child, OPR_PARM))
  719. {
  720. return WN_kid0(child);
  721. }
  722. return child;
  723. }
  724. static WN *em_clen(WN *block, WN *len)
  725. {
  726. return len;
  727. }
  728. /*
  729. **
  730. ** Auxiliary routine to implement ( x + .5 * sign(x) )
  731. */
  732. static WN *aux_nearest(TYPE_ID rtype, PREG_NUM xN)
  733. {
  734. WN *rel, *select;
  735. rel = WN_GE(rtype, WN_LdidPreg(rtype, xN), WN_Zerocon(rtype));
  736. select = WN_Select(rtype,
  737. rel,
  738. WN_Floatconst(rtype, .5),
  739. WN_Floatconst(rtype, -.5));
  740. return WN_Add(rtype, WN_LdidPreg(rtype, xN), select);
  741. }
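/*
** Worked example: x = 2.6 gives 2.6 + .5 = 3.1, which truncates to 3;
** x = 2.4 gives 2.9, truncating to 2; x = -2.6 gives -3.1, truncating
** to -3 -- i.e. round-to-nearest expressed as a truncation.
*/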
  742. /*
  743. ** Auxiliary routine for Convert ( {Round,Trunc}(rtype) )
  744. */
  745. static WN *aux_CvtRnd(TYPE_ID rtype, WN *x)
  746. {
  747. WN *rnd;
  748. TYPE_ID intToFloat = (Slow_CVTDL) ? MTYPE_I4 : MTYPE_I8;
  749. // Needed for correctness, no matter how slow the truncate
  750. if (WN_rtype(x) != MTYPE_F4) {
  751. intToFloat = MTYPE_I8;
  752. }
  753. rnd = WN_Rnd(rtype, intToFloat, x);
  754. return WN_Cvt(intToFloat, rtype, rnd);
  755. }
  756. static WN *aux_CvtTrunc(TYPE_ID rtype, WN *x)
  757. {
  758. WN *trunc;
  759. TYPE_ID intToFloat = (Slow_CVTDL) ? MTYPE_I4 : MTYPE_I8;
  760. /*
  761. * this is em_aint()
  762. */
  763. // Needed for correctness, no matter how slow the truncate
  764. if (WN_rtype(x) != MTYPE_F4) {
  765. intToFloat = MTYPE_I8;
  766. }
  767. trunc = WN_Trunc(rtype, intToFloat, x);
  768. return WN_Cvt(intToFloat, rtype, trunc);
  769. }
  770. /*
  771. ** Optimizer cannot deal with zero length mstore so return BLOCK
  772. */
  773. static WN *aux_CreateMstore(WN_OFFSET offset, TY_IDX type, WN *value, WN *addr,
  774. WN *size)
  775. {
  776. if (Is_Integer_Constant(size) && WN_const_val(size) <= 0)
  777. {
  778. /* Cannot delete these nodes, since they are used later (bug 623566)
  779. WN_Delete(value);
  780. WN_Delete(addr);
  781. WN_Delete(size);
  782. */
  783. return WN_CreateBlock();
  784. }
  785. UINT64 ty_size = TY_size(TY_pointed(type));
  786. if (ty_size != 0 && WN_const_val (size) % ty_size != 0) {
  787. // size copied is not a multiple of the size of the type, which means
  788. // that we are copying part of the type. We then change the pointer
  789. // to (void*)
  790. static TY_IDX void_star = TY_IDX_ZERO;
  791. if (void_star == TY_IDX_ZERO)
  792. void_star = Make_Pointer_Type (MTYPE_To_TY (MTYPE_V));
  793. Set_TY_IDX_index (type, TY_IDX_index (void_star));
  794. }
  795. return WN_CreateMstore(offset, type, value, addr, size);
  796. }
  797. /*
  798. **
  799. ** Notes for the following functions:
  800. **
  801. ** [1] Fast_trunc_Allowed (currently when Roundoff_Level >= ROUNDOFF_SIMPLE)
  802. ** generate trunc. This will fail unless x fits, i.e. (-2**63 <= x <= 2**63-1)
  803. **
  804. ** [2] Test x against TWO_EXP
  805. ** Floating point value is such that (x+1 == x), ie. there is no
  806. ** possible fractional value ie.
  807. ** 2**23 <= |x| return x
  808. **
  809. ** It is possible (if necessary) to special case MTYPE_F4 and generate
  810. ** a trunc to MTYPE_I4.
  811. **/
  812. #define TWO_EXP_23 8388608.0
  813. #define TWO_EXP_52 4503599627370496.0
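/*
** TWO_EXP_23 == 2**23 and TWO_EXP_52 == 2**52: an F4 carries a 24-bit
** significand and an F8 a 53-bit one, so any |x| at or above these
** bounds has no fractional part and may be returned unchanged.
*/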
  814. /*
  815. **
  816. ** INTRN_I2F4NINT:
  817. ** INTRN_I4F4NINT:
  818. ** INTRN_I8F4NINT:
  819. ** INTRN_I2F8IDNINT:
  820. ** INTRN_I4F8IDNINT:
  821. ** INTRN_I8F8IDNINT:
  822. ** INTRN_I2FQIQNINT:
  823. ** INTRN_I4FQIQNINT:
  824. ** INTRN_I8FQIQNINT:
  825. ** INTRN_I2F16IQNINT:
  826. ** INTRN_I4F16IQNINT:
  827. ** INTRN_I8F16IQNINT:
  828. **
  829. ** change into
  830. ** rnd(x) roundoff >= 3
  831. ** trunc( x + .5 * sign(x) )
  832. */
  833. static WN *em_nearest_int(WN *block, TYPE_ID rtype, WN *x)
  834. {
  835. TYPE_ID type = WN_rtype(x);
  836. if (Fast_NINT_Allowed)
  837. {
  838. return WN_Rnd(type, rtype, x);
  839. }
  840. else if ((type == MTYPE_F4) || (type == MTYPE_F8))
  841. {
  842. WN *add;
  843. PREG_NUM xN;
  844. xN = AssignExpr(block, x, type);
  845. add = aux_nearest(type, xN);
  846. if (Fast_trunc_Allowed)
  847. {
  848. return WN_Trunc(type, rtype, add);
  849. }
  850. else
  851. {
  852. WN *rel, *select;
  853. double con= (type==MTYPE_F4) ? TWO_EXP_23 : TWO_EXP_52;
  854. rel = WN_GE(type,
  855. WN_Abs(type, WN_LdidPreg(type, xN)),
  856. WN_Floatconst(type, con));
  857. select = WN_Select(type, rel, WN_LdidPreg(type, xN), add);
  858. return WN_Trunc(type, rtype, select);
  859. }
  860. }
  861. else
  862. {
  863. return NULL;
  864. }
  865. }
  866. /*
  867. **
  868. ** INTRN_F4ANINT:
  869. ** INTRN_F8ANINT:
  870. ** INTRN_FQANINT:
  871. ** INTRN_F16ANINT:
  872. **
  873. ** change into
  874. ** cvt (float, trunc( x + .5 * sign(x) )) roundoff>= 3
  875. */
  876. static WN *em_nearest_aint(WN *block, TYPE_ID rtype, WN *x)
  877. {
  878. if (Fast_NINT_Allowed)
  879. {
  880. return aux_CvtRnd(rtype, x);
  881. }
  882. else if ((rtype == MTYPE_F4) || (rtype == MTYPE_F8))
  883. {
  884. PREG_NUM xN;
  885. WN *add, *cvt;
  886. xN = AssignExpr(block, x, rtype);
  887. add = aux_nearest(rtype, xN);
  888. /*
  889. * this is em_aint()
  890. */
  891. cvt = aux_CvtTrunc(rtype, add);
  892. if (Fast_trunc_Allowed)
  893. {
  894. return cvt;
  895. }
  896. else
  897. {
  898. WN *rel;
  899. double con= (rtype==MTYPE_F4) ? TWO_EXP_23 : TWO_EXP_52;
  900. rel = WN_GE(rtype,
  901. WN_Abs(rtype, WN_LdidPreg(rtype, xN)),
  902. WN_Floatconst(rtype, con));
  903. return WN_Select(rtype, rel, WN_LdidPreg(rtype, xN), cvt);
  904. }
  905. }
  906. return NULL;
  907. }
  908. /*
  909. **
  910. ** INTRN_F4AINT
  911. ** INTRN_F8AINT
  912. ** INTRN_FQAINT
  913. ** INTRN_F16AINT
  914. **
  915. ** change into
  916. ** cvt (float, trunc(x))
  917. */
  918. static WN *em_aint(WN *block, TYPE_ID rtype, WN *x)
  919. {
  920. if (Fast_trunc_Allowed)
  921. {
  922. return aux_CvtTrunc(rtype, x);
  923. }
  924. else if ((rtype == MTYPE_F4) || (rtype == MTYPE_F8))
  925. {
  926. PREG_NUM xN;
  927. WN *rel, *cvt;
  928. double con= (rtype==MTYPE_F4) ? TWO_EXP_23 : TWO_EXP_52;
  929. xN = AssignExpr(block, x, rtype);
  930. rel = WN_GE(rtype,
  931. WN_Abs(rtype, WN_LdidPreg(rtype, xN)),
  932. WN_Floatconst(rtype, con));
  933. cvt = aux_CvtTrunc(rtype, WN_LdidPreg(rtype, xN));
  934. return WN_Select(rtype, rel, WN_LdidPreg(rtype, xN), cvt);
  935. }
  936. else
  937. {
  938. return NULL;
  939. }
  940. }
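/*
** The select above is the overflow guard: once |x| >= 2**23 (F4) or
** 2**52 (F8) the value is already integral, so x itself is the exact
** answer and the float->int->float round trip (which could overflow the
** integer type) is skipped.
*/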
  941. /*
  942. **
  943. ** change into
  944. ** | x | if y >= 0
  945. ** - | x | if y < 0
  946. **
  947. ** --> absN = | x |;
  948. ** --> (y>=0) ? absN : -absN;
  949. */
  950. static WN *em_sign(WN *block, WN *x, WN *y)
  951. {
  952. PREG_NUM absN;
  953. TYPE_ID type = WN_rtype(x);
  954. WN *abs, *select;
  955. #ifdef KEY // bug 9660
  956. if (MTYPE_is_integral(type) && ! MTYPE_signed(type))
  957. type = Mtype_TransferSign(MTYPE_I4, type);
  958. #endif
  959. #ifdef KEY // bug 12052
  960. if (MTYPE_is_integral(type) &&
  961. MTYPE_byte_size(type) < MTYPE_byte_size(WN_rtype(y)))
  962. type = Mtype_TransferSize(WN_rtype(y), type);
  963. #endif
  964. abs = WN_Abs(type, x);
  965. absN = AssignExpr(block, abs, type);
  966. select = WN_Select(type,
  967. WN_GE(type, y, WN_Zerocon(type)),
  968. WN_LdidPreg(type, absN),
  969. WN_Neg(type, WN_LdidPreg(type, absN)));
  970. return select;
  971. }
  972. /*
  973. **
  974. ** change into
  975. ** cvt (x) * cvt(y)
  976. */
  977. static WN *em_prod(WN *block, TYPE_ID rtype, WN *x, WN *y)
  978. {
  979. TYPE_ID type = WN_rtype(x);
  980. WN *mpy;
  981. mpy = WN_Mpy(rtype,
  982. WN_Cvt(type, rtype, x),
  983. WN_Cvt(type, rtype, y));
  984. return mpy;
  985. }
  986. /*
  987. **
  988. ** change into
  989. ** (x>y) ? (x-y) : 0
  990. */
  991. static WN *em_dim(WN *block, WN *x, WN *y)
  992. {
  993. PREG_NUM xN, yN;
  994. TYPE_ID type = WN_rtype(x);
  995. WN *rel, *sub, *select;
  996. xN = AssignExpr(block, x, type);
  997. yN = AssignExpr(block, y, type);
  998. rel = WN_GT(type,
  999. WN_LdidPreg(type, xN),
  1000. WN_LdidPreg(type, yN));
  1001. sub = WN_Sub(type,
  1002. WN_LdidPreg(type, xN),
  1003. WN_LdidPreg(type, yN));
  1004. select = WN_Select(type,
  1005. rel,
  1006. sub,
  1007. WN_Zerocon(type));
  1008. return select;
  1009. }
  1010. /*
  1011. **
  1012. ** change into
  1013. ** x - y * ( FLOAT ( |(x / y)| ))
  1014. */
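/*
** Worked example: MOD(7.5, 2.0) = 7.5 - 2.0 * aint(7.5 / 2.0)
**                               = 7.5 - 2.0 * 3.0 = 1.5
** and the result keeps the sign of x: MOD(-7.5, 2.0) = -7.5 + 6.0 = -1.5.
*/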
  1015. static WN *em_mod_float(WN *block, WN *x, WN *y)
  1016. {
  1017. PREG_NUM xN, yN;
  1018. TYPE_ID type = WN_rtype(x);
  1019. WN *div, *cvt, *mpy, *sub;
  1020. if ((type == MTYPE_F4) || (type == MTYPE_F8)) {
  1021. xN = AssignExpr(block, x, type);
  1022. yN = AssignExpr(block, y, type);
  1023. div = WN_Div(type,
  1024. WN_LdidPreg(type, xN),
  1025. WN_LdidPreg(type, yN));
  1026. cvt = em_aint(block, type, div);
  1027. mpy = WN_Mpy(type,
  1028. WN_LdidPreg(type, yN),
  1029. cvt);
  1030. sub = WN_Sub(type,
  1031. WN_LdidPreg(type, xN),
  1032. mpy);
  1033. return sub;
  1034. } else {
  1035. return NULL;
  1036. }
  1037. }
  1038. /*
  1039. ** WN *build_mult_tree(block, TYPE_ID type, PREG_NUM xN, int pow)
  1040. **
  1041. ** Build a multiply tree to make Shapiro happy.
  1042. **
  1043. ** Actually, create a series of temporaries to hold the powers that be.
  1044. **
  1045. ** ex. x ** 9 (= 1001)
  1046. ** t0= x;
  1047. ** t1= t0*t0; (x**2)
  1048. ** t2= t1*t1; (x**4)
  1049. ** t3= t2*t2; (x**8)
  1050. ** ans = t3 * t0;
  1051. **
  1052. */
  1053. #define BIT_IS_ON(x,i) ((x) & (1<<(i)))
  1054. static WN *build_mult_tree(WN *block, TYPE_ID type, PREG_NUM xN, INT32 pow)
  1055. {
  1056. PREG_NUM powers[16]; /* could handle pow = 64k */
  1057. INT32 i, n = 0;
  1058. PREG_NUM xNm1;
  1059. WN *tree = NULL;
  1060. Is_True((pow>0), ("expected pow>0"));
  1061. powers[n++] = xN;
  1062. xNm1 = xN;
  1063. for(i= 1; ((1<<i) <= pow); i++)
  1064. {
  1065. WN *mpy;
  1066. mpy = WN_Mpy(type, WN_LdidPreg(type, xNm1), WN_LdidPreg(type, xNm1));
  1067. xNm1 = AssignExpr(block, mpy, type);
  1068. powers[n++] = xNm1;
  1069. }
  1070. for(i= 0; ((1<<i) <= pow); i++)
  1071. {
  1072. if (BIT_IS_ON(pow, i))
  1073. {
  1074. PREG_NUM powerN = powers[i];
  1075. if (tree)
  1076. {
  1077. tree = WN_Mpy(type, tree, WN_LdidPreg(type, powerN));
  1078. }
  1079. else
  1080. {
  1081. tree = WN_LdidPreg(type, powerN);
  1082. }
  1083. }
  1084. }
  1085. return tree;
  1086. }
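/*
** Worked example: pow = 13 (binary 1101) first builds the temporaries
** t0 = x, t1 = x**2, t2 = x**4, t3 = x**8, then multiplies the powers
** whose bit is set: t0 * t2 * t3 = x**13. That is 5 multiplies instead
** of the 12 a naive repeated product would use.
*/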
  1087. static WN *em_exp_float(WN *block, WN *x, WN *pow, TYPE_ID type)
  1088. {
  1089. if (Is_Constant(pow))
  1090. {
  1091. TCON con = Const_Val(pow);
  1092. BOOL sqrt, rsqrt;
  1093. #ifdef KEY
  1094. BOOL sqrt_25, rsqrt_25, sqrt_75, rsqrt_75;
  1095. BOOL cbrt_33, cbrt_66;
  1096. #endif
  1097. WN *tree, *x_copy;
  1098. double n;
  1099. /*
  1100. * for complex x verify the power is a real number
  1101. * (TODO) general complex ** complex
  1102. */
  1103. if (MTYPE_is_complex(type))
  1104. {
  1105. TCON Ipow;
  1106. Ipow = Extract_Complex_Imag(con);
  1107. if (Targ_To_Host_Float(Ipow) == 0.0)
  1108. {
  1109. con = Extract_Complex_Real(con);
  1110. }
  1111. else
  1112. {
  1113. return NULL;
  1114. }
  1115. }
  1116. n = Targ_To_Host_Float(con);
  1117. sqrt = rsqrt = FALSE;
  1118. #ifdef KEY
  1119. cbrt_33 = cbrt_66 = FALSE; /* declared under KEY above, so initialize under KEY too */
  1120. sqrt_25 = rsqrt_25 = sqrt_75 = rsqrt_75 = FALSE;
  1121. #endif
  1122. if (trunc(n) == n)
  1123. {
  1124. ;
  1125. }
  1126. else if ((trunc(ABS(n))+.5) == ABS(n))
  1127. {
  1128. /*
  1129. * if we need to multiply by sqrt we need a copy of x
  1130. * as it might get changed underneath us.
  1131. */
  1132. if (n<0)
  1133. rsqrt = TRUE;
  1134. else
  1135. sqrt = TRUE;
  1136. x_copy = WN_COPY_Tree(x);
  1137. }
  1138. #ifdef KEY
  1139. else if ((trunc(ABS(n))+.25) == ABS(n))
  1140. {
  1141. /*
  1142. * if we need to multiply by sqrt we need a copy of x
  1143. * as it might get changed underneath us.
  1144. */
  1145. if (n<0)
  1146. rsqrt_25 = TRUE;
  1147. else
  1148. sqrt_25 = TRUE;
  1149. x_copy = WN_COPY_Tree(x);
  1150. }
  1151. else if ((trunc(ABS(n))+.75) == ABS(n))
  1152. {
  1153. /*
  1154. * if we need to multiply by sqrt we need a copy of x
  1155. * as it might get changed underneath us.
  1156. */
  1157. if (n<0)
  1158. rsqrt_75 = TRUE;
  1159. else
  1160. sqrt_75 = TRUE;
  1161. x_copy = WN_COPY_Tree(x);
  1162. }
  1163. #ifdef TARG_X8664
  1164. else if (ABS((trunc(n)+1.0/3) - n) < .0000001 &&
  1165. ! (Is_Target_64bit() && !Is_Target_Anyx86() && OPT_Fast_Math))
  1166. { // the pow in fast_math is faster than cbrt, so no point converting
  1167. cbrt_33 = TRUE;
  1168. x_copy = WN_COPY_Tree(x);
  1169. }
  1170. else if (ABS((trunc(n)+2.0/3) - n) < .0000001 &&
  1171. ! (Is_Target_64bit() && !Is_Target_Anyx86() && OPT_Fast_Math))
  1172. { // the pow in fast_math is faster than cbrt, so no point converting
  1173. cbrt_66 = TRUE;
  1174. x_copy = WN_COPY_Tree(x);
  1175. }
  1176. #endif
  1177. #endif
  1178. else
  1179. {
  1180. return NULL;
  1181. }
  1182. {
  1183. WN *ipow = WN_Intconst(MTYPE_I4, (INT64) trunc(n));
  1184. tree = em_exp_int(block, x, ipow, type);
  1185. }
  1186. if (sqrt || rsqrt)
  1187. {
  1188. #ifdef KEY
  1189. // bug 4824: non-constant float x could be negative
  1190. // bug 4990: Do the check only for C/C++ and if
  1191. // -fmath-errno (-LANG:math_errno=on)
  1192. if (!PU_f77_lang (Get_Current_PU()) &&
  1193. !PU_f90_lang (Get_Current_PU()) && // ! Fortran
  1194. LANG_Math_Errno && // -fmath-errno
  1195. MTYPE_is_float (WN_rtype (x_copy)) &&
  1196. (!Is_Constant (x_copy) ||
  1197. Targ_To_Host_Float (Const_Val (x_copy)) < 0))
  1198. return NULL;
  1199. #endif // KEY
  1200. #ifdef TARG_X8664
  1201. // Bug 5935 - rsqrtsd or rsqrtpd is absent.
  1202. if (rsqrt && (type == MTYPE_F8 || type == MTYPE_V16F8))
  1203. return NULL;
  1204. #endif
  1205. if (tree)
  1206. {
  1207. /*
  1208. * x ** n+.5 -> (x**n) * (x**.5)
  1209. * where the function em_exp_int has already evaluated
  1210. */
  1211. PREG_NUM xN, treeN;
  1212. WN *fractional;
  1213. xN = AssignExpr(block, x_copy, type);
  1214. treeN = AssignExpr(block, tree, type);
  1215. fractional = (sqrt) ? WN_Sqrt(type, WN_LdidPreg(type, xN)) :
  1216. WN_Rsqrt(type, WN_LdidPreg(type, xN));
  1217. tree = WN_Mpy(type,
  1218. WN_LdidPreg(type, treeN),
  1219. fractional);
  1220. }
  1221. }
  1222. #ifdef KEY // bug 6932
  1223. // evaluate (x**0.25) as sqrt(sqrt(x))
  1224. if (sqrt_25 || rsqrt_25)
  1225. {
  1226. if (!PU_f77_lang (Get_Current_PU()) &&
  1227. !PU_f90_lang (Get_Current_PU()) && // ! Fortran
  1228. LANG_Math_Errno && // -fmath-errno
  1229. MTYPE_is_float (WN_rtype (x_copy)) &&
  1230. (!Is_Constant (x_copy) ||
  1231. Targ_To_Host_Float (Const_Val (x_copy)) < 0))
  1232. return NULL;
  1233. #ifdef TARG_X8664
  1234. // rsqrtsd or rsqrtpd is absent.
  1235. if (rsqrt_25 && (type == MTYPE_F8 || type == MTYPE_V16F8))
  1236. return NULL;
  1237. #endif
  1238. if (tree)
  1239. {
  1240. /*
  1241. * x ** n+.25 -> (x**n) * (x**.25)
  1242. * where the function em_exp_int has already evaluated
  1243. */
  1244. PREG_NUM xN, treeN;
  1245. WN *fractional;
  1246. xN = AssignExpr(block, x_copy, type);
  1247. treeN = AssignExpr(block, tree, type);
  1248. if (sqrt_25)
  1249. fractional = WN_Sqrt(type, WN_Sqrt(type, WN_LdidPreg(type, xN)));
  1250. else
  1251. fractional = WN_Sqrt(type, WN_Rsqrt(type, WN_LdidPreg(type, xN)));
  1252. tree = WN_Mpy(type,
  1253. WN_LdidPreg(type, treeN),
  1254. fractional);
  1255. }
  1256. }
  1257. // evaluate (x**0.75) as sqrt(x)*sqrt(sqrt(x))
  1258. if (sqrt_75 || rsqrt_75)
  1259. {
  1260. if (!PU_f77_lang (Get_Current_PU()) &&
  1261. !PU_f90_lang (Get_Current_PU()) && // ! Fortran
  1262. LANG_Math_Errno && // -fmath-errno
  1263. MTYPE_is_float (WN_rtype (x_copy)) &&
  1264. (!Is_Constant (x_copy) ||
  1265. Targ_To_Host_Float (Const_Val (x_copy)) < 0))
  1266. return NULL;
  1267. #ifdef TARG_X8664
  1268. // rsqrtsd or rsqrtpd is absent.
  1269. if (rsqrt_75 && (type == MTYPE_F8 || type == MTYPE_V16F8))
  1270. return NULL;
  1271. #endif
  1272. if (tree)
  1273. {
  1274. /*
  1275. * x ** n+.75 -> (x**n) * (x**.75)
  1276. * where the function em_exp_int has already evaluated
  1277. */
  1278. PREG_NUM xN, treeN;
  1279. WN *fractional;
  1280. xN = AssignExpr(block, x_copy, type);
  1281. treeN = AssignExpr(block, tree, type);
  1282. if (sqrt_75)
  1283. fractional = WN_Mpy(type,
  1284. WN_Sqrt(type, WN_LdidPreg(type, xN)),
  1285. WN_Sqrt(type,
  1286. WN_Sqrt(type, WN_LdidPreg(type, xN))));
  1287. else
  1288. fractional = WN_Mpy(type,
  1289. WN_Rsqrt(type, WN_LdidPreg(type, xN)),
  1290. WN_Rsqrt(type,
  1291. WN_Sqrt(type, WN_LdidPreg(type, xN))));
  1292. tree = WN_Mpy(type,
  1293. WN_LdidPreg(type, treeN),
  1294. fractional);
  1295. }
  1296. }
  1297. // evaluate (x**0.333333) by calling cbrt()/cbrtf()
  1298. if (cbrt_33 || cbrt_66)
  1299. {
  1300. if (type != MTYPE_F4 && type != MTYPE_F8)
  1301. return NULL;
  1302. if (tree)
  1303. {
  1304. /*
  1305. * x ** n+1/3 -> (x**n) * (x**1/3)
  1306. * where the function em_exp_int has already evaluated
  1307. */
  1308. PREG_NUM xN = AssignExpr(block, x_copy, type);
  1309. WN *kid = WN_CreateParm(type, WN_LdidPreg(type, xN), Be_Type_Tbl(type),
  1310. WN_PARM_BY_VALUE | WN_PARM_READ_ONLY);
  1311. WN* fraction = WN_Create_Intrinsic(
  1312. OPCODE_make_op(OPR_INTRINSIC_OP, type, MTYPE_V),
  1313. type == MTYPE_F4 ? INTRN_F4CBRT : INTRN_F8CBRT,
  1314. 1, &kid);
  1315. if (cbrt_66) {
  1316. PREG_NUM x13 = AssignExpr(block, fraction, type);
  1317. fraction = WN_Mpy(type, WN_LdidPreg(type, x13),
  1318. WN_LdidPreg(type, x13));
  1319. }
  1320. tree = WN_Mpy(type, tree, fraction);
  1321. }
  1322. }
  1323. #endif
  1324. return tree;
  1325. }
  1326. return NULL;
  1327. }
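/*
** Worked example: pow = 2.5 has trunc(n) == 2, so em_exp_int builds
** x**2, the sqrt flag is set, and the result is (x**2) * sqrt(x).
** For pow = -2.5 the integer part is x**-2 and the multiplier is
** rsqrt(x).
*/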
  1328. static WN *em_exp_int(WN *block, WN *x, WN *pow, TYPE_ID type)
  1329. {
  1330. if (Is_Integer_Constant(pow))
  1331. {
  1332. INT32 n = WN_const_val(pow);
  1333. INT32 absN = ABS(n);
  1334. WN *exp= NULL;
  1335. if (em_exp_int_max < absN)
  1336. return NULL;
  1337. switch(n) {
  1338. case 1:
  1339. exp = x;
  1340. break;
  1341. case -1:
  1342. exp = WN_Inverse(type, x);
  1343. break;
  1344. case 0:
  1345. if (MTYPE_type_class(type) & MTYPE_CLASS_INTEGER)
  1346. exp = WN_Intconst(type, 1);
  1347. else
  1348. exp = WN_Floatconst(type, 1.0);
  1349. break;
  1350. case 2:
  1351. {
  1352. PREG_NUM xN;
  1353. xN = AssignExpr(block, x, type);
  1354. exp = WN_Mpy(type,
  1355. WN_LdidPreg(type, xN),
  1356. WN_LdidPreg(type, xN));
  1357. break;
  1358. }
  1359. default:
  1360. {
  1361. PREG_NUM xN;
  1362. if (Fast_Exp_Allowed)
  1363. {
  1364. xN = AssignExpr(block, x, type);
  1365. exp = build_mult_tree(block, type, xN, absN);
  1366. WN_Delete(pow);
  1367. if (n < 0)
  1368. exp = WN_Inverse(type, exp);
  1369. }
  1370. }
  1371. }
  1372. return exp;
  1373. }
  1374. else if (Is_Integer_Constant(x))
  1375. {
  1376. /*
  1377. * Optimize {-2,-1,0,1,2} ** n
  1378. */
  1379. INT32 val = WN_const_val(x);
  1380. switch(val)
  1381. {
  1382. case -2:
  1383. {
  1384. /*
  1385. * (n>=0) ? ( (n&1) ? - (1<<n) : 1<<n ) : 0
  1386. */
  1387. PREG_NUM powN, shlN;
  1388. WN *shl, *band, *cond, *select, *ge;
  1389. powN = AssignExpr(block, pow, type);
  1390. shl = WN_Shl(type,
  1391. WN_Intconst(type, 1),
  1392. WN_LdidPreg(type, powN));
  1393. shlN = AssignExpr(block, shl, type);
  1394. band = WN_Band(type,
  1395. WN_LdidPreg(type, powN),
  1396. WN_Intconst(type, 1));
  1397. cond = WN_EQ(type, band, WN_Zerocon(type));
  1398. select = WN_Select(type,
  1399. cond,
  1400. WN_LdidPreg(type, shlN),
  1401. WN_Neg(type, WN_LdidPreg(type, shlN)));
  1402. ge = WN_GE(type,
  1403. WN_LdidPreg(type, powN),
  1404. WN_Zerocon(type));
  1405. return WN_Select(type,
  1406. ge,
  1407. select,
  1408. WN_Zerocon(type));
  1409. }
  1410. case -1:
  1411. {
  1412. /*
  1413. * (n&1) ? -1 : 1;
  1414. */
  1415. WN *band;
  1416. band = WN_Band(type, pow, WN_Intconst(type, 1));
  1417. return WN_Select(type,
  1418. WN_EQ(type, band, WN_Zerocon(type)),
  1419. WN_Intconst(type, 1),
  1420. WN_Intconst(type, -1));
  1421. }
  1422. case 0:
  1423. /*
  1424. * (n==0) ? 1 : 0
  1425. * simpler is (n==0)
  1426. */
  1427. return WN_EQ(type, pow, WN_Zerocon(type));
  1428. case 1:
  1429. /*
  1430. * always and forever 1
  1431. */
  1432. return WN_Intconst(type, 1);
  1433. case 2:
  1434. {
  1435. /*
  1436. * (n>=0) ? 1<<n : 0
  1437. * simpler is (n>=0) << n
  1438. */
  1439. WN *ge;
  1440. PREG_NUM powN;
  1441. powN = AssignExpr(block, pow, type);
  1442. ge = WN_GE(type,
  1443. WN_LdidPreg(type, powN),
  1444. WN_Zerocon(type));
  1445. return WN_Shl(type,
  1446. ge,
  1447. WN_LdidPreg(type, powN));
  1448. }
  1449. }
  1450. }
  1451. return NULL;
  1452. }
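/*
** Worked example of the constant-base case x == -2 with n == 3:
** shl == 1<<3 == 8; n&1 == 1, so the inner select picks -8; n >= 0, so
** the outer select returns -8 == (-2)**3. Any n < 0 yields 0.
*/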
  1453. /*
  1454. ** quad negate looks like complex negate
  1455. **
  1456. ** if q = (x,y) then
  1457. ** -q = (-x, -y)
  1458. **
  1459. ** TODO nail down preg offset interface
  1460. ** Bug 12895: MIPS quad represents ieee 128, so -q = (-x, y)
  1461. */
  1462. static WN *em_quad_neg(WN *block, WN *tree)
  1463. {
  1464. TYPE_ID newType;
  1465. TYPE_ID type = WN_rtype(tree);
  1466. PREG_NUM qN, qNlo;
  1467. /*
  1468. * assign a quad preg temp as we will be referencing twice (sortof)
  1469. */
  1470. qN = AssignExpr(block, WN_kid0(tree), type);
  1471. if (MTYPE_is_complex(type))
  1472. {
  1473. newType = MTYPE_C8;
  1474. qNlo = qN+2;
  1475. }
  1476. else /* assume MTYPE_FQ or MTYPE_F16 */
  1477. {
  1478. newType = MTYPE_F8;
  1479. qNlo = qN+1;
  1480. }
  1481. {
  1482. WN *wn, *st;
  1483. ST *npreg = MTYPE_To_PREG(newType);
  1484. #ifdef TARG_MIPS
  1485. wn = WN_LdidPreg(newType, qN); // Bug 12895
  1486. #else
  1487. wn = WN_Neg(newType, WN_LdidPreg(newType, qN));
  1488. #endif
  1489. st = WN_StidIntoPreg(newType, qN, npreg, wn);
  1490. WN_INSERT_BlockLast(block, st);
  1491. wn = WN_Neg(newType, WN_LdidPreg(newType, qNlo));
  1492. st = WN_StidIntoPreg(newType, qNlo, npreg, wn);
  1493. WN_INSERT_BlockLast(block, st);
  1494. }
  1495. WN_Delete(tree);
  1496. return WN_LdidPreg(type, qN);
  1497. }
  1498. static WN *em_quad_abs(WN *block, WN *tree)
  1499. {
  1500. TYPE_ID newType;
  1501. TYPE_ID type = WN_rtype(tree);
  1502. PREG_NUM qN, qNlo;
  1503. /*
  1504. * assign a quad preg temp as we will be referencing twice (sortof)
  1505. */
  1506. qN = AssignExpr(block, WN_kid0(tree), type);
  1507. Is_True(! MTYPE_is_complex(type), ("em_quad_abs emulates FQ not CQ"));
  1508. newType = MTYPE_F8;
  1509. qNlo = qN+1;
  1510. {
  1511. WN *wn, *st;
  1512. ST *npreg = MTYPE_To_PREG(newType);
  1513. #ifdef TARG_MIPS
  1514. wn = WN_LdidPreg(newType, qN); // Bug 12895
  1515. #else
  1516. wn = WN_Abs(newType, WN_LdidPreg(newType, qN));
  1517. #endif
  1518. st = WN_StidIntoPreg(newType, qN, npreg, wn);
  1519. WN_INSERT_BlockLast(block, st);
  1520. wn = WN_Abs(newType, WN_LdidPreg(newType, qNlo));
  1521. st = WN_StidIntoPreg(newType, qNlo, npreg, wn);
  1522. WN_INSERT_BlockLast(block, st);
  1523. }
  1524. WN_Delete(tree);
  1525. return WN_LdidPreg(type, qN);
  1526. }
  1527. /*
  1528. ** There is no native quad select, so we must turn the
  1529. ** expression back into an if/else block
  1530. **
  1531. ** select: (cond) ? exp1 : exp2
  1532. **
  1533. ** --> if (cond) qN = exp1;
  1534. ** else qN = exp2;
  1535. ** return qN
  1536. **
  1537. */
  1538. static WN *em_split_select(WN *block, WN *tree)
  1539. {
  1540. TYPE_ID rtype = WN_rtype(tree);
  1541. PREG_NUM qN;
  1542. WN *if_then, *if_else;
  1543. if_then = WN_CreateBlock();
  1544. if_else = WN_CreateBlock();
  1545. {
  1546. WN *exp1 = WN_kid1(tree);
  1547. qN = AssignExpr(if_then, exp1, rtype);
  1548. }
  1549. {
  1550. WN *wn;
  1551. WN *exp2 = WN_kid2(tree);
  1552. ST *preg = MTYPE_To_PREG(rtype);
  1553. wn = WN_StidIntoPreg(rtype, qN, preg, exp2);
  1554. WN_INSERT_BlockLast(if_else, wn);
  1555. }
  1556. {
  1557. WN *IF;
  1558. WN *cond = WN_kid0(tree);
  1559. IF = WN_CreateIf(cond, if_then, if_else);
  1560. WN_INSERT_BlockLast(block, IF);
  1561. }
  1562. WN_Delete(tree);
  1563. return WN_LdidPreg(rtype, qN);
  1564. }
  1565. /*
  1566. ** Evaluate the following function
  1567. **
  1568. ** Definition
  1569. ** x y INTRN_DIVFLOOR INTRN_DIVCEIL
  1570. ** --- -------------- -------------
  1571. ** + + x / y (x+y-1) / y
  1572. **
  1573. ** - - x / y (x+y+1) / y
  1574. **
  1575. ** + - (x+ -1-y)/y x / y
  1576. **
  1577. ** - + (x+ 1-y)/y x / y
  1578. **
  1579. **
  1580. ** The issue was to evaluate (divfloor) without branch code.
  1581. **
  1582. ** Tricks
  1583. ** f(x) = -1 (x<0)
  1584. ** +1 (x>=0)
  1585. ** {
  1586. ** t= x>>31;
  1587. ** f= t+t+1
  1588. ** }
  1589. ** MASK(x,y,v)= 0 (x>=0, y>=0), (x<0, y<0) ++, --
  1590. ** v (x>=0, y<0), (x<0, y>=0) +-, -+
  1591. ** {
  1592. ** t= (x^y)>>31
  1593. ** MASK= t & v
  1594. ** }
  1595. **
  1596. ** The cleverness (Shapiro's) was the composition of these functions
  1597. ** to evaluate divfloor.
  1598. **
  1599. ** DIVFLOOR(x,y)=
  1600. ** v = f(y) - y; (-1-y) [+-], (+1-y) [-+]
  1601. ** (x + MASK(x,y,v)) / y
  1602. **
  1603. ** DIVCEIL(x,y) = -DIVFLOOR(-x,y)
  1604. **
  1605. ** x,y are assumed integral or we could just do a divide/floor
  1606. **
  1607. **
  1608. */
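/*
** Worked example: x = 7, y = -2. f(y) = -1, so v = -1 - (-2) = 1;
** x^y < 0 so MASK = v = 1; numer = 7 + 1 = 8 and 8 / -2 = -4, which is
** floor(7 / -2) = floor(-3.5). With y = 2 the mask is 0 and the plain
** truncating divide 7 / 2 = 3 is already the floor.
*/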
  1609. static WN *em_divfloor(WN *block, TYPE_ID type, WN *x, WN *y)
  1610. {
  1611. PREG_NUM xN, yN;
  1612. WN *numer, *div;
  1613. Is_True((MTYPE_is_integral(WN_rtype(x)) &&
  1614. MTYPE_is_integral(WN_rtype(y))),
  1615. ("em_divfloor() arguments should be type integral"));
  1616. xN = AssignExpr(block, x, type);
  1617. yN = AssignExpr(block, y, type);
  1618. {
  1619. /*
  1620. * one = 1 (y >= 0)
  1621. * -1 (y < 0)
  1622. */
  1623. TYPE_ID ytype = WN_rtype(y);
  1624. WN *sra, *add, *one, *bxor, *mask, *sub, *band;
  1625. #ifdef TARG_X8664
  1626. // Bug 3264 - This algorithm requires that byte size be identical for
  1627. // ytype and type, for zero-extended 64-bit target ISA.
  1628. if (MTYPE_is_unsigned(ytype) &&
  1629. MTYPE_byte_size(ytype) < MTYPE_byte_size(type))
  1630. ytype = type;
  1631. #endif
  1632. sra = WN_Ashr(type,
  1633. WN_LdidPreg(type, yN),
  1634. WN_Intconst(type, MTYPE_size_reg(ytype)-1));
  1635. add = WN_Add(type,
  1636. sra,
  1637. WN_COPY_Tree(sra));
  1638. one = WN_Add(ytype,
  1639. add,
  1640. WN_Intconst(ytype, 1));
  1641. /*
  1642. * mask = 0 (x,y)= ++ --
  1643. * mask = -1 (x,y)= +- +-
  1644. */
  1645. bxor = WN_Bxor(ytype,
  1646. WN_LdidPreg(type, xN),
  1647. WN_LdidPreg(type, yN));
  1648. mask = WN_Ashr(type,
  1649. bxor,
  1650. WN_Intconst(type, MTYPE_size_reg(type)-1));
  1651. /*
  1652. * sub = 1 - y (y >= 0)
  1653. * -1 - y (y < 0)
  1654. */
  1655. sub = WN_Sub(type, one, WN_LdidPreg(type, yN));
  1656. band = WN_Band(type, sub, mask);
  1657. numer = WN_Add(type, band, WN_LdidPreg(type, xN));
  1658. }
  1659. div = WN_Div(type, numer, WN_LdidPreg(type, yN));
  1660. return div;
  1661. }
  1662. /*
  1663. ** Evaluate the following function
  1664. **
  1665. ** divceil(x,y)
  1666. ** (x)(y)
  1667. ** + + (x + (y-1)) / y
  1668. ** - - (x + (y+1)) / y
  1669. ** + - x / y
  1670. ** - + x / y
  1671. **
  1672. ** x,y are assumed integral or we could just do a divide/floor
  1673. **
1674. ** for now please note the identity
  1675. **
  1676. ** divceil(x,y)= - divfloor(-x, y)
  1677. */
  1678. static WN *em_divceil(WN *block, TYPE_ID type, WN *x, WN*y)
  1679. {
  1680. WN *divfloor;
  1681. divfloor = em_divfloor(block, type, WN_Neg( WN_rtype(x), x), y);
  1682. return WN_Neg(type, divfloor);
  1683. }
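/* ====================================================================
 * Illustrative sketch, not from the original source: the DIVFLOOR
 * composition above written as plain C++ for a 32-bit signed int.
 * ref_divfloor/ref_divceil are hypothetical names; the sketch assumes
 * an arithmetic right shift on signed values, as the WHIRL expansion
 * does.
 * ==================================================================== */
static inline int ref_divfloor(int x, int y)
{
  int t = (x ^ y) >> 31;               /* -1 if signs differ, else 0     */
  int f = (y >> 31) + (y >> 31) + 1;   /* f(y): +1 if y>=0, -1 if y<0    */
  int v = f - y;                       /* (-1-y) or (+1-y), per the table */
  return (x + (t & v)) / y;            /* mask applies v only for +-, -+ */
}
static inline int ref_divceil(int x, int y)
{
  return -ref_divfloor(-x, y);         /* DIVCEIL(x,y) = -DIVFLOOR(-x,y) */
}
/* e.g. ref_divfloor(-7, 2) == -4 while C's -7/2 == -3;
 *      ref_divceil(-7, 2) == -3. */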
  1684. /* ====================================================================
  1685. *
  1686. * em_alloca
  1687. * lower the alloca intrinsic call
  1688. *
  1689. * _builtin_alloca(size)
  1690. *
  1691. * to the sequence of whirl trees, if the stack direction is decrement.
  1692. *
1693. * sp = sp - roundup(size, stack_alignment)
  1694. * return sp + arg_build_area_size
  1695. *
  1696. * ==================================================================== */
  1697. static WN *em_alloca(WN *block, WN *tree)
  1698. {
  1699. WN *size = WN_arg(tree, 0);
  1700. TYPE_ID stype = WN_rtype(size);
  1701. TYPE_ID type = WN_rtype(tree);
  1702. BOOL stack_decrement = (Stack_Direction() == DECREMENT);
  1703. {
  1704. WN *add, *adj, *inc, *st;
  1705. INT64 stack_alignment;
  1706. ST *preg = MTYPE_To_PREG(Pointer_type);
  1707. stack_alignment = Stack_Alignment();
  1708. add = WN_Add(stype,
  1709. size,
  1710. WN_Intconst(stype, stack_alignment-1));
  1711. adj = WN_Band(stype,
  1712. add,
  1713. WN_Intconst(stype, -stack_alignment));
  1714. inc = WN_Binary(stack_decrement ? OPR_SUB : OPR_ADD,
  1715. type,
  1716. WN_LdidPreg(Pointer_type, Stack_Pointer_Preg_Offset),
  1717. adj);
  1718. st = WN_StidIntoPreg(Pointer_type, Stack_Pointer_Preg_Offset, preg, inc);
  1719. WN_INSERT_BlockLast(block, st);
  1720. }
  1721. {
  1722. /*
  1723. * Add in the build area size (now that it is known)
  1724. */
  1725. WN *ptr;
  1726. Is_True(!Get_Trace(TP_DATALAYOUT, 2),("arg build area not correct"));
  1727. ptr = WN_Binary(stack_decrement ? OPR_ADD : OPR_SUB,
  1728. type,
  1729. WN_LdidPreg(Pointer_type, Stack_Pointer_Preg_Offset),
  1730. WN_Intconst(type, Current_PU_Actual_Size));
  1731. if ( DEBUG_Trap_Uv )
  1732. {
  1733. WN *con, *mstore;
  1734. con = WN_UVConst(stype);
  1735. mstore= aux_CreateMstore(0,
  1736. Make_Pointer_Type(MTYPE_To_TY(stype), TRUE),
  1737. con,
  1738. WN_COPY_Tree(ptr),
  1739. WN_COPY_Tree(size));
  1740. WN_INSERT_BlockLast(block, mstore);
  1741. }
  1742. return ptr;
  1743. }
  1744. }
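/* ====================================================================
 * Illustrative sketch, not from the original source: the round-up
 * that em_alloca applies to the requested size.  round_up_to_align is
 * a hypothetical helper; it assumes the alignment is a power of two,
 * as Stack_Alignment() is on these targets.
 * ==================================================================== */
static inline long long round_up_to_align(long long size, long long align)
{
  /* adding (align-1) then masking with -align clears the low bits,
   * rounding size up to the next multiple of align */
  return (size + align - 1) & -align;
}
/* round_up_to_align(13, 16) == 16, round_up_to_align(32, 16) == 32 */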
  1745. /* ====================================================================
  1746. *
1747. * WN *em_readstackpointer(TYPE_ID rtype)
  1748. *
  1749. * return sp
  1750. *
  1751. * ==================================================================== */
  1752. static WN *em_readstackpointer(TYPE_ID rtype)
  1753. {
  1754. return WN_LdidPreg(rtype, Stack_Pointer_Preg_Offset);
  1755. }
  1756. #ifdef KEY
  1757. /* ====================================================================
  1758. *
1759. * WN *em_readframepointer(TYPE_ID rtype)
1760. *
1761. * return fp
  1762. *
  1763. * ==================================================================== */
  1764. static WN *em_readframepointer(TYPE_ID rtype)
  1765. {
  1766. return WN_LdidPreg(rtype, Frame_Pointer_Preg_Offset);
  1767. }
  1768. #endif
  1769. /* ====================================================================
  1770. *
  1771. * WN *em_setstackpointer(WN *block, TYPE_ID rtype, WN *value)
  1772. *
  1773. * Set the stack pointer (sp) to value
  1774. *
  1775. * ==================================================================== */
  1776. static WN *em_setstackpointer(WN *block, TYPE_ID rtype, WN *value)
  1777. {
  1778. WN *stid;
  1779. stid = WN_StidIntoPreg(rtype,
  1780. Stack_Pointer_Preg_Offset,
  1781. MTYPE_To_PREG(rtype),
  1782. value);
  1783. WN_INSERT_BlockLast(block, stid);
  1784. return WN_LdidPreg(rtype, Stack_Pointer_Preg_Offset);
  1785. }
  1786. static WN *createParm(WN *x, BOOL byvalue)
  1787. {
  1788. TYPE_ID type = WN_rtype(x);
  1789. TY_IDX ty;
  1790. if (WN_operator_is(x, OPR_PARM))
  1791. {
  1792. return x;
  1793. }
  1794. ty = MTYPE_To_TY(type);
  1795. return WN_CreateParm(type, x, ty,
  1796. (byvalue ? WN_PARM_BY_VALUE : WN_PARM_BY_REFERENCE));
  1797. }
  1798. static WN *Intrinsic(TYPE_ID type, INTRINSIC id, INT32 n, WN *x, WN *y)
  1799. {
  1800. OPCODE op= OPCODE_make_op (OPR_INTRINSIC_OP, type, MTYPE_V);
  1801. WN *wn, *kids[20];
  1802. BOOL byvalue = INTRN_by_value(id);
  1803. Is_True((n<=20), ("too many arguments for Intrinsic()"));
  1804. if (x)
  1805. x = createParm(x,byvalue);
  1806. if (y)
  1807. y = createParm(y,byvalue);
  1808. kids[0]= x;
  1809. kids[1]= y;
  1810. wn = WN_Create_Intrinsic(op, id, n, kids);
  1811. return wn;
  1812. }
  1813. /* ====================================================================
  1814. *
  1815. * real = e**(rz) * cos(iz);
  1816. * imag = e**(rz) * sin(iz);
  1817. *
  1818. * ==================================================================== */
  1819. static WN *em_complex_exp(WN *block, WN *x)
  1820. {
  1821. TYPE_ID type = WN_rtype(x);
  1822. TYPE_ID rtype = Mtype_complex_to_real(type);
  1823. PREG_NUM zN, expN;
  1824. WN *cosine,*sine;
  1825. WN *exp;
  1826. WN *realpart, *imagpart ;
  1827. INTRINSIC expID;
  1828. INTRINSIC cosID,sinID;
  1829. switch(rtype) {
  1830. case MTYPE_F4: expID = INTRN_F4EXP; cosID = INTRN_F4COS; sinID = INTRN_F4SIN; break;
  1831. case MTYPE_F8: expID = INTRN_F8EXP; cosID = INTRN_F8COS; sinID = INTRN_F8SIN; break;
  1832. case MTYPE_FQ: expID = INTRN_FQEXP; cosID = INTRN_FQCOS; sinID = INTRN_FQSIN; break;
  1833. case MTYPE_F16: expID = INTRN_F16EXP; cosID = INTRN_F16COS; sinID = INTRN_F16SIN; break;
  1834. }
  1835. zN = AssignExpr(block, x, type);
  1836. exp= Intrinsic(rtype,
  1837. expID,
  1838. 1,
  1839. WN_Realpart(rtype,WN_LdidPreg(type, zN)), NULL);
  1840. expN = AssignExpr(block, exp, rtype);
  1841. cosine = Intrinsic(rtype,
  1842. cosID,
  1843. 1,
  1844. WN_Imagpart(rtype,WN_LdidPreg(type, zN)), NULL);
  1845. sine = Intrinsic(rtype,
  1846. sinID,
  1847. 1,
  1848. WN_Imagpart(rtype,WN_LdidPreg(type, zN)), NULL);
  1849. /*
  1850. * cis = cos(iz) + i*sin(iz);
  1851. *
  1852. * real = e**(rz) * REAL(cis);
  1853. * imag = e**(rz) * IMAG(cis);
  1854. */
  1855. realpart = WN_Mpy(rtype,
  1856. WN_LdidPreg(rtype, expN),
  1857. cosine);
  1858. imagpart = WN_Mpy(rtype,
  1859. WN_LdidPreg(rtype, expN),
  1860. sine);
  1861. return WN_Complex(type, realpart, imagpart);
  1862. }
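/* ====================================================================
 * Illustrative sketch, not from the original source: the identity
 * realized above, exp(a+bi) = exp(a)*cos(b) + i*exp(a)*sin(b), using
 * the libm routines already pulled in via <math.h>.  ref_complex_exp
 * is a hypothetical name.
 * ==================================================================== */
static inline void ref_complex_exp(double rz, double iz, double *re, double *im)
{
  double e = exp(rz);   /* computed once, as the expN preg is above */
  *re = e * cos(iz);
  *im = e * sin(iz);
}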
  1863. /* ====================================================================
  1864. *
  1865. * WN *em_complex_cos(WN *block, WN *x)
  1866. *
  1867. * real = cos(rz) * cosh(iz);
  1868. * imag = -sin(rz) * sinh(iz);
  1869. *
  1870. * ==================================================================== */
  1871. static WN *em_complex_cos(WN *block, WN *x)
  1872. {
  1873. TYPE_ID type = WN_rtype(x);
  1874. TYPE_ID rtype = Mtype_complex_to_real(type);
  1875. PREG_NUM zN;
  1876. WN *realpart, *imagpart ;
  1877. zN = AssignExpr(block, x, type);
  1878. {
  1879. INTRINSIC cosID;
  1880. INTRINSIC coshID;
  1881. WN *cos, *cosh;
  1882. switch(rtype)
  1883. {
  1884. case MTYPE_F4:
  1885. cosID = INTRN_F4COS;
  1886. coshID = INTRN_F4COSH;
  1887. break;
  1888. case MTYPE_F8:
  1889. cosID = INTRN_F8COS;
  1890. coshID = INTRN_F8COSH;
  1891. break;
1892. case MTYPE_FQ:
1893. cosID = INTRN_FQCOS;
1894. coshID = INTRN_FQCOSH; break;
  1895. case MTYPE_F16:
  1896. cosID = INTRN_F16COS;
  1897. coshID = INTRN_F16COSH;
  1898. break;
  1899. }
  1900. cos= Intrinsic(rtype,
  1901. cosID,
  1902. 1,
  1903. WN_Realpart(rtype,WN_LdidPreg(type, zN)), NULL);
  1904. cosh= Intrinsic(rtype,
  1905. coshID,
  1906. 1,
  1907. WN_Imagpart(rtype,WN_LdidPreg(type, zN)), NULL);
  1908. /*
  1909. * real = cos(rz) * cosh(iz);
  1910. */
  1911. realpart = WN_Mpy(rtype, cos, cosh);
  1912. }
  1913. {
  1914. INTRINSIC sinID, sinhID;
  1915. WN *sin, *sinh;
  1916. switch(rtype)
  1917. {
  1918. case MTYPE_F4:
  1919. sinID = INTRN_F4SIN;
  1920. sinhID = INTRN_F4SINH;
  1921. break;
  1922. case MTYPE_F8:
  1923. sinID = INTRN_F8SIN;
  1924. sinhID = INTRN_F8SINH;
  1925. break;
  1926. case MTYPE_FQ:
  1927. sinID = INTRN_FQSIN;
  1928. sinhID = INTRN_FQSINH;
  1929. break;
  1930. case MTYPE_F16:
  1931. sinID = INTRN_F16SIN;
  1932. sinhID = INTRN_F16SINH;
  1933. break;
  1934. }
  1935. sin= Intrinsic(rtype,
  1936. sinID,
  1937. 1,
  1938. WN_Realpart(rtype,WN_LdidPreg(type, zN)), NULL);
  1939. sinh= Intrinsic(rtype,
  1940. sinhID,
  1941. 1,
  1942. WN_Imagpart(rtype,WN_LdidPreg(type, zN)), NULL);
  1943. /*
  1944. * imag = -sin(rz) * sinh(iz);
  1945. */
  1946. imagpart = WN_Neg(rtype, WN_Mpy(rtype, sin, sinh));
  1947. }
  1948. return WN_Complex(type, realpart, imagpart);
  1949. }
  1950. /* ====================================================================
  1951. *
  1952. * WN *em_complex_sin(WN *block, WN *x)
  1953. *
  1954. * real = sin(rz) * cosh(iz);
  1955. * imag = cos(rz) * sinh(iz);
  1956. *
  1957. * ==================================================================== */
  1958. static WN *em_complex_sin(WN *block, WN *x)
  1959. {
  1960. TYPE_ID type = WN_rtype(x);
  1961. TYPE_ID rtype = Mtype_complex_to_real(type);
  1962. PREG_NUM zN;
  1963. WN *realpart, *imagpart ;
  1964. zN = AssignExpr(block, x, type);
  1965. {
  1966. INTRINSIC sinID, coshID;
  1967. WN *sin, *cosh;
  1968. switch(rtype)
  1969. {
  1970. case MTYPE_F4:
  1971. sinID = INTRN_F4SIN;
  1972. coshID = INTRN_F4COSH;
  1973. break;
  1974. case MTYPE_F8:
  1975. sinID = INTRN_F8SIN;
  1976. coshID = INTRN_F8COSH;
  1977. break;
  1978. case MTYPE_FQ:
  1979. sinID = INTRN_FQSIN;
  1980. coshID = INTRN_FQCOSH;
  1981. break;
  1982. case MTYPE_F16:
  1983. sinID = INTRN_F16SIN;
  1984. coshID = INTRN_F16COSH;
  1985. break;
  1986. }
  1987. sin= Intrinsic(rtype,
  1988. sinID,
  1989. 1,
  1990. WN_Realpart(rtype,WN_LdidPreg(type, zN)), NULL);
  1991. cosh= Intrinsic(rtype,
  1992. coshID,
  1993. 1,
  1994. WN_Imagpart(rtype,WN_LdidPreg(type, zN)), NULL);
  1995. /*
  1996. * real = sin(rz) * cosh(iz);
  1997. */
  1998. realpart = WN_Mpy(rtype, sin, cosh);
  1999. }
  2000. {
  2001. INTRINSIC cosID, sinhID;
  2002. WN *cos, *sinh;
  2003. switch(rtype)
  2004. {
  2005. case MTYPE_F4:
  2006. cosID = INTRN_F4COS;
  2007. sinhID = INTRN_F4SINH;
  2008. break;
  2009. case MTYPE_F8:
  2010. cosID = INTRN_F8COS;
  2011. sinhID = INTRN_F8SINH;
  2012. break;
  2013. case MTYPE_FQ:
  2014. cosID = INTRN_FQCOS;
  2015. sinhID = INTRN_FQSINH;
  2016. break;
  2017. case MTYPE_F16:
  2018. cosID = INTRN_F16COS;
  2019. sinhID = INTRN_F16SINH;
  2020. break;
  2021. }
  2022. cos= Intrinsic(rtype,
  2023. cosID,
  2024. 1,
  2025. WN_Realpart(rtype,WN_LdidPreg(type, zN)), NULL);
  2026. sinh= Intrinsic(rtype,
  2027. sinhID,
  2028. 1,
  2029. WN_Imagpart(rtype,WN_LdidPreg(type, zN)), NULL);
  2030. /*
  2031. * imag = cos(rz) * sinh(iz);
  2032. */
  2033. imagpart = WN_Mpy(rtype, cos, sinh);
  2034. }
  2035. return WN_Complex(type, realpart, imagpart);
  2036. }
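/* ====================================================================
 * Illustrative sketch, not from the original source: the two
 * identities expanded above, using libm from <math.h>; the ref_*
 * names are hypothetical.
 *   cos(a+bi) = cos(a)cosh(b) - i*sin(a)sinh(b)
 *   sin(a+bi) = sin(a)cosh(b) + i*cos(a)sinh(b)
 * ==================================================================== */
static inline void ref_complex_cos(double rz, double iz, double *re, double *im)
{
  *re =  cos(rz) * cosh(iz);
  *im = -sin(rz) * sinh(iz);
}
static inline void ref_complex_sin(double rz, double iz, double *re, double *im)
{
  *re = sin(rz) * cosh(iz);
  *im = cos(rz) * sinh(iz);
}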
  2037. /* ====================================================================
  2038. *
  2039. * WN *em_preg_hypot(WN *block, TYPE_ID type, PREG_NUM xN, PREG_NUM yN)
  2040. *
  2041. * compute sqrt ( x**2 + y**2 ) via two methods, depending on
2042. * Fast_Complex_Allowed (maybe this should be (Roundoff_Level >= ROUNDOFF_ANY)?)
  2043. *
  2044. * WARNING!!
  2045. *
2046. * If (Fast_Complex_Allowed == FALSE) this generates more accurate code,
2047. * but divides by zero when xN and yN are both zero !!!
  2048. *
  2049. * ==================================================================== */
  2050. static WN *em_preg_hypot(WN *block, TYPE_ID type, PREG_NUM xN, PREG_NUM yN)
  2051. {
  2052. if (Fast_Complex_Allowed)
  2053. {
  2054. /*
  2055. * SQRT( x**2 + y**2 )
  2056. *
  2057. */
  2058. WN *x2, *y2, *add, *hypot;
  2059. x2 = WN_Mpy(type,
  2060. WN_LdidPreg(type, xN),
  2061. WN_LdidPreg(type, xN));
  2062. y2 = WN_Mpy(type,
  2063. WN_LdidPreg(type, yN),
  2064. WN_LdidPreg(type, yN));
  2065. add = WN_Add(type, x2, y2);
  2066. hypot = WN_Sqrt(type, add);
  2067. return hypot;
  2068. }
  2069. else
  2070. {
  2071. /*
  2072. * After factoring out max( |x| , |y| )
  2073. *
  2074. * | x | > | y |
  2075. *
  2076. * | x | * SQRT(1 + (y/x)**2)
  2077. *
  2078. * | y | > | x |
  2079. *
  2080. * | y | * SQRT(1 + (x/y)**2)
  2081. *
  2082. */
  2083. PREG_NUM axN, ayN, zN, divN;
  2084. WN *cond, *w, *z, *div, *mpy, *add, *sqrt, *hypot, *az;
  2085. axN = AssignExpr(block,
  2086. WN_Abs(type, WN_LdidPreg(type, xN)),
  2087. type);
  2088. ayN = AssignExpr(block,
  2089. WN_Abs(type, WN_LdidPreg(type, yN)),
  2090. type);
  2091. /*
  2092. * w = | x | > | y | ? y : x
  2093. * z = | x | > | y | ? x : y;
  2094. *
  2095. * Let div = w / z
  2096. */
  2097. cond = WN_GT(type,
  2098. WN_LdidPreg(type, axN),
2099. WN_LdidPreg(type, ayN));
  2100. w = WN_Select(type,
  2101. cond,
  2102. WN_LdidPreg(type, yN),
  2103. WN_LdidPreg(type, xN));
  2104. cond = WN_GT(type,
  2105. WN_LdidPreg(type, axN),
  2106. WN_LdidPreg(type, ayN));
  2107. z = WN_Select(type,
  2108. cond,
  2109. WN_LdidPreg(type, xN),
  2110. WN_LdidPreg(type, yN));
  2111. zN = AssignExpr(block, z, type);
  2112. div = WN_Div(type,
  2113. w,
  2114. WN_LdidPreg(type, zN));
  2115. divN = AssignExpr(block, div, type);
  2116. /*
  2117. * form zN * SQRT(1.0 + divN**2)
  2118. */
  2119. mpy = WN_Mpy(type,
  2120. WN_LdidPreg(type, divN),
  2121. WN_LdidPreg(type, divN));
  2122. add = WN_Add(type,
  2123. WN_Floatconst(type, 1.0),
  2124. mpy);
  2125. sqrt = WN_Sqrt(type, add);
  2126. az = WN_Abs(type, WN_LdidPreg(type, zN));
  2127. hypot = WN_Mpy(type, sqrt, az);
  2128. return hypot;
  2129. }
  2130. }
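/* ====================================================================
 * Illustrative sketch, not from the original source: the scaled form
 * used when Fast_Complex_Allowed is FALSE.  Factoring out the larger
 * magnitude keeps (w/z)**2 <= 1, avoiding the overflow/underflow of
 * the naive x*x + y*y; like the expansion above, it divides by zero
 * when both arguments are zero.  ref_hypot is a hypothetical name.
 * ==================================================================== */
static inline double ref_hypot(double x, double y)
{
  double ax = fabs(x), ay = fabs(y);
  double z  = (ax > ay) ? x : y;    /* larger-magnitude operand  */
  double w  = (ax > ay) ? y : x;    /* smaller-magnitude operand */
  double q  = w / z;                /* |q| <= 1 by construction  */
  return fabs(z) * sqrt(1.0 + q * q);
}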
  2131. /* ====================================================================
  2132. *
  2133. *
  2134. * ==================================================================== */
  2135. static WN *em_hypot(WN *block, WN *x, WN *y)
  2136. {
  2137. TYPE_ID type = WN_rtype(x);
  2138. PREG_NUM xN, yN;
  2139. Is_True((type == WN_rtype(y)), ("em_hypot(): type mismatch"));
  2140. xN = AssignExpr(block, x, type);
  2141. yN = AssignExpr(block, y, type);
  2142. return em_preg_hypot(block, type, xN, yN);
  2143. }
  2144. /* ====================================================================
  2145. *
  2146. * WN *em_complex_log(WN *block, WN *x)
  2147. *
  2148. * real = log ( sqrt(rz**2 + iz**2) )
  2149. * imag = fatan2(iz, rz)
  2150. *
  2151. * ==================================================================== */
  2152. static WN *em_complex_log(WN *block, WN *x)
  2153. {
  2154. PREG_NUM zN;
  2155. WN *hypot, *realpart, *imagpart ;
  2156. TYPE_ID type = WN_rtype(x);
  2157. TYPE_ID rtype = Mtype_complex_to_real(type);
  2158. zN = AssignExpr(block, x, type);
  2159. /*
2160. * log(0) is already undefined, so there is no need to test for zero
  2161. */
  2162. hypot = em_hypot(block,
  2163. WN_Realpart(rtype,WN_LdidPreg(type,zN)),
  2164. WN_Imagpart(rtype,WN_LdidPreg(type,zN)));
  2165. {
  2166. INTRINSIC logID;
  2167. switch(rtype)
  2168. {
  2169. case MTYPE_F4: logID = INTRN_F4LOG; break;
  2170. case MTYPE_F8: logID = INTRN_F8LOG; break;
  2171. case MTYPE_FQ: logID = INTRN_FQLOG; break;
  2172. case MTYPE_F16: logID = INTRN_F16LOG; break;
  2173. }
  2174. realpart= Intrinsic(rtype, logID, 1, hypot, NULL);
  2175. }
  2176. {
  2177. INTRINSIC atan2ID;
  2178. switch(rtype)
  2179. {
  2180. case MTYPE_F4: atan2ID = INTRN_F4ATAN2; break;
  2181. case MTYPE_F8: atan2ID = INTRN_F8ATAN2; break;
  2182. case MTYPE_FQ: atan2ID = INTRN_FQATAN2; break;
  2183. case MTYPE_F16: atan2ID = INTRN_F16ATAN2; break;
  2184. }
  2185. imagpart= Intrinsic(rtype, atan2ID, 2,
  2186. WN_Imagpart(rtype,WN_LdidPreg(type, zN)),
  2187. WN_Realpart(rtype,WN_LdidPreg(type, zN)));
  2188. }
  2189. return WN_Complex(type, realpart, imagpart);
  2190. }
  2191. /* ====================================================================
  2192. *
2193. * WN *em_complex_abs(WN *block, WN *z)
  2194. *
  2195. * ( sqrt(rz**2 + iz**2) )
  2196. *
  2197. * ==================================================================== */
  2198. static WN *em_complex_abs(WN *block, WN *z)
  2199. {
  2200. PREG_NUM zN;
  2201. WN *hypot;
  2202. TYPE_ID type = WN_rtype(z);
  2203. TYPE_ID rtype = Mtype_complex_to_real(type);
  2204. zN = AssignExpr(block, z, type);
  2205. if (Fast_Complex_Allowed==FALSE)
  2206. {
  2207. /*
  2208. * It is unfortunate that we have to "know" the internals of em_preg_hypot
  2209. * We must check if z==0
  2210. */
  2211. WN *if_else;
  2212. if_else = WN_CreateBlock();
  2213. hypot = em_hypot(if_else,
  2214. WN_Realpart(rtype,WN_LdidPreg(type,zN)),
  2215. WN_Imagpart(rtype,WN_LdidPreg(type,zN)));
  2216. hypot = checkForZero(block, type, zN, if_else, hypot);
  2217. }
  2218. else
  2219. {
  2220. hypot = em_hypot(block,
  2221. WN_Realpart(rtype,WN_LdidPreg(type,zN)),
  2222. WN_Imagpart(rtype,WN_LdidPreg(type,zN)));
  2223. }
  2224. return hypot;
  2225. }
  2226. /* ====================================================================
  2227. *
  2228. * WN *em_complex_sqrt_preg(WN *block, TYPE_ID type, PREG_NUM zN)
  2229. *
  2230. * From library routine __zsqrt
  2231. * R(z) == I(z) == 0
  2232. * real = imag = 0
  2233. * R(z)>0
  2234. * real = sqrt ( (abs(z) + R(z)) / 2 )
  2235. * imag = I(z) / (2 * real)
  2236. * R(z)<=0
  2237. * imag = sqrt ( (abs(z) - R(z)) / 2 )
  2238. * if (I(z)<0)
  2239. * imag = -imag;
  2240. * real = I(z) / (2 * imag)
  2241. *
  2242. * The implementation will be
  2243. *
  2244. * t1 = sqrt ( (abs(z) + abs(R(z))) * .5 ) (NOTE R(z) always>0)
2245. * t2 = (R(z)<=0 && I(z)<0) ? -t1 : t1;
  2246. * t3 = (I(z) / t2) * .5
  2247. * realpart = (R(z)>0) ? t2 : t3
  2248. * imagpart = (R(z)>0) ? t3 : t2
  2249. *
  2250. * ==================================================================== */
  2251. static WN *em_complex_sqrt_preg(WN *block, TYPE_ID type, PREG_NUM zN_in)
  2252. {
  2253. PREG_NUM ziN, zN, absN, t1N, t2N, t3N;
  2254. TYPE_ID rtype = Mtype_complex_to_real(type);
  2255. zN = AssignExpr(block,WN_Realpart(rtype,WN_LdidPreg(type,zN_in)),rtype);
  2256. ziN = AssignExpr(block,WN_Imagpart(rtype,WN_LdidPreg(type,zN_in)),rtype);
  2257. {
  2258. /*
2259. * t1 = sqrt ( (abs(z) + abs(R(z))) * .5 )
  2260. */
  2261. WN *norm, *add, *mpy, *sqrt;
  2262. norm = em_preg_hypot(block, rtype, zN, ziN);
  2263. absN = AssignExpr(block, norm, rtype);
  2264. add = WN_Add(rtype,
  2265. WN_LdidPreg(rtype, absN),
  2266. WN_Abs(rtype, WN_LdidPreg(rtype, zN)));
  2267. mpy = WN_Mpy(rtype,
  2268. add,
  2269. WN_Floatconst(rtype, .5000));
  2270. sqrt = WN_Sqrt(rtype, mpy);
  2271. t1N = AssignExpr(block, sqrt, rtype);
  2272. }
  2273. {
  2274. /*
2275. * t2 = (R(z)<=0 && I(z)<0) ? -t1 : t1;
  2276. */
  2277. WN *le, *lt, *cond, *neg, *sel;
  2278. le = WN_LE(rtype,
  2279. WN_LdidPreg(rtype, zN),
  2280. WN_Zerocon(rtype));
  2281. lt = WN_LT(rtype,
  2282. WN_LdidPreg(rtype, ziN),
  2283. WN_Zerocon(rtype));
  2284. cond = WN_LAND(le, lt);
  2285. neg = WN_Neg(rtype, WN_LdidPreg(rtype, t1N));
  2286. sel = WN_Select(rtype,
  2287. cond,
  2288. neg,
  2289. WN_LdidPreg(rtype, t1N));
  2290. t2N = AssignExpr(block, sel, rtype);
  2291. }
  2292. {
  2293. /*
  2294. * t3 = (I(z) / t2) * .5
  2295. */
  2296. WN *div, *mpy;
  2297. div = WN_Div(rtype,
  2298. WN_LdidPreg(rtype, ziN),
  2299. WN_LdidPreg(rtype, t2N));
  2300. mpy = WN_Mpy(rtype,
  2301. div,
  2302. WN_Floatconst(rtype, .5000));
  2303. t3N = AssignExpr(block, mpy, rtype);
  2304. }
  2305. {
  2306. /*
  2307. * realpart = (R(z)>0) ? t2 : t3
  2308. * imagpart = (R(z)>0) ? t3 : t2
  2309. */
  2310. WN *gt, *realpart, *imagpart;
  2311. gt = WN_GT(rtype,
  2312. WN_LdidPreg(rtype, zN),
  2313. WN_Zerocon(rtype));
  2314. realpart = WN_Select(rtype,
  2315. gt,
  2316. WN_LdidPreg(rtype, t2N),
  2317. WN_LdidPreg(rtype, t3N));
  2318. gt = WN_GT(rtype,
  2319. WN_LdidPreg(rtype, zN),
  2320. WN_Zerocon(rtype));
  2321. imagpart = WN_Select(rtype,
  2322. gt,
  2323. WN_LdidPreg(rtype, t3N),
  2324. WN_LdidPreg(rtype, t2N));
  2325. return WN_Complex(type, realpart, imagpart);
  2326. }
  2327. }
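/* ====================================================================
 * Illustrative sketch, not from the original source: the t1/t2/t3
 * recipe above in plain C++ (hypothetical name ref_complex_sqrt).
 * Uses libm's hypot() for |z|; like the preg version, it divides by
 * zero for z == 0, which is why em_complex_sqrt below wraps the
 * expansion in checkForZero.
 * ==================================================================== */
static inline void ref_complex_sqrt(double rz, double iz, double *re, double *im)
{
  double t1 = sqrt((hypot(rz, iz) + fabs(rz)) * 0.5);
  double t2 = (rz <= 0.0 && iz < 0.0) ? -t1 : t1;
  double t3 = (iz / t2) * 0.5;
  *re = (rz > 0.0) ? t2 : t3;
  *im = (rz > 0.0) ? t3 : t2;
}
/* e.g. z = -4: t1 = 2, t2 = 2, t3 = 0, giving sqrt(-4) = 0 + 2i */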
  2328. /* ====================================================================
  2329. *
  2330. * WN *em_complex_sqrt(WN *block, WN *z)
  2331. *
  2332. * ==================================================================== */
  2333. static WN *em_complex_sqrt(WN *block, WN *z)
  2334. {
  2335. PREG_NUM zN;
  2336. WN *sqrt, *if_else;
  2337. TYPE_ID type = WN_rtype(z);
  2338. zN = AssignExpr(block, z, type);
  2339. if_else = WN_CreateBlock();
  2340. sqrt = em_complex_sqrt_preg(if_else, type, zN);
  2341. sqrt = checkForZero(block, type, zN, if_else, sqrt);
  2342. return sqrt;
  2343. }
  2344. /* ====================================================================
  2345. *
  2346. * WN *em_conjg(WN *block, WN *x)
  2347. *
  2348. * real = Realpart(x)
  2349. * imag = -Imagpart(x)
  2350. *
  2351. * ==================================================================== */
  2352. static WN *em_conjg(WN *block, WN *x)
  2353. {
  2354. PREG_NUM zN;
  2355. TYPE_ID type = WN_rtype(x);
  2356. TYPE_ID rtype = Mtype_complex_to_real(type);
  2357. WN *realpart, *imagpart;
  2358. zN = AssignExpr(block, x, type);
  2359. realpart = WN_Realpart(rtype, WN_LdidPreg(type, zN));
  2360. imagpart = WN_Neg(rtype,
  2361. WN_Imagpart(rtype, WN_LdidPreg(type, zN)));
  2362. return WN_Complex(type, realpart, imagpart);
  2363. }
  2364. /* ====================================================================
  2365. *
  2366. * WN *em_alog10(WN *block, WN *x)
  2367. *
2368. * log(x) * log10(e), where log10(e) = 0.43429448190325182765 (M_LOG10 below)
  2369. *
  2370. * ==================================================================== */
  2371. #define M_LOG10 0.4342944819032518276511289189166050822943970058036665661144537831658646492088707747292249493384317483
  2372. #define M_LOG10Q 0.434294481903251827651128918916605082294L
  2373. static WN *em_alog10(WN *block, WN *x)
  2374. {
  2375. TYPE_ID type = WN_rtype(x);
  2376. INTRINSIC logID;
  2377. WN *log, *mpy;
  2378. switch(type)
  2379. {
  2380. case MTYPE_F4: logID = INTRN_F4LOG; break;
  2381. case MTYPE_F8: logID = INTRN_F8LOG; break;
  2382. case MTYPE_FQ: logID = INTRN_FQLOG; break;
  2383. case MTYPE_F16: logID = INTRN_F16LOG; break;
  2384. }
  2385. log = Intrinsic(type, logID, 1, x, NULL);
  2386. if (type != MTYPE_FQ && type != MTYPE_F16) {
  2387. mpy = WN_Mpy(type,
  2388. WN_Floatconst(type, M_LOG10),
  2389. log);
  2390. } else {
  2391. #ifdef TARG_MIPS
  2392. Fail_FmtAssertion ("em_alog10: FQ type not yet supported");
  2393. #else
  2394. mpy = WN_Mpy(type,
  2395. Make_Const(Host_To_Targ_Quad(M_LOG10Q)),
  2396. log);
  2397. #endif
  2398. }
  2399. return mpy;
  2400. }
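/* ====================================================================
 * Illustrative sketch, not from the original source: the identity
 * em_alog10 emits, log10(x) = log(x) * log10(e).  ref_alog10 is a
 * hypothetical name.
 * ==================================================================== */
static inline double ref_alog10(double x)
{
  return log(x) * 0.43429448190325182765;   /* log(x) * log10(e) */
}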
  2401. /* ====================================================================
  2402. *
  2403. * WN *em_bclr(WN *block, WN *n, WN *i)
  2404. * clear bit i (n, i)
2405. * if (0 <= i && i < NUMBERBITS): n & ~(1<<i)
  2406. * else 0
  2407. *
  2408. * ==================================================================== */
  2409. static WN *em_bclr(WN *block, WN *n, WN *i)
  2410. {
  2411. TYPE_ID type = WN_rtype(n);
  2412. PREG_NUM iN;
  2413. WN *cond, *band;
  2414. iN = AssignExpr(block, i, type);
  2415. {
  2416. /*
  2417. * form n & ~(1<<i)
  2418. */
  2419. WN *shft, *bnot;
  2420. shft = WN_Shl(type,
  2421. WN_Intconst(type, 1),
  2422. WN_LdidPreg(type, iN));
  2423. bnot = WN_Bnot(type, shft);
  2424. band = WN_Band(type, n, bnot);
  2425. if (Fast_Bit_Allowed)
  2426. return band;
  2427. }
  2428. {
  2429. /*
  2430. * form logical condition
  2431. * (0 <= i && i < NUMBERBITS)
  2432. */
  2433. WN *le, *lt;
  2434. le = WN_LE(Mtype_comparison(type),
  2435. WN_Zerocon(type),
  2436. WN_LdidPreg(type, iN));
  2437. lt = WN_LT( Mtype_comparison(type),
  2438. WN_LdidPreg(type, iN),
  2439. WN_Intconst(type, MTYPE_size_reg(type)));
  2440. cond = WN_LAND(le, lt);
  2441. }
  2442. return WN_Select(type, cond, band, WN_Zerocon(type));
  2443. }
  2444. /* ====================================================================
  2445. *
  2446. * WN *em_bset(WN *block, WN *n, WN *i)
  2447. *
  2448. * set bit i (n, i)
  2449. * if (0 <= i && i < NUMBERBITS): n | (1<<i)
  2450. * else 0
  2451. *
  2452. * ==================================================================== */
  2453. static WN *em_bset(WN *block, WN *n, WN *i)
  2454. {
  2455. TYPE_ID type = WN_rtype(n);
  2456. PREG_NUM iN;
  2457. WN *cond, *bor;
  2458. iN = AssignExpr(block, i, type);
  2459. {
  2460. /*
  2461. * form n | (1<<i)
  2462. */
  2463. WN *shft;
  2464. shft = WN_Shl(type,
  2465. WN_Intconst(type, 1),
  2466. WN_LdidPreg(type, iN));
  2467. bor = WN_Bior(type, n, shft);
  2468. if (Fast_Bit_Allowed)
  2469. return bor;
  2470. }
  2471. {
  2472. /*
  2473. * form logical condition
  2474. * (0 <= i && i < NUMBERBITS)
  2475. */
  2476. WN *le, *lt;
  2477. le = WN_LE(Mtype_comparison(type),
  2478. WN_Zerocon(type),
  2479. WN_LdidPreg(type, iN));
  2480. lt = WN_LT(Mtype_comparison(type),
  2481. WN_LdidPreg(type, iN),
  2482. WN_Intconst(type, MTYPE_size_reg(type)));
  2483. cond = WN_LAND(le, lt);
  2484. }
  2485. return WN_Select(type, cond, bor, WN_Zerocon(type));
  2486. }
  2487. /* ====================================================================
  2488. *
  2489. * test bit i (n, i)
  2490. * if (0 <= i && i < NUMBERBITS): (n >> i) & 0x1
  2491. * else 0
  2492. *
  2493. * ==================================================================== */
  2494. static WN *em_btest(WN *block, WN *n, WN *i)
  2495. {
  2496. TYPE_ID type = WN_rtype(n);
  2497. PREG_NUM iN;
  2498. WN *cond, *band;
  2499. iN = AssignExpr(block, i, type);
  2500. {
  2501. /*
  2502. * form (n >> i) & 0x1
  2503. */
  2504. WN *shft;
  2505. shft = WN_Lshr(type,
  2506. n,
  2507. WN_LdidPreg(type, iN));
  2508. band = WN_Band(type, shft, WN_Intconst(type, 1));
  2509. if (Fast_Bit_Allowed)
  2510. return band;
  2511. }
  2512. {
  2513. /*
  2514. * form logical condition
  2515. * (0 <= i && i < NUMBERBITS)
  2516. */
  2517. WN *le, *lt;
  2518. le = WN_LE(Mtype_comparison(type),
  2519. WN_Zerocon(type),
  2520. WN_LdidPreg(type, iN));
  2521. lt = WN_LT(Mtype_comparison(type),
  2522. WN_LdidPreg(type, iN),
  2523. WN_Intconst(type, MTYPE_size_reg(type)));
  2524. cond = WN_LAND(le, lt);
  2525. }
  2526. return WN_Select(type, cond, band, WN_Zerocon(type));
  2527. }
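/* ====================================================================
 * Illustrative sketch, not from the original source: the checked bit
 * operations above for a 32-bit word (hypothetical ref_* names).  The
 * WHIRL versions are branch-free -- both arms feed a WN_Select -- but
 * the semantics are as written here.
 * ==================================================================== */
static inline unsigned ref_bclr (unsigned n, int i)
{ return (0 <= i && i < 32) ? (n & ~(1u << i)) : 0u; }
static inline unsigned ref_bset (unsigned n, int i)
{ return (0 <= i && i < 32) ? (n | (1u << i)) : 0u; }
static inline unsigned ref_btest(unsigned n, int i)
{ return (0 <= i && i < 32) ? ((n >> i) & 1u) : 0u; }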
  2528. /* ====================================================================
  2529. *
2530. * auxiliary routine to help develop mask
  2531. * does not check args, and assumes x,m are used once (dag police)
  2532. *
  2533. * ( (unsigned) -(m>0) >> (32-m))
  2534. *
  2535. * ((1<<m)-1) does not work, when m = 32
  2536. *
  2537. * ==================================================================== */
  2538. static WN *WN_mask(TYPE_ID type, PREG_NUM m)
  2539. {
  2540. WN *gt, *neg, *sub;
  2541. gt = WN_GT(type, WN_LdidPreg(type, m), WN_Zerocon(type));
  2542. gt = WN_Int_Type_Conversion(gt,type);
  2543. neg = WN_Neg(type, gt);
  2544. sub = WN_Sub(type, WN_Intconst(type, MTYPE_size_reg(type)), WN_LdidPreg(type, m));
  2545. return WN_Lshr(type, neg, sub);
  2546. }
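/* ====================================================================
 * Illustrative sketch, not from the original source: the mask trick
 * above for a 32-bit word (hypothetical name ref_mask).  (1<<m)-1
 * overflows at m == 32, whereas -(m>0) >> (32-m) yields all ones
 * there.  The m == 0 case shifts by 32, which is undefined in
 * portable C but produces the intended 0 on the modulo-32 shifters
 * this backend targets.
 * ==================================================================== */
static inline unsigned ref_mask(int m)   /* low m bits set, 0 <= m <= 32 */
{
  return (unsigned)-(m > 0) >> (32 - m);
}
/* ref_mask(3) == 0x7, ref_mask(32) == 0xffffffff */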
  2547. /* ====================================================================
  2548. *
2549. * auxiliary routine to help develop mask
  2550. * does not check args, and assumes x,m are used once (dag police)
  2551. *
  2552. * return x & ((1<<m)-1);
  2553. * return x & ( (unsigned) -(m>0) >> (32-m))
  2554. *
  2555. * ==================================================================== */
  2556. static WN *em_mask(TYPE_ID type, WN *x, PREG_NUM m)
  2557. {
  2558. WN *mask;
  2559. mask = WN_mask(type, m);
  2560. return WN_Band(type, x, mask);
  2561. }
  2562. /* ====================================================================
  2563. *
2564. * auxiliary routine to help develop mask
  2565. * does not check args, and assumes x,m are used once (dag police)
  2566. *
  2567. * return x & ~ MASK(m)
  2568. *
  2569. * ==================================================================== */
  2570. static WN *em_mask_complement(TYPE_ID type, WN *x, PREG_NUM m)
  2571. {
  2572. WN *mask, *bnot;
  2573. mask = WN_mask(type, m);
  2574. bnot =WN_Bnot(type, mask);
  2575. return WN_Band(type, x, bnot);
  2576. }
  2577. /* ====================================================================
  2578. *
  2579. * extract bits [i ... i+len-1] (n, i, len)
  2580. *
  2581. * if (0 <= i && i < NUMBERBITS) &&
  2582. * (0 <= len && len <= NUMBERBITS) &&
2583. * ((i+len) <= NUMBERBITS): (n>>i) & ((1<<len)-1)
  2584. * else n
  2585. * ==================================================================== */
  2586. static WN *em_bits(WN *block, WN *n, WN *i, WN *len)
  2587. {
  2588. TYPE_ID type = WN_rtype(n);
  2589. TYPE_ID desc = WN_desc(n);
  2590. PREG_NUM iN, lenN, nN;
  2591. WN *cond, *band;
  2592. iN = AssignExpr(block, i, desc);
  2593. nN = AssignExpr(block, n, desc);
  2594. lenN = AssignExpr(block, len, desc);
  2595. {
  2596. /*
  2597. *
  2598. * form (n>>i) & MASK(len)
  2599. */
  2600. WN *shft;
  2601. shft = WN_Lshr(type,
  2602. WN_LdidPreg(type, nN),
  2603. WN_LdidPreg(type, iN));
  2604. band = em_mask(type, shft, lenN);
  2605. if (Fast_Bit_Allowed)
  2606. return band;
  2607. }
  2608. {
  2609. /*
  2610. * Unfortunately the region of definition is very irregular
  2611. * so I believe all these tests are necessary to be compatible
  2612. * with the library function
  2613. *
  2614. * (0 <= i) && (i < NUMBERBITS) &&
  2615. * (0 <= len) &&
  2616. * ((i+len) <= NUMBERBITS)
  2617. */
  2618. WN *le, *lt, *land, *add;
  2619. le = WN_LE(Mtype_comparison(type),
  2620. WN_Zerocon(type),
  2621. WN_LdidPreg(type, iN));
  2622. lt = WN_LT(Mtype_comparison(type),
  2623. WN_LdidPreg(type, iN),
  2624. WN_Intconst(type, MTYPE_size_reg(type)));
  2625. land = WN_LAND(le, lt);
  2626. le = WN_LE(Mtype_comparison(type),
  2627. WN_Zerocon(type),
  2628. WN_LdidPreg(type, lenN));
  2629. land = WN_LAND(land, le);
  2630. add = WN_Add(type,
  2631. WN_LdidPreg(type, iN),
  2632. WN_LdidPreg(type, lenN));
  2633. lt = WN_LE(Mtype_comparison(type),
  2634. add,
  2635. WN_Intconst(type, MTYPE_size_reg(desc)));
  2636. cond = WN_LAND(land, lt);
  2637. }
  2638. return WN_Select(type, cond, band, WN_LdidPreg(type, nN));
  2639. }
  2640. /* ====================================================================
2641. * shift n left i places
  2642. *
  2643. * |i| < NUMBERBITS
  2644. * (n<<i)
  2645. * else 0
  2646. *
  2647. * ==================================================================== */
  2648. static WN *em_shl(WN *block, WN *n, WN *i)
  2649. {
  2650. TYPE_ID type = WN_rtype(n);
  2651. TYPE_ID desc = WN_desc(n);
  2652. PREG_NUM iN, nN;
  2653. iN = AssignExpr(block, i, desc);
  2654. nN = AssignExpr(block, n, desc);
  2655. {
  2656. /*
  2657. * form logical condition
  2658. * |i| < NUMBERBITS
  2659. * (n<<i)
  2660. * else 0
  2661. */
  2662. WN *shl, *lt;
  2663. shl = WN_Shl(type,
  2664. WN_LdidPreg(desc, nN),
  2665. WN_LdidPreg(desc, iN));
  2666. if (Fast_Bit_Allowed)
  2667. return shl;
  2668. lt = WN_LT(Mtype_comparison(type),
  2669. WN_LdidPreg(desc, iN),
  2670. WN_Intconst(type, MTYPE_size_reg(desc)));
  2671. return WN_Select(type, lt, shl, WN_Zerocon(type));
  2672. }
  2673. }
  2674. /* ====================================================================
  2675. * logical shift n right i places
  2676. *
  2677. * |i| < NUMBERBITS
  2678. * (n>>i)
  2679. * else 0
  2680. *
  2681. * ==================================================================== */
  2682. static WN *em_lshr(WN *block, WN *n, WN *i)
  2683. {
  2684. TYPE_ID type = WN_rtype(n);
  2685. TYPE_ID desc = WN_desc(n);
  2686. PREG_NUM iN, nN;
  2687. iN = AssignExpr(block, i, desc);
  2688. nN = AssignExpr(block, n, desc);
  2689. {
  2690. /*
  2691. * (n>>i) & ( (1<<(NUMBERBITS-i)) - 1)
  2692. */
  2693. WN *val, *sub, *lt, *shr;
  2694. PREG_NUM subN;
  2695. val = WN_Lshr(type, WN_LdidPreg(type, nN), WN_LdidPreg(type, iN));
  2696. sub = WN_Sub(type,
  2697. WN_Intconst(type, MTYPE_size_reg(desc)),
  2698. WN_LdidPreg(type, iN));
  2699. subN = AssignExpr(block, sub, type);
  2700. shr = em_mask(type, val, subN);
  2701. if (Fast_Bit_Allowed)
  2702. return shr;
  2703. lt = WN_LT(Mtype_comparison(type),
  2704. WN_LdidPreg(desc, iN),
  2705. WN_Intconst(type, MTYPE_size_reg(desc)));
  2706. return WN_Select(type, lt, shr, WN_Zerocon(type));
  2707. }
  2708. }
  2709. /* ====================================================================
  2710. * shift n i places
  2711. *
  2712. * |i| < NUMBERBITS
  2713. * i>=0 (n<<i)
2714. * i<0 (n>>(-i)) & ( (1<<(NUMBERBITS-(-i))) - 1)
  2715. * else 0
  2716. *
  2717. * ==================================================================== */
  2718. static WN *em_shft(WN *block, WN *n, WN *i)
  2719. {
  2720. TYPE_ID type = WN_rtype(n);
  2721. TYPE_ID desc = WN_desc(n);
  2722. WN *v1, *v2;
  2723. PREG_NUM iN, nN;
  2724. iN = AssignExpr(block, i, type);
  2725. nN = AssignExpr(block, n, type);
  2726. {
  2727. /*
  2728. * form [v1] (n<<i)
  2729. */
  2730. v1 = WN_Shl(type,
  2731. WN_LdidPreg(type, nN),
  2732. WN_LdidPreg(type, iN));
  2733. }
  2734. {
  2735. /*
  2736. * form [v2] (n>>(-i)) & ( (1<<(NUMBERBITS-(-i))) - 1)
  2737. */
  2738. WN *neg, *val, *add;
  2739. PREG_NUM addN;
  2740. neg = WN_Neg(type, WN_LdidPreg(type, iN));
  2741. val = WN_Lshr(type, WN_LdidPreg(type, nN), neg);
  2742. add = WN_Add(type,
  2743. WN_Intconst(type, MTYPE_size_reg(desc)),
  2744. WN_LdidPreg(type, iN));
  2745. addN = AssignExpr(block, add, type);
  2746. v2 = em_mask(type, val, addN);
  2747. }
  2748. {
  2749. /*
  2750. * form logical condition
  2751. * |i| < NUMBERBITS
  2752. * i>=0 v1
  2753. * i<0 v2
  2754. * else 0
  2755. */
  2756. WN *abs, *lt, *ge, *select;
  2757. ge = WN_GE(Mtype_comparison(type),
  2758. WN_LdidPreg(type, iN),
  2759. WN_Zerocon(type));
  2760. select = WN_Select(type, ge, v1, v2);
  2761. if (Fast_Bit_Allowed)
  2762. return select;
  2763. abs = WN_Abs(type, WN_LdidPreg(type, iN));
  2764. lt = WN_LT(Mtype_comparison(type),
  2765. abs,
  2766. WN_Intconst(type, MTYPE_size_reg(desc)));
  2767. return WN_Select(type, lt, select, WN_Zerocon(type));
  2768. }
  2769. }
  2770. /* ====================================================================
  2771. * Circular shift - The rightmost ic bits are shifted circularly k-places.
  2772. * k > 0 => left shift.
  2773. * k < 0 => right shift.
  2774. * k = 0 => no shift.
  2775. * left shift right shift
  2776. * [ b1 | k2 | d ] [ b1 | d | k2 ]
  2777. *
  2778. * MASK(x) = (1<<x)-1;
  2779. *
  2780. * k2 = | k |
  2781. * if (0 < k2 <= NUMBITS) &&
  2782. * ( k2 <= ic ) &&
  2783. * (1 <= ic <= NUMBITS)
  2784. * {
  2785. * s1 = (k>0) ? |k| : ic-|k|;
  2786. * s2 = (k>0) ? ic-|k| : |k|
  2787. * B1 = m & ~MASK(ic);
  2788. * B2 = (m & MASK(s2)) << s1;
2789. * B3 = (m & MASK(ic)) >> s2;
  2790. * return (B1 | B2 | B3)
  2791. * }
  2792. * else return m
  2793. *
  2794. * ==================================================================== */
  2795. static WN *em_shftc(WN *block, WN *m, WN *k, WN *ic)
  2796. {
  2797. TYPE_ID type = WN_rtype(m);
  2798. TYPE_ID desc = WN_desc(m);
  2799. WN *shiftc;
  2800. PREG_NUM mN, kN, icN, kabsN, s1N, s2N;
  2801. mN = AssignExpr(block, m, desc);
  2802. kN = AssignExpr(block, k, desc);
  2803. icN = AssignExpr(block, ic, desc);
  2804. kabsN = AssignExpr(block,
  2805. WN_Abs(type, WN_LdidPreg(type, kN)),
  2806. type);
  2807. {
  2808. /*
  2809. * s1 = (k>0) ? |k| : ic-|k|;
  2810. * s2 = (k>0) ? ic-|k| : |k|
  2811. */
  2812. PREG_NUM subN;
  2813. WN *sub, *gt, *s1, *s2;
  2814. sub = WN_Sub(type,
  2815. WN_LdidPreg(type, icN),
  2816. WN_LdidPreg(type, kabsN));
  2817. subN = AssignExpr(block, sub, type);
  2818. gt = WN_GT(Mtype_comparison(type),
  2819. WN_LdidPreg(type, kN),
  2820. WN_Zerocon(type));
  2821. s1 = WN_Select(type, gt,
  2822. WN_LdidPreg(type, kabsN),
  2823. WN_LdidPreg(type, subN));
  2824. s2 = WN_Select(type, WN_COPY_Tree(gt),
  2825. WN_LdidPreg(type, subN),
  2826. WN_LdidPreg(type, kabsN));
  2827. s1N = AssignExpr(block, s1, type);
  2828. s2N = AssignExpr(block, s2, type);
  2829. }
  2830. {
  2831. /*
  2832. *
  2833. * B1 = m & ~MASK(ic);
  2834. * B2 = (m & MASK(s2)) << s1;
2835. * B3 = (m & MASK(ic)) >> s2;
2836. * shiftc = B1 | B2 | B3;
  2837. */
  2838. WN *band, *b1, *b2, *b3;
  2839. b1 = em_mask_complement(type, WN_LdidPreg(type, mN), icN);
  2840. band = em_mask(type, WN_LdidPreg(type, mN), s2N);
  2841. b2 = WN_Shl(type,
  2842. band,
  2843. WN_LdidPreg(type, s1N));
  2844. band = em_mask(type, WN_LdidPreg(type, mN), icN);
  2845. b3 = WN_Lshr(type,
  2846. band,
  2847. WN_LdidPreg(type, s2N));
  2848. shiftc = WN_Bior(type,
  2849. b1,
  2850. WN_Bior(type, b2, b3));
  2851. if (Fast_Bit_Allowed)
  2852. return shiftc;
  2853. }
  2854. {
  2855. /*
  2856. * form logical condition
2857. * the above boundary is equivalent to
  2858. * if 1 <= | k | <= ic &&
  2859. * ic <= NUMBITS
  2860. * shiftc;
  2861. * else m
  2862. */
  2863. WN *le1, *le2, *land, *le, *cond;
  2864. le1 = WN_LE(Mtype_comparison(type),
  2865. WN_Intconst(type, 1),
  2866. WN_LdidPreg(type, kabsN));
  2867. le2 = WN_LE(Mtype_comparison(type),
  2868. WN_LdidPreg(type, kabsN),
  2869. WN_LdidPreg(type, icN));
  2870. land = WN_LAND(le1, le2);
  2871. le = WN_LE(Mtype_comparison(type),
  2872. WN_LdidPreg(type, icN),
  2873. WN_Intconst(type, MTYPE_size_reg(desc)));
  2874. cond = WN_LAND(land, le);
  2875. return WN_Select(type, cond, shiftc, WN_LdidPreg(type, mN));
  2876. }
  2877. }
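/* ====================================================================
 * Illustrative sketch, not from the original source: the circular
 * shift recipe above for a 32-bit word, reusing ref_mask from the
 * sketch after WN_mask (hypothetical name ref_shftc).  Edge cases
 * where a shift count reaches the word width inherit the same
 * modulo-shift caveat as ref_mask.
 * ==================================================================== */
static inline unsigned ref_shftc(unsigned m, int k, int ic)
{
  int k2 = (k < 0) ? -k : k;
  if (!(1 <= k2 && k2 <= ic && ic <= 32))
    return m;                                  /* out of range: unchanged */
  int s1 = (k > 0) ? k2 : ic - k2;             /* left-shift amount  */
  int s2 = (k > 0) ? ic - k2 : k2;             /* right-shift amount */
  unsigned b1 = m & ~ref_mask(ic);             /* bits above the field  */
  unsigned b2 = (m & ref_mask(s2)) << s1;      /* bits that wrap around */
  unsigned b3 = (m & ref_mask(ic)) >> s2;      /* bits that shift down  */
  return b1 | b2 | b3;
}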
  2878. /* ====================================================================
  2879. * Parity - Use xors to combine integers bits down to a single bit.
  2880. * t1 = x ^ (x >> 32);
  2881. * t2 = t1 ^ (t1 >> 16);
  2882. * t3 = t2 ^ (t2 >> 8);
  2883. * t4 = t3 ^ (t3 >> 4);
  2884. * return (0x6996 >> (t4 & 0xf)) & 1;
  2885. * ==================================================================== */
  2886. static WN *em_parity(WN *block, WN *wn)
  2887. {
  2888. TYPE_ID type = WN_rtype(wn);
  2889. INT bitsize = MTYPE_size_reg(type);
  2890. // Parity of sign/zero extension is always zero.
  2891. if ((WN_operator(wn) == OPR_LDID || WN_operator(wn) == OPR_ILOAD)
  2892. && bitsize > MTYPE_size_reg(WN_desc(wn))) {
  2893. bitsize = MTYPE_size_reg(WN_desc(wn));
  2894. }
  2895. PREG_NUM preg = AssignExpr( block, wn, type );
  2896. // t1 = x ^ (x >> 32);
  2897. if (bitsize > 32) {
  2898. wn = WN_Ashr( type, WN_LdidPreg(type, preg), WN_Intconst(MTYPE_I4, 32) );
  2899. wn = WN_Bxor( type, wn, WN_LdidPreg(type, preg) );
  2900. preg = AssignExpr( block, wn, type );
  2901. }
  2902. // t2 = t1 ^ (t1 >> 16);
  2903. if (bitsize > 16) {
  2904. wn = WN_Ashr( type, WN_LdidPreg(type, preg), WN_Intconst(MTYPE_I4, 16) );
  2905. wn = WN_Bxor( type, wn, WN_LdidPreg(type, preg) );
  2906. preg = AssignExpr( block, wn, type );
  2907. }
2908. // t3 = t2 ^ (t2 >> 8);
  2909. if (bitsize > 8) {
  2910. wn = WN_Ashr( type, WN_LdidPreg(type, preg), WN_Intconst(MTYPE_I4, 8) );
  2911. wn = WN_Bxor( type, wn, WN_LdidPreg(type, preg) );
  2912. preg = AssignExpr( block, wn, type );
  2913. }
2914. // t4 = t3 ^ (t3 >> 4);
  2915. wn = WN_Ashr( type, WN_LdidPreg(type, preg), WN_Intconst(MTYPE_I4, 4) );
  2916. wn = WN_Bxor( type, wn, WN_LdidPreg(type, preg) );
  2917. // return (0x6996 >> (t4 & 0xf)) & 1;
  2918. wn = WN_Band( type, wn, WN_Intconst(MTYPE_I4, 15) );
  2919. wn = WN_Ashr( MTYPE_I4, WN_Intconst(MTYPE_I4, 0x6996), wn );
  2920. wn = WN_Band( MTYPE_I4, wn, WN_Intconst(MTYPE_I4, 1) );
  2921. return wn;
  2922. }
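/* ====================================================================
 * Illustrative sketch, not from the original source: the xor-fold
 * above for a full 64-bit operand (hypothetical name ref_parity64).
 * 0x6996 is a 16-entry lookup table: bit i holds the parity of the
 * nibble value i.
 * ==================================================================== */
static inline unsigned ref_parity64(unsigned long long x)
{
  x ^= x >> 32;                        /* t1 */
  x ^= x >> 16;                        /* t2 */
  x ^= x >> 8;                         /* t3 */
  x ^= x >> 4;                         /* t4 */
  return (0x6996u >> (x & 0xf)) & 1u;  /* table lookup on the low nibble */
}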
  2923. /* ====================================================================
  2924. * Popcount - Count the number of "1" bits in an integer. Here's the
  2925. * 64-bit algorithm:
  2926. * t1 = x - ((x >> 1) & 0x5555555555555555);
  2927. * t2 = (t1 & 0x3333333333333333) + ((t1 >> 2) & 0x3333333333333333);
  2928. * t3 = (t2 + (t2 >> 4)) & 0x0f0f0f0f0f0f0f0f;
  2929. * t4 = t3 + (t3 >> 8);
  2930. * t5 = t4 + (t4 >> 16);
  2931. * t6 = t5 + (t5 >> 32);
  2932. * return t6 & 0x000000ff;
  2933. * ==================================================================== */
  2934. static WN *em_popcount(WN *block, WN *wn, INT bitsize)
  2935. {
  2936. if ( bitsize == 0 ) {
  2937. Fail_FmtAssertion("em_popcount: expected nonzero bitsize");
  2938. }
  2939. TYPE_ID type = WN_rtype(wn);
  2940. // t1 = x - ((x >> 1) & 0x5555555555555555);
  2941. PREG_NUM preg = AssignExpr( block, wn, type );
  2942. UINT64 mask = 0x5555555555555555ULL >> (64 - bitsize);
  2943. WN *wn1 = WN_Intconst(type, mask);
  2944. wn = WN_Ashr( type, WN_LdidPreg(type, preg), WN_Intconst(MTYPE_I4, 1) );
  2945. wn = WN_Band( type, wn, wn1 );
  2946. wn = WN_Sub( type, WN_LdidPreg(type, preg), wn );
  2947. // t2 = (t1 & 0x3333333333333333) + ((t1 >> 2) & 0x3333333333333333);
  2948. preg = AssignExpr( block, wn, type );
  2949. mask = 0x3333333333333333ULL >> (64 - bitsize);
  2950. wn1 = WN_Intconst(type, mask);
  2951. PREG_NUM preg1 = AssignExpr( block, wn1, type );
  2952. wn = WN_Band( type, WN_LdidPreg(type, preg), WN_LdidPreg(type, preg1) );
  2953. wn1 = WN_Ashr( type, WN_LdidPreg(type, preg), WN_Intconst(MTYPE_I4, 2) );
  2954. wn1 = WN_Band( type, wn1, WN_LdidPreg(type, preg1) );
  2955. wn = WN_Add( type, wn, wn1 );
  2956. // t3 = (t2 + (t2 >> 4)) & 0x0f0f0f0f0f0f0f0f;
  2957. preg = AssignExpr( block, wn, type );
  2958. mask = 0x0f0f0f0f0f0f0f0fULL >> (64 - bitsize);
  2959. wn1 = WN_Intconst(type, mask);
  2960. wn = WN_Ashr( type, WN_LdidPreg(type, preg), WN_Intconst(MTYPE_I4, 4) );
  2961. wn = WN_Add( type, WN_LdidPreg(type, preg), wn );
  2962. wn = WN_Band( type, wn, wn1 );
  2963. if (bitsize > 8) {
  2964. // t4 = t3 + (t3 >> 8);
  2965. preg = AssignExpr( block, wn, type );
  2966. wn = WN_Ashr( type, WN_LdidPreg(type, preg), WN_Intconst(MTYPE_I4, 8) );
  2967. wn = WN_Add( type, WN_LdidPreg(type, preg), wn );
  2968. }
  2969. if (bitsize > 16) {
  2970. // t5 = t4 + (t4 >> 16);
  2971. preg = AssignExpr( block, wn, type );
  2972. wn = WN_Ashr( type, WN_LdidPreg(type, preg), WN_Intconst(MTYPE_I4, 16) );
  2973. wn = WN_Add( type, WN_LdidPreg(type, preg), wn );
  2974. }
  2975. if (bitsize > 32) {
  2976. // t6 = t5 + (t5 >> 32);
  2977. preg = AssignExpr( block, wn, type );
  2978. wn = WN_Ashr( type, WN_LdidPreg(type, preg), WN_Intconst(MTYPE_I4, 32) );
  2979. wn = WN_Add( type, WN_LdidPreg(type, preg), wn );
  2980. }
  2981. if (bitsize > 8) {
  2982. // return t6 & 0x000000ff;
  2983. // wn = WN_Band( type, wn, WN_Intconst(MTYPE_I4, 0xff) );
  2984. wn = WN_CreateCvtl( OPC_U4CVTL, 8, wn );
  2985. }
  2986. return wn;
  2987. }
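/* ====================================================================
 * Illustrative sketch, not from the original source: the 64-bit SWAR
 * popcount spelled out in the banner above (hypothetical name
 * ref_popcount64).  Each step widens the per-field partial sums:
 * 2-bit counts, then 4-bit, 8-bit, and so on.
 * ==================================================================== */
static inline unsigned ref_popcount64(unsigned long long x)
{
  x = x - ((x >> 1) & 0x5555555555555555ULL);                           /* t1 */
  x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); /* t2 */
  x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0fULL;                           /* t3 */
  x = x + (x >> 8);                                                     /* t4 */
  x = x + (x >> 16);                                                    /* t5 */
  x = x + (x >> 32);                                                    /* t6 */
  return (unsigned)(x & 0xff);
}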
  2988. /* ====================================================================
  2989. *
  2990. * BOOL decompose_address
  2991. *
  2992. * TY_IDX aux_compute_alignment
  2993. *
  2994. * Memory intrinsics will be changing to MSTORE/MLOAD depending on
  2995. * size and aliasing.
  2996. *
  2997. * During wopt we might lose alignment information, so we go to some
  2998. * lengths here to compute a good alignment.
  2999. *
  3000. * ==================================================================== */
  3001. static BOOL decompose_address(WN *addr, WN **base, INT64 *offset)
  3002. {
  3003. switch(WN_operator(addr))
  3004. {
  3005. case OPR_ADD:
  3006. case OPR_SUB:
  3007. if (WN_is_pointer(WN_kid0(addr)) && Is_Integer_Constant(WN_kid1(addr)))
  3008. {
  3009. *base= WN_kid0(addr);
  3010. // make sure offset is positive
  3011. *offset= ABS(WN_const_val(WN_kid1(addr)));
  3012. return TRUE;
  3013. }
  3014. break;
  3015. default:
  3016. if (WN_is_pointer(addr))
  3017. {
  3018. *offset= 0;
  3019. *base= addr;
  3020. return TRUE;
  3021. }
  3022. }
  3023. return FALSE;
  3024. }
  3025. static TY_IDX aux_compute_alignment(WN *wn)
  3026. {
  3027. WN *base;
  3028. INT64 offset;
  3029. if (decompose_address(wn, &base, &offset))
  3030. {
  3031. return compute_alignment_type(base, TY_pointed(WN_ty(base)), offset);
  3032. }
  3033. return compute_alignment_type(wn, MTYPE_To_TY(MTYPE_V), 0);
  3034. }
  3035. /* ====================================================================
  3036. *
  3037. * BOOL check_size(WN *size, WN *src, WN *dst)
  3038. *
  3039. * Check if the size is a constant, and below the threshold
  3040. * of CG_memmove_inst_count or CG_memmove_align_inst_count
  3041. *
  3042. * Count src and dst (if present)
  3043. *
  3044. * ==================================================================== */
  3045. static BOOL check_size(WN *size, WN *src, WN *dst)
  3046. {
  3047. if (Is_Integer_Constant(size))
  3048. {
  3049. TY_IDX srcTY, dstTY = (TY_IDX) 0;
  3050. INT64 n = WN_const_val(size);
  3051. if (n <= CG_memmove_inst_count)
  3052. return TRUE;
  3053. srcTY = aux_compute_alignment(src);
  3054. if (dst)
  3055. {
3056. dstTY = aux_compute_alignment(dst);
  3057. n += n;
  3058. }
  3059. {
  3060. TYPE_ID quantum;
  3061. INT32 copy_alignment, instructions;
  3062. copy_alignment = compute_copy_alignment(srcTY, dstTY, 0);
  3063. quantum = compute_copy_quantum(copy_alignment);
  3064. instructions= n / MTYPE_alignment(quantum);
  3065. #if defined(KEY) && defined(TARG_MIPS)
  3066. // 14283: MIPS uses ldr/ldl,sdr/sdl and lwr/lwl,swr/swl pairs
  3067. if (copy_alignment < MTYPE_alignment(quantum))
  3068. instructions *= 2;
  3069. #endif
  3070. if (instructions <= CG_memmove_inst_count)
  3071. return TRUE;
  3072. #ifdef KEY
  3073. // 14283: If alignment is good, inline more aggressively
  3074. if (copy_alignment == MTYPE_alignment(Max_Uint_Mtype) &&
  3075. instructions <= CG_memmove_align_inst_count)
  3076. return TRUE;
  3077. #endif
  3078. }
  3079. }
  3080. else if (CG_memmove_nonconst)
  3081. return TRUE;
  3082. return FALSE;
  3083. }
  3084. static void aux_memory_msg(const char *msg, WN *tree, WN *mstore)
  3085. {
  3086. #if 0
  3087. char buff[120];
  3088. INT32 n;
  3089. // This is a pretty pointless thing to do, as it's awfully noisy
3090. // If the size is 0, the inline expansion is empty, so we don't
3091. // get an MSTORE. Check for that case.
  3092. if (WN_operator(mstore) != OPR_MSTORE) {
  3093. sprintf (buff, "inlined %s on line %d, size = 0",
  3094. msg, Srcpos_To_Line(WN_Get_Linenum(tree)));
  3095. DevWarn (buff);
  3096. return;
  3097. }
  3098. WN *load = WN_kid0(mstore);
  3099. WN *size = WN_kid2(mstore);
  3100. n= sprintf(buff, "inlined %s on line %d, dst align=%d",
  3101. msg,
  3102. Srcpos_To_Line(WN_Get_Linenum(tree)),
  3103. TY_align(TY_pointed(WN_ty(mstore))));
  3104. if (WN_opcode(load) == OPC_MLOAD)
  3105. {
  3106. n += sprintf(&buff[n], ", src align=%d", TY_align(TY_pointed(WN_ty(load))));
  3107. }
  3108. if (Is_Integer_Constant(size))
  3109. {
3110. n += sprintf(&buff[n], ", size = %" PRId64 "", WN_const_val(size));
  3111. }
  3112. else
  3113. {
3114. n += sprintf(&buff[n], ", size = unknown");
  3115. }
  3116. DevWarn(buff);
  3117. #endif
  3118. }
  3119. static WN *aux_memset(WN *var, WN *con, WN *size)
  3120. {
  3121. WN *mstore, *newcon;
  3122. TY_IDX align;
  3123. Is_True(Is_Integer_Constant(con), ("expected integer constant in aux_memset"));
  3124. /*
  3125. * The mstore TY_align will determine the eventual alignment of the
  3126. * component ISTOREs, so improve the alignment if possible
  3127. */
  3128. align = aux_compute_alignment(var);
  3129. /*
  3130. * The semantics of memset require replicating the byte constant
  3131. */
  3132. newcon= WN_I1const(WN_rtype(con), WN_const_val(con));
  3133. WN_Delete(con);
  3134. TY_IDX ptr_ty = Make_Pointer_Type (align);
  3135. mstore = aux_CreateMstore(0, ptr_ty, newcon, var, size);
  3136. return mstore;
  3137. }
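/* ====================================================================
 * Illustrative sketch, not from the original source: what the
 * WN_I1const replication above amounts to for a 32-bit store constant
 * (hypothetical name ref_replicate_byte32).  memset semantics require
 * the low byte of the fill value in every byte of the word.
 * ==================================================================== */
static inline unsigned ref_replicate_byte32(unsigned char c)
{
  return (unsigned)c * 0x01010101u;   /* 0xAB -> 0xABABABAB */
}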
  3138. static WN *aux_memcpy(WN *block, WN *src, WN *dst, WN *size)
  3139. {
  3140. WN *mload, *mstore, *wn;
  3141. ST *st_src, *st_dst;
  3142. TY_IDX srcTY, dstTY;
  3143. TY_IDX srcTY_ptr, dstTY_ptr;
  3144. /*
  3145. * The TY_align will determine the eventual alignment of the
  3146. * component ILOAD/ISTOREs, so improve the alignment if possible
  3147. */
  3148. srcTY = aux_compute_alignment(src);
  3149. if (TY_size(srcTY) != 0 &&
  3150. WN_const_val(size) % TY_size(srcTY) != 0) {
  3151. // size copied is not a multiple of the size of the type, which means
  3152. // that we are copying part of the type. We then change the pointer
  3153. // to (void*)
  3154. srcTY_ptr = Make_Pointer_Type (MTYPE_To_TY (MTYPE_V));
  3155. }
  3156. else srcTY_ptr = Make_Pointer_Type(srcTY);
  3157. #if 0
  3158. // Bug 14350: Marking the pointers as restricted (see below)
  3159. // unfortunately can cause two adjacent memcpy's to overlap
  3160. // loads and stores. Insert barriers to prevent overlap.
3161. // Bug 14358: This approach is incompatible with
  3162. // -OPT:alias=disjoint, which is often useful for performance.
  3163. // So, I need to find a different approach.
  3164. WN_INSERT_BlockLast(block, WN_CreateBarrier(TRUE, 0));
  3165. WN_INSERT_BlockLast(block, WN_CreateBarrier(FALSE, 0));
  3166. // Memcpy can assume that src and dst don't overlap.
  3167. // Copy the addresses into new pointer symbols and mark them
  3168. // restricted so alias analysis will know they don't alias.
  3169. // This is like inlining:
  3170. // memcpy(dsttype * restrict, const srctype * restrict, size_t)
  3171. Set_TY_is_restrict(srcTY_ptr);
  3172. st_src = Gen_Temp_Symbol(srcTY_ptr, ".srcptr");
  3173. wn = WN_Stid(Pointer_type, 0, st_src, srcTY_ptr, src);
  3174. WN_INSERT_BlockLast(block, wn);
  3175. wn = WN_Ldid(Pointer_type, 0, st_src, srcTY_ptr);
  3176. mload = WN_CreateMload(0, srcTY_ptr, wn, size);
  3177. dstTY = aux_compute_alignment(dst);
  3178. dstTY_ptr = Make_Pointer_Type(dstTY);
  3179. Set_TY_is_restrict(dstTY_ptr);
  3180. st_dst = Gen_Temp_Symbol(dstTY_ptr, ".dstptr");
  3181. wn = WN_Stid(Pointer_type, 0, st_dst, dstTY_ptr, dst);
  3182. WN_INSERT_BlockLast(block, wn);
  3183. wn = WN_Ldid(Pointer_type, 0, st_dst, dstTY_ptr);
  3184. mstore = aux_CreateMstore(0, dstTY_ptr, mload, wn, WN_COPY_Tree(size));
  3185. WN_INSERT_BlockLast(block, mstore);
  3186. WN_INSERT_BlockLast(block, WN_CreateBarrier(TRUE, 0));
  3187. mstore = WN_CreateBarrier(FALSE, 0);
  3188. #else
  3189. mload = WN_CreateMload(0, srcTY_ptr, src, size);
  3190. dstTY = aux_compute_alignment(dst);
  3191. dstTY_ptr = Make_Pointer_Type(dstTY);
  3192. mstore = aux_CreateMstore(0, dstTY_ptr, mload, dst, WN_COPY_Tree(size));
  3193. #endif
  3194. return mstore;
  3195. }
  3196. static WN *em_memset(WN *block, WN *tree, WN *var, WN *con, WN *size)
  3197. {
  3198. WN *em = NULL;
  3199. if (check_size(size, var, NULL) && Is_Integer_Constant(con))
  3200. {
  3201. if (em = aux_memset(var, con, size))
  3202. {
  3203. aux_memory_msg("memset()", tree, em);
  3204. WN_INSERT_BlockLast(block, em);
  3205. return WN_COPY_Tree(var);
  3206. }
  3207. }
  3208. return em;
  3209. }
  3210. static WN *em_bzero(WN *block, WN *tree, WN *var, WN *size)
  3211. {
  3212. WN *em = NULL;
  3213. if (check_size(size, var, NULL))
  3214. {
  3215. WN *zero = WN_Zerocon(MTYPE_U8);
  3216. if (em = aux_memset(var, zero, size))
  3217. {
  3218. aux_memory_msg("bzero()", tree, em);
  3219. }
  3220. else
  3221. {
  3222. WN_Delete(zero);
  3223. }
  3224. }
  3225. return em;
  3226. }
  3227. static WN *em_bcopy(WN *block, WN *tree, WN *src, WN *dst, WN *size)
  3228. {
  3229. WN *em = NULL;
  3230. if (check_size(size, src, dst))
  3231. {
  3232. if (CG_bcopy_cannot_overlap ||
  3233. !lower_is_aliased(src, dst, WN_const_val(size)))
  3234. {
  3235. if (em = aux_memcpy(block, src, dst, size)) {
  3236. aux_memory_msg("bcopy()", tree, em);
  3237. }
  3238. }
  3239. }
  3240. return em;
  3241. }
  3242. /*
  3243. * memcpy requires the src/dst to be independent.
3244. * The implementation, however, handles the overlap cases, so we should too,
  3245. * unless we can prove otherwise or the user forces us not to.
  3246. */
  3247. // KEY: The above comment is wrong. For memcpy, src and dest may not
  3248. // overlap.
  3249. static WN *em_memcpy(WN *block, WN *tree, WN *dst, WN *src, WN *size)
  3250. {
  3251. if (check_size(size, src, dst))
  3252. {
  3253. #ifdef KEY
  3254. // See bugs 3510, 3924, and 14283
  3255. if ( CG_memcpy_cannot_overlap || // TRUE
  3256. ( Is_Integer_Constant(size) &&
  3257. ! lower_is_aliased(src, dst, WN_const_val(size)) ) )
  3258. #else
  3259. if (CG_memcpy_cannot_overlap ||
  3260. !lower_is_aliased(src, dst, WN_const_val(size)))
  3261. #endif
  3262. {
  3263. if (WN *em = aux_memcpy(block, src, dst, size)) {
  3264. aux_memory_msg("memcpy()", tree, em);
  3265. WN_INSERT_BlockLast(block, em);
  3266. return WN_COPY_Tree(dst);
  3267. }
  3268. }
  3269. }
  3270. return NULL;
  3271. }
  3272. /*
3273. * memmove does indeed handle overlapping cases.
  3274. * We generate mload/mstore only when we can prove there is no overlap
  3275. * or the user forces us to.
  3276. */
  3277. static WN *em_memmove(WN *block, WN *tree, WN *dst, WN *src, WN *size)
  3278. {
  3279. if (check_size(size, src, dst))
  3280. {
  3281. if (CG_memmove_cannot_overlap ||
  3282. !lower_is_aliased(src, dst, WN_const_val(size)))
  3283. {
  3284. if (WN *em = aux_memcpy(block, src, dst, size)) {
  3285. aux_memory_msg("memmove()", tree, em);
  3286. WN_INSERT_BlockLast(block, em);
  3287. return WN_COPY_Tree(dst);
  3288. }
  3289. }
  3290. }
  3291. return NULL;
  3292. }
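/* ====================================================================
 * Illustrative note, not from the original source: why the
 * lower_is_aliased() test gates the MLOAD/MSTORE expansion.  With
 * overlapping operands only memmove is defined:
 *
 *   char buf[8] = "abcdef";
 *   memmove(buf + 1, buf, 6);   // well-defined: buf becomes "aabcdef"
 *   memcpy (buf + 1, buf, 6);   // undefined behavior
 *
 * so em_memcpy/em_memmove inline a bulk copy only when the regions
 * are provably disjoint or the CG_*_cannot_overlap flags assert it.
 * ==================================================================== */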
  3293. #ifdef TARG_X8664
  3294. extern /* defined in data_layout.cxx */
  3295. ST *Get_Vararg_Save_Area_Info(int &fixed_int_parms, int &fixed_float_parms,
  3296. ST *&upformal);
  3297. /* va_start under the X86-64 ABI */
static WN *em_x8664_va_start(WN *block, WN *ap)
{
  TY_IDX ty_idx;
  // TY_IDX va_list_struct_ty;
  INT fixed_int_parms, fixed_float_parms;
  BOOL direct;
  BOOL non_leaf = FALSE;

  if (WN_operator(ap) == OPR_LDA) {
    ty_idx = WN_ty(ap);
    Is_True(TY_kind(ty_idx) == KIND_POINTER,
            ("em_x8664_va_start: argument not of pointer type"));
    ty_idx = TY_pointed(ty_idx);
#if 0 // bug 10098
    Is_True(TY_kind(ty_idx) == KIND_ARRAY && TY_size(ty_idx) == 24,
            ("em_x8664_va_start: argument pointer does not point to type va_list"));
#endif
    direct = TRUE;
    // va_list_struct_ty = TY_etype(ty_idx);
  }
  else if (WN_operator(ap) == OPR_LDID) {
    ty_idx = WN_ty(ap);
    Is_True(TY_kind(ty_idx) == KIND_POINTER,
            ("em_x8664_va_start: argument not of pointer type"));
    ty_idx = TY_pointed(ty_idx);
    Is_True(TY_size(ty_idx) == 24,
            ("em_x8664_va_start: argument pointer does not point to type va_list"));
    direct = FALSE;
    // va_list_struct_ty = ty_idx;
  }
  else { // bug 3147
    non_leaf = TRUE;
    direct = FALSE;
  }

  ST *upformal;
  ST *reg_save_area = Get_Vararg_Save_Area_Info(fixed_int_parms, fixed_float_parms, upformal);
  WN *wn;
  WN *addr;

  /* gp_offset (byte 0) */
  wn = WN_Intconst(MTYPE_I4, fixed_int_parms * 8);
  if (direct)
    wn = WN_Stid(MTYPE_I4, WN_offset(ap), WN_st(ap), MTYPE_To_TY(MTYPE_I4), wn);
  else {
    if (! non_leaf)
      addr = WN_Ldid(Pointer_Mtype, WN_offset(ap), WN_st(ap), WN_ty(ap));
    else addr = WN_COPY_Tree(ap);
    wn = WN_Istore(MTYPE_I4, 0, Make_Pointer_Type(MTYPE_To_TY(MTYPE_I4)),
                   addr, wn);
  }
  WN_INSERT_BlockLast(block, wn);

  /* fp_offset (byte 4) */
  wn = WN_Intconst(MTYPE_I4, fixed_float_parms * 16 + 48);
  if (direct)
    wn = WN_Stid(MTYPE_I4, 4 + WN_offset(ap), WN_st(ap), MTYPE_To_TY(MTYPE_I4), wn);
  else {
    if (! non_leaf)
      addr = WN_Ldid(Pointer_Mtype, WN_offset(ap), WN_st(ap), WN_ty(ap));
    else addr = WN_COPY_Tree(ap);
    wn = WN_Istore(MTYPE_I4, 4, Make_Pointer_Type(MTYPE_To_TY(MTYPE_I4)),
                   addr, wn);
  }
  WN_INSERT_BlockLast(block, wn);

  /* overflow_arg_area (byte 8) */
  wn = WN_Lda(Pointer_Mtype, STB_size(upformal), upformal);
  if (direct)
    wn = WN_Stid(Pointer_Mtype, 8 + WN_offset(ap), WN_st(ap), MTYPE_To_TY(Pointer_Mtype), wn);
  else {
    if (! non_leaf)
      addr = WN_Ldid(Pointer_Mtype, WN_offset(ap), WN_st(ap), WN_ty(ap));
    else addr = WN_COPY_Tree(ap);
    wn = WN_Istore(Pointer_Mtype, 8,
                   Make_Pointer_Type(MTYPE_To_TY(Pointer_Mtype)), addr, wn);
  }
  if (ST_sclass(upformal) == SCLASS_UNKNOWN)
    Set_ST_sclass (upformal, SCLASS_FORMAL);

  /* reg_save_area (byte 16) */
  if (reg_save_area) {
    WN_INSERT_BlockLast(block, wn);
    if (TY_size(ST_type(reg_save_area)) == 8)
      wn = WN_Lda(Pointer_Mtype, -(fixed_int_parms * 8), reg_save_area);
    else wn = WN_Lda(Pointer_Mtype, -(fixed_float_parms*16)-48, reg_save_area);
    if (direct)
      wn = WN_Stid(Pointer_Mtype, 16 + WN_offset(ap), WN_st(ap), MTYPE_To_TY(Pointer_Mtype), wn);
    else {
      if (! non_leaf)
        addr = WN_Ldid(Pointer_Mtype, WN_offset(ap), WN_st(ap), WN_ty(ap));
      else addr = WN_COPY_Tree(ap);
      wn = WN_Istore(Pointer_Mtype, 16,
                     Make_Pointer_Type(MTYPE_To_TY(Pointer_Mtype)), addr, wn);
    }
  }
  return wn;
}
#endif

/* ====================================================================
 *
 * COERCE INTR_coerce_runtime(WN *tree, INT32 arg)
 *
 * Given an intrinsic or expression that will map to an emulation
 * routine, return the action required for mapping the arguments
 * to the runtime routine.
 * ==================================================================== */
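/*
 * Illustration (not compiled): for a complex-valued intrinsic whose
 * runtime routine takes the real and imaginary parts as separate scalar
 * arguments, the formals table yields COERCE_split_complex for that
 * argument, and the lowering loop in intrinsic_runtime() below emits two
 * actuals (WN_Realpart/WN_Imagpart) in its place.
 */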
static COERCE INTR_coerce_runtime(WN *tree, INT32 arg)
{
  if (OPCODE_is_intrinsic(WN_opcode(tree)))
  {
    INT32 i;
    INTRINSIC id = (INTRINSIC) WN_intrinsic(tree);

    for(i=0; i < intrinsic_runtime_formals_size; i++)
    {
      if (id == INTR_id(i))
      {
        switch(arg)
        {
        case 0: return INTR_coerce0(i);
        case 1: return INTR_coerce1(i);
        default:
          Is_True(FALSE, ("INTR_coerce_runtime, arg > 1"));
        }
      }
    }
    return COERCE_none;
  }
  else
  {
    /*
     * the coercion should be valid for all arguments,
     * otherwise the table must change
     */
    EMULATION id = WN_emulation(tree);

    Is_True((EM_id(id) == id), ("em_routine table in a sorry state"));
    return EM_coerce0(id);
  }
}
/* ====================================================================
 *
 * TYPE_ID INTR_parameter_type(WN *tree, INT32 arg)
 *
 * Given an intrinsic or expression that will map to an emulation
 * routine, return the argument types the front end will supply
 * to this intrinsic/expression.
 * ==================================================================== */
static TYPE_ID INTR_parameter_type(WN *tree, INT32 arg)
{
  if (OPCODE_is_intrinsic(WN_opcode(tree)))
  {
    INT32 i;
    INTRINSIC id = (INTRINSIC) WN_intrinsic(tree);

    for(i=0; i < intrinsic_parameter_type_size; i++)
    {
      if (id == INTR_parm_id(i))
      {
        switch(arg)
        {
        case 0: return INTR_parmtype0(i);
        case 1: return INTR_parmtype1(i);
        case 2: return INTR_parmtype2(i);
        default:
          /*
           * take a wild guess:
           * it seems that the intrinsics are uniform
           * (all arguments are the same)
           */
          return INTR_parmtype0(i);
        }
      }
    }
    Is_True(FALSE, ("INTR_parameter_type unknown for %s", INTR_intrinsic_name(tree)));
  }
  else
  {
    Is_True(FALSE, ("INTR_parameter_type not used for call by value"));
  }
  return MTYPE_V;
}
/* ====================================================================
 *
 * const char *INTR_intrinsic_name(WN *tree)
 *
 * Given an intrinsic or expression that will map to an emulation
 * routine, return the name of the runtime routine.
 *
 * ==================================================================== */
extern const char * INTR_intrinsic_name(WN *tree)
{
  if (OPCODE_is_intrinsic(WN_opcode(tree)))
  {
    INTRINSIC id = (INTRINSIC) WN_intrinsic(tree);

    return INTRN_rt_name(id);
  }
  else
  {
    EMULATION id = WN_emulation(tree);

    Is_True((EM_id(id) == id), ("em_routine table in a sorry state"));
    return EM_rt_name(id);
  }
}
static void
Set_intrinsic_flags (ST *st, WN *tree)
{
  PU& pu = Pu_Table[ST_pu (st)];

  if (OPCODE_is_intrinsic(WN_opcode(tree))) {
    INTRINSIC id = (INTRINSIC) WN_intrinsic(tree);
    /**************************
     one day
        if (INTRN_never_returns(id))
          flags |= ???
    **************************/
    if (INTRN_is_pure(id))
      Set_PU_is_pure (pu);
    if (INTRN_has_no_side_effects(id))
      Set_PU_no_side_effects (pu);
  } else {
    EMULATION id = WN_emulation(tree);

    if (EM_attributes(id) & PU_IS_PURE)
      Set_PU_is_pure (pu);
    if (EM_attributes(id) & NSE)
      Set_PU_no_side_effects (pu);
  }
}
/* ====================================================================
 *
 * WN *by_value(WN *tree, INT32 arg)
 *
 * Given an intrinsic that will map to an emulation routine,
 * return a by-value argument.
 * ==================================================================== */
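/*
 * Illustration (not compiled): for a by-reference Fortran intrinsic, an
 * actual of the form LDA x becomes LDID x (the value is loaded directly
 * through the known symbol); any other address expression is
 * dereferenced with an ILOAD of the tabulated parameter type.
 */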
static WN *by_value(WN *tree, INT32 arg)
{
  TYPE_ID type;
  INTRINSIC id = (INTRINSIC) WN_intrinsic(tree);
  WN *child = WN_arg(tree, arg);

  Is_True((OPCODE_is_intrinsic(WN_opcode(tree))),
          ("expected intrinsic call node, not %s", OPCODE_name(WN_opcode(tree))));

  if (INTRN_by_value(id) == TRUE)
  {
    return child;
  }

  type = INTR_parameter_type(tree, arg);
  Is_True((type != MTYPE_V), ("unexpected void type"));

  if (WN_operator_is(child, OPR_LDA))
  {
    return WN_Ldid(type,
                   WN_lda_offset(child),
                   WN_st(child),
                   MTYPE_To_TY(type));
  }
  return WN_Iload(type, 0, MTYPE_To_TY(type), child);
}
/* ====================================================================
 *
 * WN *return_conversion(INTRINSIC id, TYPE_ID rtype, WN *function)
 *
 * Coerce the return type to match the INTRINSIC return type.
 * Necessary for correctness.
 *
 * ex. I1SHFT(127, 1)
 *     0d127 << 1
 *     should return a negative value (-2)
 * ==================================================================== */
static WN *return_conversion(INTRINSIC id, TYPE_ID type, WN *function)
{
  if (MTYPE_is_integral(type))
  {
    TYPE_ID return_type = INTR_return_mtype(id);

    if (MTYPE_is_integral(return_type))
    {
      if (return_type != type)
      {
        return WN_Type_Conversion(function, return_type);
      }
      else if (WN_rtype(function) != type)
      {
        DevWarn("Unexpected return_conversion() while processing intrinsic %s",
                INTRN_rt_name(id));
        return WN_Type_Conversion(function, return_type);
      }
    }
  }
  return function;
}
/* ====================================================================
 *
 * WN *make_pointer_to_node(WN *block, WN *tree)
 *
 * Return the address of tree:
 *   for ILOAD/ISTORE we can use the address directly;
 *   for LDID/STID we can create an LDA;
 *   otherwise we need to create an addressable temp, store to it,
 *   and try again.
 * ==================================================================== */
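/*
 * Illustration (not compiled): an arbitrary expression such as x + y has
 * no address, so the default case below materializes one:
 *
 *   complex-temp-expr = x + y;      // STID into a fresh temp
 *   ... LDA complex-temp-expr ...   // address of the temp is returned
 */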
extern WN *make_pointer_to_node(WN *block, WN *tree)
{
  switch (WN_operator(tree))
  {
  case OPR_ILOAD:
    return WN_kid0(tree);
  case OPR_ISTORE:
    return WN_kid1(tree);
  case OPR_LDID:
    return WN_Lda(Pointer_type, WN_load_offset(tree), WN_st(tree));
  case OPR_STID:
    return WN_Lda(Pointer_type, WN_store_offset(tree), WN_st(tree));
  case OPR_ARRAY:
  case OPR_LDA:
    return tree;
  default:
    {
      TYPE_ID type = WN_rtype(tree);
      ST *st = Gen_Temp_Symbol( MTYPE_To_TY(type), "complex-temp-expr");
      WN *stid;

      Is_True((WN_operator_is(tree, OPR_PARM)==FALSE), ("bad parm"));
      /*
       * store value to an addressable temporary, and take the address of that
       */
      stid = WN_Stid (type, 0, st, ST_type(st), tree);
      WN_INSERT_BlockLast(block, stid);

      return WN_Lda(Pointer_type, WN_store_offset(stid), st);
    }
  }
}
/* ====================================================================
 *
 * ST *GenLocalTable(WN *block, TYPE_ID element, INT32 n, WN **init)
 *
 * Create a local array of TYPE_ID elements (size n),
 * and initialize them to init.
 *
 * ==================================================================== */
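/*
 * Illustration (not compiled): GenLocalTable(block, Integer_type, 2, init)
 * creates a symbol localtable.N and appends two STIDs to block, at
 * offsets 0 and MTYPE_RegisterSize(Integer_type), storing init[0] and
 * init[1].
 */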
static ST *GenLocalTable(WN *block, TYPE_ID element, INT32 n, WN **init)
{
  INT32 i;
  TY_IDX arrayTY;
  ST *arrayST;

  arrayTY = Make_Array_Type (element, 1, n);
  {
    static INT32 table_cnt = 0;
    char buffer[32];

    sprintf(buffer, "localtable.%d", ++table_cnt);
    arrayST = Gen_Temp_Symbol(arrayTY, buffer);
  }

  for(i=0; i<n; i++)
  {
    WN *st;

    st = WN_Stid(element,
                 i * MTYPE_RegisterSize(element),
                 arrayST,
                 arrayTY,
                 init[i]);
    WN_INSERT_BlockLast(block, st);
  }
  return arrayST;
}
/* ====================================================================
 *
 * concatexpr (really s_cat) has to be interpreted, as the arguments
 * do not match:
 *
 *   CONCATEXPR( char *dst, int dstSize,
 *               char *src0, char *src1, ... char *srcN,
 *               int src0Size, int src1Size, ... int srcNSize)
 * vs.
 *
 *   s_cat( char *dst,
 *          char *src[],     vector of chars
 *          int srcSize[],   vector of sizes
 *          int *srcN,       number of vectors
 *          int dstSize)
 *
 * ==================================================================== */
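/*
 * Illustration (not compiled): concatenating two sources,
 *
 *   CONCATEXPR(dst, dstSize, s0, s1, len0, len1)
 *
 * is rebuilt below as
 *
 *   s_cat(dst, {s0, s1}, {len0, len1}, &2, dstSize)
 *
 * where the two brace-enclosed vectors are local tables built by
 * GenLocalTable() and &2 is the address of a temp holding the count.
 */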
static WN *process_concatexpr(WN *block, WN *tree)
{
  INT16 nsrc;
  WN **wn;
  WN *srcAddr, *srcSize, *srcNp;

  nsrc = (WN_kid_count(tree)-2) / 2;
  wn = (WN **) alloca(nsrc * sizeof(WN *));

  {
    /*
     * create a pointer to a table of addresses (src[])
     */
    ST *srcST;
    INT32 i;

    for(i=0; i<nsrc; i++)
      wn[i] = WN_arg(tree, i+2);

    srcST = GenLocalTable(block, Pointer_type, nsrc, wn);
    srcAddr = WN_Lda(Pointer_type, 0, srcST);
  }
  {
    /*
     * create a pointer to a table of sizes (srcSize[])
     */
    ST *sizeST;
    INT32 i;

    for(i=0; i<nsrc; i++)
      wn[i] = WN_arg(tree, i+2+nsrc);

    sizeST = GenLocalTable(block, Integer_type, nsrc, wn);
    srcSize = WN_Lda(Pointer_type, 0, sizeST);
  }
  {
    WN *count;

    count = WN_Intconst(Integer_type, nsrc);
    /* pointer to srcN */
    srcNp = make_pointer_to_node(block, count);
  }

  /*
   * s_cat will always have 5 arguments
   */
  {
    WN *kids[5];
    WN *s_cat;
    INTRINSIC id = (INTRINSIC) WN_intrinsic(tree);
    BOOL byvalue = INTRN_by_value(id);

    kids[0] = WN_kid(tree, 0);
    kids[1] = createParm( srcAddr, byvalue );
    kids[2] = createParm( srcSize, byvalue );
    kids[3] = createParm( srcNp, byvalue );
    kids[4] = WN_kid(tree, 1);

    s_cat = WN_Create_Intrinsic(WN_opcode(tree), id, 5, kids);
    return s_cat;
  }
}
/* ====================================================================
 *
 * Annotate_Weak_Runtime
 *
 * Check the runtime routine named against a list of those which need
 * to be weak.  At this time, this list includes:
 *
 *   __C_runtime_error:  A runtime routine for reporting errors (e.g.
 *   subscript range violations, divide by zero).  This is added to
 *   libc with Irix 6.5, and is invoked only if compilations are
 *   done with -DEBUG:verbose_runtime.  If such programs are run on
 *   older Irix versions, the weak symbol will resolve to zero and
 *   the error will result in a SIGSEGV instead.
 *
 * ====================================================================
 */
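/*
 * Illustration (not compiled): marking the ST weak has the same effect
 * as the source-level declaration
 *
 *   #pragma weak __C_runtime_error
 *
 * i.e. the reference need not be satisfied at link time, and an
 * unsatisfied reference resolves to address zero at run time.
 */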
static const char *Weak_Runtimes[] = {
  "__C_runtime_error",
  NULL   /* List must be null-terminated */
};

static void
Annotate_Weak_Runtime ( ST *func, const char *name )
{
  const char **weak_rt = Weak_Runtimes;

  while ( *weak_rt != NULL ) {
    if ( strcmp ( *weak_rt, name ) == 0 ) {
      Set_ST_is_weak_symbol ( func );
      return;
    }
    ++weak_rt;
  }
}
/* ====================================================================
 *
 * WN *intrinsic_runtime(WN *block, WN *tree)
 *
 * Lower tree into a call (there is no going back now).
 * The tree may be an expression or, more likely, an intrinsic.
 *
 * ==================================================================== */
extern WN *intrinsic_runtime(WN *block, WN *tree)
{
  INT16 n;
  INT16 argC = 0;
  WN *args[MAX_INTRINSIC_ARGS];
  const char *function = INTR_intrinsic_name(tree);
  BOOL byvalue = FALSE;
  BOOL parmMod = FALSE;

  Is_True((function), ("cannot emulate (null function)"));

  if (OPCODE_is_intrinsic(WN_opcode(tree)))
  {
    switch(WN_intrinsic(tree))
    {
    case INTRN_CONCATEXPR:
      tree = process_concatexpr(block, tree);
      break;
    }
    byvalue = INTRN_by_value(WN_intrinsic(tree));
  }
  {
    /*
     * for complex quad we must create a dummy argument
     * that has the address of a compiler temp.
     * This is known in the FE as RSTYLE_VIA_FIRST_ARG.
     * The parameter is by reference.
     */
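    /*
     * Illustration (not compiled): a CQ-valued call  z = __cq_foo(a)
     * is lowered roughly as
     *
     *   __cq_foo(&return_temp, a);
     *   z = return_temp;
     *
     * with &return_temp prepended as the hidden first argument below.
     * (__cq_foo is a made-up name, for illustration only.)
     */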
    TYPE_ID rtype = WN_rtype(tree);

    switch(rtype)
    {
#ifdef TARG_X8664
    case MTYPE_C4:
    case MTYPE_C8:
      if (Is_Target_64bit())
        break;
      // fall thru
#endif
    case MTYPE_CQ:
    case MTYPE_C16:
      {
        ST *retST = Gen_Temp_Symbol(MTYPE_To_TY(rtype), "return_temp");

        Set_BE_ST_addr_used_locally(retST);
        args[argC++] = createParm(WN_Lda(Pointer_type, 0, retST), FALSE);
        parmMod = TRUE;
      }
      break;
    default:
      break;
    }
  }
  for(n = 0; n < WN_num_actuals(tree); n++)
  {
    WN *actual = WN_kid(tree, n);
    COERCE coerce = INTR_coerce_runtime(tree, n);

    switch(coerce)
    {
    case COERCE_none:
      args[argC++] = actual;
      break;

    case COERCE_struct_by_value:
      {
        TYPE_ID type = WN_rtype(actual);
        TY_IDX complexTY;
        WN *mload;
        INT32 size = MTYPE_size_min(type) >> 3;

        /*
         * create a structure type that looks like complex, but has
         * btype of M, as lower call expects this type
         */
        TY& ty = New_TY (complexTY);
        ty = Ty_Table[MTYPE_To_TY(type)];
        Set_TY_mtype (ty, MTYPE_M);
        Set_TY_align_exp (complexTY, 3);

        TY_IDX complex_ptr = Make_Pointer_Type(complexTY);

        if (WN_operator_is(actual, OPR_PARM))
          actual = WN_kid0(actual);

        mload = WN_CreateMload(0, complex_ptr,
                               make_pointer_to_node(block, actual),
                               WN_Intconst(Integer_type, size));
        args[argC++] = mload;
      }
      /* FALLTHROUGH: struct_by_reference requires no further coercion */
    case COERCE_struct_by_reference:
      break;

    case COERCE_split_complex:
      {
        TYPE_ID type = WN_rtype(actual);
        TYPE_ID rtype = Mtype_complex_to_real(type);
        PREG_NUM valN;

        if (WN_operator_is(actual, OPR_PARM))
          actual = WN_kid0(actual);

        valN = AssignExpr(block, actual, type);
        args[argC++] = WN_Realpart(rtype, WN_LdidPreg(type, valN));
        args[argC++] = WN_Imagpart(rtype, WN_LdidPreg(type, valN));
      }
      break;

    default:
      Fail_FmtAssertion("coerce case does not exist coerce = %d", coerce);
    }
  }
  {
    TYPE_ID rtype = WN_rtype(tree);
    TY_IDX ty = Make_Function_Type( MTYPE_To_TY(rtype));
#ifdef TARG_X8664
    // The return type is set up correctly in Make_Function_Type (above).
    // (-m32 expects that the return type be set up correctly.)
    // Currently, there is no need for setting up the parameter list
    // for the intrinsics before setting TY_has_prototype.
    // lower_call for x86/x8664 expects every function to have a prototype
    // and assumes a vararg function if there is no prototype.
    Set_TY_has_prototype(ty);
    ST* st = NULL;

    /* Do the following conversion if either -ffast-math or -OPT:fast_math
       is specified:
         cos    -> fastcos
         exp    -> fastexp
         expf   -> fastexpf
         log    -> fastlog
         logf   -> fastlogf
         pow    -> fastpow
         powf   -> fastpowf
         sin    -> fastsin
         sincos -> fastsincos
       (Bug 4680)
    */
    if( Is_Target_64bit() &&
        !Is_Target_Anyx86() &&
        OPT_Fast_Math &&
        ( WN_intrinsic(tree) == INTRN_F8COS ||
          WN_intrinsic(tree) == INTRN_F8EXP ||
          WN_intrinsic(tree) == INTRN_F4EXP ||
          WN_intrinsic(tree) == INTRN_F8LOG ||
          WN_intrinsic(tree) == INTRN_F4LOG ||
          WN_intrinsic(tree) == INTRN_F8EXPEXPR ||
          WN_intrinsic(tree) == INTRN_F4EXPEXPR ||
          WN_intrinsic(tree) == INTRN_F8SIN ||
          WN_intrinsic(tree) == INTRN_SINCOS ||
          WN_intrinsic(tree) == INTRN_F8VSIN ||
          WN_intrinsic(tree) == INTRN_F8VCOS ||
          WN_intrinsic(tree) == INTRN_F8VEXP ||
          WN_intrinsic(tree) == INTRN_F4VEXP ||
          WN_intrinsic(tree) == INTRN_F4VLOG ||
          WN_intrinsic(tree) == INTRN_F8VLOG ) ) {
      BOOL vector_call_check_constant_stride = FALSE;
      switch (WN_intrinsic(tree)) {
      case INTRN_F8COS:     st = Gen_Intrinsic_Function(ty, "fastcos");    break;
      case INTRN_F8EXP:     st = Gen_Intrinsic_Function(ty, "fastexp");    break;
      case INTRN_F4EXP:     st = Gen_Intrinsic_Function(ty, "fastexpf");   break;
      case INTRN_F8LOG:     st = Gen_Intrinsic_Function(ty, "fastlog");    break;
      case INTRN_F4LOG:     st = Gen_Intrinsic_Function(ty, "fastlogf");   break;
      case INTRN_F8EXPEXPR: st = Gen_Intrinsic_Function(ty, "fastpow");    break;
      case INTRN_F4EXPEXPR: st = Gen_Intrinsic_Function(ty, "fastpowf");   break;
      case INTRN_F8SIN:     st = Gen_Intrinsic_Function(ty, "fastsin");    break;
      case INTRN_SINCOS:    st = Gen_Intrinsic_Function(ty, "fastsincos"); break;
      case INTRN_F8VSIN:
        st = Gen_Intrinsic_Function(ty, "vrda_sin");
        vector_call_check_constant_stride = TRUE;
        break;
      case INTRN_F8VCOS:
        st = Gen_Intrinsic_Function(ty, "vrda_cos");
        vector_call_check_constant_stride = TRUE;
        break;
      case INTRN_F8VEXP:
        st = Gen_Intrinsic_Function(ty, "vrda_exp");
        vector_call_check_constant_stride = TRUE;
        break;
      case INTRN_F4VEXP:
        st = Gen_Intrinsic_Function(ty, "vrsa_expf");
        vector_call_check_constant_stride = TRUE;
        break;
      case INTRN_F8VLOG:
        st = Gen_Intrinsic_Function(ty, "vrda_log");
        vector_call_check_constant_stride = TRUE;
        break;
      case INTRN_F4VLOG:
        st = Gen_Intrinsic_Function(ty, "vrsa_logf");
        vector_call_check_constant_stride = TRUE;
        break;
      }
      if ( vector_call_check_constant_stride ) {
        WN* x = WN_kid(tree, 0);  // opnd
        WN* y = WN_kid(tree, 1);  // result
        WN* count = WN_kid(tree, 2);
        WN* stridex = WN_kid(tree, 3);
        WN* stridey = WN_kid(tree, 4);
        if ( WN_operator(WN_kid0(stridex)) != OPR_INTCONST ||
             WN_operator(WN_kid0(stridey)) != OPR_INTCONST ||
             WN_const_val(WN_kid0(stridex)) != 1 ||
             WN_const_val(WN_kid0(stridey)) != 1 )
          st = Gen_Intrinsic_Function(ty, function);
        else {
          args[0] = count;
          args[1] = x;
          args[2] = y;
          argC = 3;
        }
      }
    // Rename memset to the PathScale optimized memset.
    } else if (WN_intrinsic(tree) == INTRN_MEMSET &&
               OPT_Fast_Stdlib &&
               Is_Target_64bit()) {
      if (Is_Target_EM64T() || Is_Target_Core() || Is_Target_Wolfdale())
        st = Gen_Intrinsic_Function(ty, "memset.pathscale.em64t");
      else
        st = Gen_Intrinsic_Function(ty, "memset.pathscale.opteron");
    // Rename memcpy to the PathScale optimized memcpy.
    } else if (WN_intrinsic(tree) == INTRN_MEMCPY &&
               OPT_Fast_Stdlib &&
               Is_Target_64bit()) {
      if (Is_Target_EM64T() || Is_Target_Core() || Is_Target_Wolfdale())
        st = Gen_Intrinsic_Function(ty, "__memcpy_pathscale_em64t");
      else
        st = Gen_Intrinsic_Function(ty, "__memcpy_pathscale_opteron");
    } else if (WN_intrinsic(tree) == INTRN_POPCOUNT &&
               MTYPE_byte_size(WN_rtype(WN_kid0(tree))) <= 4 &&
               Is_Target_32bit()) {
      st = Gen_Intrinsic_Function(ty, "__popcountsi2");
    } else if (WN_intrinsic(tree) == INTRN_PARITY &&
               MTYPE_byte_size(WN_rtype(WN_kid0(tree))) <= 4 &&
               Is_Target_32bit()) {
      st = Gen_Intrinsic_Function(ty, "__paritysi2");
    } else {
      st = Gen_Intrinsic_Function(ty, function);
    }
#elif defined(TARG_MIPS)
    ST *st = NULL;
#if 0  // Using __popcountsi2 fails at link-time on cross-compiler.
    if (WN_intrinsic(tree) == INTRN_POPCOUNT &&
        MTYPE_byte_size(WN_rtype(WN_kid0(tree))) <= 4) {
      st = Gen_Intrinsic_Function(ty, "__popcountsi2");
    }
#else
    if (WN_intrinsic(tree) == INTRN_POPCOUNT &&
        MTYPE_byte_size(WN_rtype(WN_kid0(tree))) <= 4) {
      // Zero extend U4 to U8.
      // args[0] = WN_Cvt(MTYPE_U4, MTYPE_U8, args[0]);
      // Using __popcountsi2 fails at link-time on cross-compiler.
      WN *wn_cvt = WN_Cvt( MTYPE_U4, MTYPE_U8, WN_kid0( args[0] ) );
      args[0] = WN_CreateParm( MTYPE_U8, wn_cvt,
                               MTYPE_To_TY( MTYPE_U8 ), WN_PARM_BY_VALUE );
    }
#endif
    // Rename memset/memcpy/strcpy to the SiCortex optimized versions.
    // Also see code near the top of emulate() for bug 14529.
    else if (OPT_Fast_Stdlib) {
      switch (WN_intrinsic(tree)) {
      case INTRN_MEMSET:
        st = Gen_Intrinsic_Function(ty, "sc_memset");  break;
      case INTRN_MEMCPY:
        st = Gen_Intrinsic_Function(ty, "sc_memcpy");  break;
      case INTRN_MEMMOVE:
        st = Gen_Intrinsic_Function(ty, "sc_memmove"); break;
      case INTRN_STRCAT:
        st = Gen_Intrinsic_Function(ty, "sc_strcat");  break;
      case INTRN_STRCHR:
        st = Gen_Intrinsic_Function(ty, "sc_strchr");  break;
      case INTRN_STRCPY:
        st = Gen_Intrinsic_Function(ty, "sc_strcpy");  break;
      case INTRN_STRLEN:
        st = Gen_Intrinsic_Function(ty, "sc_strlen");  break;
      case INTRN_BZERO:
        st = Gen_Intrinsic_Function(ty, "sc_bzero");   break;
      case INTRN_BCOPY:
        st = Gen_Intrinsic_Function(ty, "sc_bcopy");   break;
      }
    }
    if (st == NULL) st = Gen_Intrinsic_Function(ty, function);
#else
    ST *st = Gen_Intrinsic_Function(ty, function);
#endif // TARG_X8664
    WN *call;

    /*
     * annotate st flags with NO_SIDE_EFFECTS, IS_PURE etc.
     */
    Set_intrinsic_flags (st, tree);

    Annotate_Weak_Runtime ( st, function );

#ifdef TARG_X8664
    if (! Is_Target_64bit()) { // leave any complex type as is
      call = WN_Create(OPR_CALL, rtype, MTYPE_V, argC);
      WN_st_idx(call) = ST_st_idx(st);
    }
    else
#endif
    call = WN_Call(rtype, MTYPE_V, argC, st);

    WN_call_flag(call) = WN_call_flag(tree);
    WN_annotate_call_flags(call, st);

    /*
     * The annotations on CQ intrinsics are not correct:
     * they do modify their parameter.
     */
    if (parmMod)
      WN_Set_Call_Parm_Mod(call);

    while (--argC >= 0)
    {
      WN_actual(call, argC) = createParm(args[argC], byvalue);
    }

    // Update feedback
    if ( Cur_PU_Feedback )
      Cur_PU_Feedback->FB_set_in_out_same_node( call );

    return call;
  }
}
/* ====================================================================
 *
 * WN *emulate_intrinsic_op(WN *block, WN *tree)
 *
 * Attempt to expand an intrinsic op inline; return the replacement
 * expression, or NULL if the intrinsic must remain a call.
 *
 * ==================================================================== */
#ifdef TARG_MIPS
extern WN * em_quad_intrn_cmp(WN *, WN *);
#endif
static WN *emulate_intrinsic_op(WN *block, WN *tree)
{
  INTRINSIC id = (INTRINSIC) WN_intrinsic(tree);
  TYPE_ID rtype = WN_rtype(tree);
  WN *function;

  Is_True((OPCODE_is_intrinsic(WN_opcode(tree))),
          ("expected intrinsic call node, not %s", OPCODE_name(WN_opcode(tree))));
  Is_True((INTRN_is_actual(WN_intrinsic(tree))==FALSE),
          ("cannot emulate INTRN_is_actual"));

  switch(id) {
  case INTRN_I4EXPEXPR:
  case INTRN_I8EXPEXPR:
    /*
     * do these regardless of flags, as they are always safe
     */
    return em_exp_int(block, by_value(tree, 0), by_value(tree, 1), rtype);

  case INTRN_F4I4EXPEXPR:
  case INTRN_F4I8EXPEXPR:
  case INTRN_F8I4EXPEXPR:
  case INTRN_F8I8EXPEXPR:
  case INTRN_FQI4EXPEXPR:
  case INTRN_FQI8EXPEXPR:
  case INTRN_F16I4EXPEXPR:
  case INTRN_F16I8EXPEXPR:
  case INTRN_C4I4EXPEXPR:
  case INTRN_C4I8EXPEXPR:
  case INTRN_C8I4EXPEXPR:
  case INTRN_C8I8EXPEXPR:
  case INTRN_CQI4EXPEXPR:
  case INTRN_CQI8EXPEXPR:
  case INTRN_C16I4EXPEXPR:
  case INTRN_C16I8EXPEXPR:
    /*
     * The consensus is that we allow constants (-1, 0, 1, 2) as
     * always safe, regardless of Fast_Exp_Allowed
     */
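    /*
     * Sketch of why (assuming em_exp_int's usual strength reduction;
     * illustrative only): x**2 can expand to x*x, and x**(-1) to a
     * reciprocal of x, so these small constant exponents lose no
     * accuracy relative to the library pow routine.
     */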
    return em_exp_int(block, by_value(tree, 0), by_value(tree, 1), rtype);

  case INTRN_F4EXPEXPR:
  case INTRN_F8EXPEXPR:
  case INTRN_FQEXPEXPR:
  case INTRN_F16EXPEXPR:
  case INTRN_C4EXPEXPR:
  case INTRN_C8EXPEXPR:
  case INTRN_CQEXPEXPR:
  case INTRN_C16EXPEXPR:
    if (Fast_Exp_Allowed)
      return em_exp_float(block, by_value(tree, 0), by_value(tree, 1), rtype);
    break;

  case INTRN_F4MOD:
  case INTRN_F8MOD:
  case INTRN_FQMOD:
  case INTRN_F16MOD:
    return em_mod_float(block, by_value(tree, 0), by_value(tree, 1));

  case INTRN_I1DIM:
  case INTRN_I2DIM:
    function = em_dim(block, by_value(tree, 0), by_value(tree, 1));
    return return_conversion(id, rtype, function);

  case INTRN_I4DIM:
  case INTRN_I8DIM:
  case INTRN_F4DIM:
  case INTRN_F8DIM:
  case INTRN_FQDIM:
  case INTRN_F16DIM:
    return em_dim(block, by_value(tree, 0), by_value(tree, 1));

  case INTRN_F8F4PROD:
  case INTRN_FQF8PROD:
  case INTRN_F16F8PROD:
    return em_prod(block, rtype, by_value(tree, 0), by_value(tree, 1));

  case INTRN_I1SIGN:
  case INTRN_I2SIGN:
    function = em_sign(block, by_value(tree, 0), by_value(tree, 1));
    return return_conversion(id, rtype, function);

  case INTRN_I4SIGN:
  case INTRN_I8SIGN:
  case INTRN_F4SIGN:
  case INTRN_F8SIGN:
  case INTRN_FQSIGN:
  case INTRN_F16SIGN:
    return em_sign(block, by_value(tree, 0), by_value(tree, 1));

  case INTRN_F4AINT:
  case INTRN_F8AINT:
  case INTRN_FQAINT:
  case INTRN_F16AINT:
    return em_aint(block, rtype, by_value(tree, 0));

  case INTRN_I2F4NINT:
  case INTRN_I4F4NINT:
  case INTRN_I8F4NINT:
  case INTRN_I2F8IDNINT:
  case INTRN_I4F8IDNINT:
  case INTRN_I8F8IDNINT:
  case INTRN_I2FQIQNINT:
  case INTRN_I4FQIQNINT:
  case INTRN_I8FQIQNINT:
  case INTRN_I2F16IQNINT:
  case INTRN_I4F16IQNINT:
  case INTRN_I8F16IQNINT:
    return em_nearest_int(block, rtype, by_value(tree, 0));

  case INTRN_F4ANINT:
  case INTRN_F8ANINT:
  case INTRN_FQANINT:
  case INTRN_F16ANINT:
    return em_nearest_aint(block, rtype, by_value(tree, 0));

  case INTRN_I4CLEN:
    return em_clen(block, by_value(tree, 1));

  case INTRN_U4I4ALLOCA:
  case INTRN_U8I8ALLOCA:
    return em_alloca(block, tree);

  case INTRN_U4READSTACKPOINTER:
  case INTRN_U8READSTACKPOINTER:
    return em_readstackpointer(Pointer_type);

#ifdef KEY
  case INTRN_U4READFRAMEPOINTER:
  case INTRN_U8READFRAMEPOINTER:
    return em_readframepointer(Pointer_type);
#endif

  case INTRN_U4I4SETSTACKPOINTER:
  case INTRN_U8I8SETSTACKPOINTER:
    return em_setstackpointer(block, Pointer_type, by_value(tree, 0));

  case INTRN_C4CONJG:
  case INTRN_C8CONJG:
  case INTRN_CQCONJG:
  case INTRN_C16CONJG:
    return em_conjg(block, by_value(tree, 0));
  /*
   ** Generic problem with bit routines:
   ** they are all call by reference !!
   */
  case INTRN_I1BCLR:
  case INTRN_I2BCLR:
  case INTRN_I4BCLR:
  case INTRN_I8BCLR:
    /*
     * clear bit i  (n, i)
     *   (0 <= i && i < NUMBERBITS):  n & ~(1<<i)
     *   else 0
     */
    return em_bclr(block, by_value(tree, 0), by_value(tree, 1));

  case INTRN_I1BSET:
  case INTRN_I2BSET:
  case INTRN_I4BSET:
  case INTRN_I8BSET:
    /*
     * set bit i  (n, i)
     *   (0 <= i && i < NUMBERBITS):  n | (1<<i)
     *   else 0
     */
    function = em_bset(block, by_value(tree, 0), by_value(tree, 1));
    return return_conversion(id, rtype, function);

  case INTRN_I1BTEST:
  case INTRN_I2BTEST:
  case INTRN_I4BTEST:
  case INTRN_I8BTEST:
    /*
     * test bit i  (n, i)
     *   (0 <= i && i < NUMBERBITS):  (n >> i) & 0x1
     *   else 0
     */
    return em_btest(block, by_value(tree, 0), by_value(tree, 1));

  case INTRN_I1BITS:
  case INTRN_I2BITS:
  case INTRN_I4BITS:
  case INTRN_I8BITS:
    /*
     * extract bits [i ... i+len-1]  (n, i, len)
     *
     *   (0 <= i && i < NUMBERBITS) &&
     *   (0 <= len && len <= NUMBERBITS) &&
     *   ((i+len) <= NUMBERBITS):  (n>>i) & (1<<len - 1)
     *   else n
     */
    return em_bits(block, by_value(tree, 0), by_value(tree, 1), by_value(tree, 2));

  case INTRN_I1SHL:
  case INTRN_I2SHL:
    /*
     * shift n << i
     *
     *   |i| < NUMBERBITS:  (n<<i)
     *   else 0
     */
    function = em_shl(block, by_value(tree, 0), by_value(tree, 1));
    return return_conversion(id, rtype, function);

  case INTRN_I1SHR:
  case INTRN_I2SHR:
    function = em_lshr(block, by_value(tree, 0), by_value(tree, 1));
    return return_conversion(id, rtype, function);

  case INTRN_I1SHFT:
  case INTRN_I2SHFT:
  case INTRN_I4SHFT:
  case INTRN_I8SHFT:
    /*
     * shift n by i places
     *
     *   |i| < NUMBERBITS:
     *     i>=0:  (n<<i)
     *     i<0:   (n>>(-i)) & ( (1<<(NUMBERBITS-i)) - 1)
     *   else 0
     */
    function = em_shft(block, by_value(tree, 0), by_value(tree, 1));
    return return_conversion(id, rtype, function);

  case INTRN_I1SHFTC:
  case INTRN_I2SHFTC:
  case INTRN_I4SHFTC:
  case INTRN_I8SHFTC:
    function = em_shftc(block, by_value(tree, 0), by_value(tree, 1), by_value(tree, 2));
    return return_conversion(id, rtype, function);

  case INTRN_I4POPPAR:
  case INTRN_I8POPPAR:
  case INTRN_PARITY:
    function = em_parity(block, by_value(tree, 0));
    return return_conversion(id, rtype, function);

  case INTRN_I1POPCNT:
  case INTRN_I2POPCNT:
  case INTRN_I4POPCNT:
  case INTRN_I8POPCNT:
  case INTRN_POPCOUNT:
    {
      INT bitsize = MTYPE_size_reg(WN_rtype(by_value(tree, 0)));
      switch (id) {
      case INTRN_I1POPCNT: bitsize = 8;  break;
      case INTRN_I2POPCNT: bitsize = 16; break;
      case INTRN_I4POPCNT: bitsize = 32; break;
      case INTRN_I8POPCNT: bitsize = 64; break;
      }
      function = em_popcount(block, by_value(tree, 0), bitsize);
    }
    return return_conversion(id, rtype, function);
  case INTRN_CLGE:
  case INTRN_CLGT:
  case INTRN_CLLE:
  case INTRN_CLLT:
  case INTRN_CEQEXPR:
  case INTRN_CNEEXPR:
  case INTRN_CGEEXPR:
  case INTRN_CGTEXPR:
  case INTRN_CLEEXPR:
  case INTRN_CLTEXPR:
    /*
     * interpreted by lower_intrinsic_op()
     */
    break;
  case INTRN_SUBSTRINGEXPR:
  case INTRN_CONCATEXPR:
  case INTRN_CASSIGNSTMT:
  case INTRN_F4EXP:
  case INTRN_F8EXP:
  case INTRN_FQEXP:
  case INTRN_F16EXP:
    break;

  case INTRN_C4EXP:
  case INTRN_C8EXP:
  case INTRN_CQEXP:
  case INTRN_C16EXP:
    /*
     * real = e**(rz) * cos(iz);
     * imag = e**(rz) * sin(iz);
     */
    return em_complex_exp(block, by_value(tree, 0));

  case INTRN_F4LOG:
  case INTRN_F8LOG:
  case INTRN_FQLOG:
  case INTRN_F16LOG:
    break;
  case INTRN_C4LOG:
  case INTRN_C8LOG:
  case INTRN_CQLOG:
  case INTRN_C16LOG:
    /*
     * real = log ( sqrt(rz**2 + iz**2) )
     * imag = fatan2(iz, rz)
     */
    return em_complex_log(block, by_value(tree, 0));

  case INTRN_F4LOG10:
  case INTRN_F8LOG10:
  case INTRN_FQLOG10:
  case INTRN_F16LOG10:
    /*
     * log(x) * (M_LOG10E = 0.43429448190325182765)
     */
    return em_alog10(block, by_value(tree, 0));
  case INTRN_F4COS:
  case INTRN_F8COS:
  case INTRN_FQCOS:
  case INTRN_F16COS:
  case INTRN_F4SIN:
  case INTRN_F8SIN:
  case INTRN_FQSIN:
  case INTRN_F16SIN:
  case INTRN_F4TAN:
  case INTRN_F8TAN:
  case INTRN_FQTAN:
  case INTRN_F16TAN:
  case INTRN_F4COSD:
  case INTRN_F8COSD:
  case INTRN_FQCOSD:
  case INTRN_F16COSD:
  case INTRN_F4SIND:
  case INTRN_F8SIND:
  case INTRN_FQSIND:
  case INTRN_F16SIND:
  case INTRN_F4TAND:
  case INTRN_F8TAND:
  case INTRN_FQTAND:
  case INTRN_F16TAND:
  case INTRN_F4COSH:
  case INTRN_F8COSH:
  case INTRN_FQCOSH:
  case INTRN_F16COSH:
  case INTRN_F4SINH:
  case INTRN_F8SINH:
  case INTRN_FQSINH:
  case INTRN_F16SINH:
  case INTRN_F4TANH:
  case INTRN_F8TANH:
  case INTRN_FQTANH:
  case INTRN_F16TANH:
  case INTRN_F4ACOS:
  case INTRN_F8ACOS:
  case INTRN_FQACOS:
  case INTRN_F16ACOS:
  case INTRN_F4ASIN:
  case INTRN_F8ASIN:
  case INTRN_FQASIN:
  case INTRN_F16ASIN:
  case INTRN_F4ATAN:
  case INTRN_F8ATAN:
  case INTRN_FQATAN:
  case INTRN_F16ATAN:
  case INTRN_F4ACOSD:
  case INTRN_F8ACOSD:
  case INTRN_FQACOSD:
  case INTRN_F16ACOSD:
  case INTRN_F4ASIND:
  case INTRN_F8ASIND:
  case INTRN_FQASIND:
  case INTRN_F16ASIND:
  case INTRN_F4ATAND:
  case INTRN_F8ATAND:
  case INTRN_FQATAND:
  case INTRN_F16ATAND:
  case INTRN_F4ATAN2:
  case INTRN_F8ATAN2:
  case INTRN_FQATAN2:
  case INTRN_F16ATAN2:
  case INTRN_F4ATAN2D:
  case INTRN_F8ATAN2D:
  case INTRN_FQATAN2D:
  case INTRN_F16ATAN2D:
    break;

  case INTRN_C4COS:
  case INTRN_C8COS:
  case INTRN_CQCOS:
  case INTRN_C16COS:
    /*
     * real =  cos(rz) * cosh(iz);
     * imag = -sin(rz) * sinh(iz);
     */
    return em_complex_cos(block, by_value(tree, 0));

  case INTRN_F4C4ABS:
  case INTRN_F8C8ABS:
  case INTRN_FQCQABS:
  case INTRN_F16C16ABS:
    return em_complex_abs(block, by_value(tree, 0));

  case INTRN_C4SIN:
  case INTRN_C8SIN:
  case INTRN_CQSIN:
  case INTRN_C16SIN:
    /*
     * real = sin(rz) * cosh(iz);
     * imag = cos(rz) * sinh(iz);
     */
    return em_complex_sin(block, by_value(tree, 0));

  case INTRN_F4CIS:
  case INTRN_F8CIS:
  case INTRN_FQCIS:
  case INTRN_F16CIS:
    /*
     * cos(x) + i*sin(x)
     * do not expand this, as the library function is much more
     * efficient than calling cos() and sin()
     */
    break;
  case INTRN_U4I4MALLOC:
    if ( DEBUG_Trap_Uv )
      WN_intrinsic(tree) = INTRN_U4I4TRAPUV_MALLOC;
    break;
  case INTRN_U8I8MALLOC:
    if ( DEBUG_Trap_Uv )
      WN_intrinsic(tree) = INTRN_U8I8TRAPUV_MALLOC;
    break;

  case INTRN_U4FREE:
  case INTRN_U8FREE:
  case INTRN_MDATE:
  case INTRN_I4DATE:
  case INTRN_I8DATE:
  case INTRN_I4ERRSNS:
  case INTRN_I8ERRSNS:
  case INTRN_VEXIT:
  case INTRN_I4EXIT:
  case INTRN_I8EXIT:
  case INTRN_TIME:
  case INTRN_F4SECNDS:
  case INTRN_F8SECNDS:
  case INTRN_PAUSE:
  case INTRN_STOP:
  case INTRN_F77_BOUNDS_ERR:
  case INTRN_F4I4RAN:
  case INTRN_F4I8RAN:
  case INTRN_F8I4RAN:
  case INTRN_F8I8RAN:
  case INTRN_FQI4RAN:
  case INTRN_FQI8RAN:
  case INTRN_F16I4RAN:
  case INTRN_F16I8RAN:
    break;

  case INTRN_I8DIVFLOOR:
  case INTRN_I4DIVFLOOR:
  case INTRN_U4DIVFLOOR:
  case INTRN_U8DIVFLOOR:
    return em_divfloor(block, rtype, by_value(tree, 0), by_value(tree, 1));

  case INTRN_I4DIVCEIL:
  case INTRN_I8DIVCEIL:
  case INTRN_U4DIVCEIL:
  case INTRN_U8DIVCEIL:
    return em_divceil(block, rtype, by_value(tree, 0), by_value(tree, 1));

  case INTRN_I4MODFLOOR:
  case INTRN_I8MODFLOOR:
  case INTRN_U4MODFLOOR:
  case INTRN_U8MODFLOOR:
  case INTRN_I4MODCEIL:
  case INTRN_I8MODCEIL:
  case INTRN_U4MODCEIL:
  case INTRN_U8MODCEIL:
    Is_True(FALSE,
            ("%s not yet implemented. Michael Wolf said this would never, ever, ever be needed. Go away",
             INTR_intrinsic_name(tree)));
    break;

  case INTRN_I1MVBITS:
  case INTRN_I2MVBITS:
  case INTRN_I4MVBITS:
  case INTRN_I8MVBITS:
  case INTRN_I4CINDEX:
    break;
  case INTRN_BZERO:
    if (CG_mem_intrinsics)
    {
      return em_bzero(block, tree, WN_arg(tree, 0), WN_arg(tree, 1));
    }
    break;

  case INTRN_MEMSET:
#ifndef KEY // Don't emulate memset; call the PathScale memset instead.
    if (CG_mem_intrinsics)
    {
      return em_memset(block, tree, WN_arg(tree, 0), WN_arg(tree, 1), WN_arg(tree, 2));
    }
#endif
    break;

  case INTRN_BCOPY:
    if (CG_mem_intrinsics)
    {
      return em_bcopy(block, tree, WN_arg(tree, 0), WN_arg(tree, 1), WN_arg(tree, 2));
    }
    break;

  case INTRN_MEMCPY:
    if (CG_mem_intrinsics)
    {
      return em_memcpy(block, tree, WN_arg(tree, 0), WN_arg(tree, 1), WN_arg(tree, 2));
    }
    break;

  case INTRN_MEMMOVE:
    if (CG_mem_intrinsics)
    {
      return em_memmove(block, tree, WN_arg(tree, 0), WN_arg(tree, 1), WN_arg(tree, 2));
    }
    break;

#ifdef TARG_MIPS
  case INTRN_ISGREATER:
  case INTRN_ISGREATEREQUAL:
  case INTRN_ISLESS:
  case INTRN_ISLESSEQUAL:
  case INTRN_ISLESSGREATER:
  case INTRN_ISORDERED:
  case INTRN_ISUNORDERED:
    if (MTYPE_is_quad(WN_rtype(WN_kid0(tree))))
      return em_quad_intrn_cmp(block, tree);
    break;
#endif

#ifdef TARG_X8664
  case INTRN_VA_START:
    if (strcmp(Get_Error_Phase(), "VHO Processing") == 0)
      break;  // bug 8525: cannot lower va_start at VHO time
    return em_x8664_va_start(block, WN_arg(tree, 0));
#endif

#ifdef KEY
  case INTRN_F4CBRT:
  case INTRN_F8CBRT:
    break;
#endif

  default:
    break;
  }
  return NULL;
}
#ifdef TARG_MIPS
extern WN * em_quad_cvt(WN *, WN *);
extern WN * em_quad_arith(WN *, WN *);
extern WN * em_quad_trunc(WN *, WN *);
extern WN * em_quad_cmp(WN *, WN *);
extern WN * em_quad_minmax(WN *, WN *);
#endif
extern WN *emulate(WN *block, WN *tree)
{
  WN *wn = NULL;

  if (OPCODE_is_intrinsic(WN_opcode(tree)))
  {
    if (Inline_Intrinsics_Allowed)
    {
      wn = emulate_intrinsic_op(block, tree);
    }
#ifdef TARG_MIPS
    // Bug 14529: Some libscstr functions return the wrong value.
    // To work around this, convert
    //   r = memcpy(p,q,n)  -->  r = p; sc_memcpy(p,q,n);
    if (wn == NULL && OPT_Fast_Stdlib && WN_rtype(tree) != MTYPE_V &&
        ((INTRINSIC) WN_intrinsic(tree) == INTRN_MEMCPY ||
         (INTRINSIC) WN_intrinsic(tree) == INTRN_STRCPY ||
         (INTRINSIC) WN_intrinsic(tree) == INTRN_STRCAT ||
         (INTRINSIC) WN_intrinsic(tree) == INTRN_MEMMOVE)) {
      TYPE_ID rtype = WN_rtype(tree);
      WN *dst = WN_kid0(WN_kid0(tree));
      PREG_NUM preg_dst = AssignExpr(block, dst, rtype);
      WN_kid0(WN_kid0(tree)) = WN_LdidPreg(rtype, preg_dst);
      tree = WN_CopyNode(tree);  // Required, or it will be deleted twice
      WN_set_rtype(tree, MTYPE_V);
      WN_INSERT_BlockLast(block, tree);
      wn = WN_LdidPreg(rtype, preg_dst);
    }
#endif
  }
  else
  {
    switch(WN_operator(tree))
    {
    case OPR_NEG:
      if (MTYPE_is_quad(WN_rtype(tree)))
      {
        wn = em_quad_neg(block, tree);
      }
      break;

    case OPR_ABS:
      if (MTYPE_is_quad(WN_rtype(tree)))
      {
        wn = em_quad_abs(block, tree);
      }
      break;

    case OPR_SELECT:
      if (MTYPE_is_quad(WN_rtype(tree)))
      {
        wn = em_split_select(block, tree);
      }
      break;

    case OPR_SQRT:
      if (MTYPE_is_complex(WN_rtype(tree)))
      {
        if (Inline_Intrinsics_Allowed)
        {
          wn = em_complex_sqrt(block, WN_kid0(tree));
        }
      }
      break;

    case OPR_ALLOCA:
      wn = em_alloca(block, tree);
      break;

#ifdef TARG_MIPS
    case OPR_CVT:
      if (MTYPE_is_quad(WN_rtype(tree)) ||
          MTYPE_is_quad(WN_desc(tree)))
        wn = em_quad_cvt(block, tree);
      break;

    case OPR_ADD:
    case OPR_SUB:
    case OPR_DIV:
    case OPR_MPY:
      if (MTYPE_is_quad(WN_rtype(tree)))
        wn = em_quad_arith(block, tree);
      break;

    case OPR_TRUNC:
      Is_True (!MTYPE_is_quad(WN_rtype(tree)),
               ("emulate: truncate to quad type not supported"));
      if (MTYPE_is_quad(WN_desc(tree)))
        wn = em_quad_trunc(block, tree);
      break;

    case OPR_NE:
    case OPR_EQ:
    case OPR_GT:
    case OPR_GE:
    case OPR_LT:
    case OPR_LE:
      if (MTYPE_is_quad(WN_desc(tree)))
        wn = em_quad_cmp(block, tree);
      break;

    case OPR_MIN:
    case OPR_MAX:
      if (MTYPE_is_quad(WN_rtype(tree)))
        wn = em_quad_minmax(block, tree);
      break;
#endif

    default:
      break;
    }
  }
  return wn;
}
#ifdef KEY // bug 6938
extern WN *emulate_fast_exp(WN *block, WN *tree)
{
  if (! Inline_Intrinsics_Allowed)
    return NULL;

  INTRINSIC id = (INTRINSIC) WN_intrinsic(tree);
  TYPE_ID rtype = WN_rtype(tree);

  switch(id) {
  case INTRN_I4EXPEXPR:
  case INTRN_I8EXPEXPR:
    /*
     * do these regardless of flags, as they are always safe
     */
    return em_exp_int(block, by_value(tree, 0), by_value(tree, 1), rtype);

  case INTRN_F4I4EXPEXPR:
  case INTRN_F4I8EXPEXPR:
  case INTRN_F8I4EXPEXPR:
  case INTRN_F8I8EXPEXPR:
  case INTRN_FQI4EXPEXPR:
  case INTRN_FQI8EXPEXPR:
  case INTRN_F16I4EXPEXPR:
  case INTRN_F16I8EXPEXPR:
  case INTRN_C4I4EXPEXPR:
  case INTRN_C4I8EXPEXPR:
  case INTRN_C8I4EXPEXPR:
  case INTRN_C8I8EXPEXPR:
  case INTRN_CQI4EXPEXPR:
  case INTRN_CQI8EXPEXPR:
  case INTRN_C16I4EXPEXPR:
  case INTRN_C16I8EXPEXPR:
    /*
     * The consensus is that we allow constants (-1, 0, 1, 2) as
     * always safe, regardless of Fast_Exp_Allowed
     */
    return em_exp_int(block, by_value(tree, 0), by_value(tree, 1), rtype);

  case INTRN_F4EXPEXPR:
  case INTRN_F8EXPEXPR:
  case INTRN_FQEXPEXPR:
  case INTRN_F16EXPEXPR:
  case INTRN_C4EXPEXPR:
  case INTRN_C8EXPEXPR:
  case INTRN_CQEXPEXPR:
  case INTRN_C16EXPEXPR:
    if (Fast_Exp_Allowed)
      return em_exp_float(block, by_value(tree, 0), by_value(tree, 1), rtype);
    break;

  default:
    break;
  }
  return NULL;
}
#endif