/pathscale/be/com/emulate.cxx
C++ | 5500 lines | 3562 code | 761 blank | 1177 comment | 398 complexity | 585ce9277290458cdce41b68e1891e67 MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.0
Large files files are truncated, but you can click here to view the full file
- /*
- * Copyright (C) 2007, 2008, 2009 PathScale, LLC. All Rights Reserved.
- */
- /*
- * Copyright (C) 2006, 2007. QLogic Corporation. All Rights Reserved.
- */
- /*
- * Copyright 2003, 2004, 2005, 2006 PathScale, Inc. All Rights Reserved.
- */
- /*
- Copyright (C) 2000, 2001 Silicon Graphics, Inc. All Rights Reserved.
- Path64 is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
- Path64 is distributed in the hope that it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
- License for more details.
- You should have received a copy of the GNU General Public License
- along with Path64; see the file COPYING. If not, write to the Free
- Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA.
- Special thanks goes to SGI for their continued support to open source
- */
- #ifdef USE_PCH
- #include "be_com_pch.h"
- #endif /* USE_PCH */
- #pragma hdrstop
- #include <math.h>
- #if defined(BUILD_OS_DARWIN)
- #include <limits.h>
- #else /* defined(BUILD_OS_DARWIN) */
- #include <values.h>
- #endif /* defined(BUILD_OS_DARWIN) */
- #include <alloca.h>
- #include "defs.h"
- #include "config.h"
- #include "config_debug.h"
- #include "config_opt.h"
- #include "config_targ_opt.h"
- #include "errors.h"
- #include "erglob.h"
- #include "tracing.h"
- #include "stab.h"
- #include "data_layout.h"
- #include "wn.h"
- #include "wn_util.h"
- #include "const.h"
- #include "targ_const.h"
- #include "targ_sim.h"
- #include "fb_whirl.h"
- #include "be_symtab.h"
- #include "intrn_info.h"
#if (__GNUC__ == 2)
//
// Provide trunc(), which doesn't exist in the GNU library.  This is a
// quick and dirty hack, and should be handled elsehow.
//
// Returns d rounded toward zero.  floor() rounds toward minus infinity,
// so negative inputs must use ceil() instead; the previous form,
// "1.0 + floor(d)", was off by one for negative integral values
// (e.g. it turned -2.0 into -1.0).
//
static inline double trunc(double d)
{
  return (d < 0.0) ? ceil(d) : floor(d);
}
#endif
/*
**  Identifiers for the run time routines that supply functionality
**  to whirl expression nodes ("emulations", for lack of a better word).
**
**  The list was taken from ragnarok and may be incomplete/NYI.
**
**  The enumerators are used as direct indices into em_routines[], so
**  the two must be kept in the same order.
*/
typedef enum
{
  /* miscellaneous run time support */
  EM_TRAPUV = 0,        /* sets fpc_csr to interrupt on NaN */
  EM_RTS_CHECKSTACK,    /* checks for stack overflow */

  /* double-word (64 bit) integer support */
  EM_LL_MUL,            /* double-word multiply */
  EM_LL_DIV,            /* double-word divide */
  EM_ULL_DIV,           /* unsigned double-word divide */
  EM_LL_MOD,            /* double-word mod */
  EM_LL_REM,            /* double-word remainder */
  EM_ULL_REM,           /* unsigned double-word remainder */
  EM_LL_LSHIFT,         /* double-word left shift */
  EM_LL_RSHIFT,         /* double-word right shift */
  EM_ULL_RSHIFT,        /* unsigned double-word right shift */
  EM_LL_M3_DSLLV,       /* mips 3 simulation of dsllv */
  EM_LL_M3_DSRAV,       /* mips 3 simulation of dsrav */
  EM_LL_M3_DSRLV,       /* mips 3 simulation of dsrlv */
  EM_LL_TO_F,           /* cvt double-word to float */
  EM_ULL_TO_F,          /* cvt unsigned double-word to float */
  EM_LL_TO_D,           /* cvt double-word to double float */
  EM_ULL_TO_D,          /* cvt unsigned double-word to double float */
  EM_F_TO_LL,           /* cvt float to double-word */
  EM_F_TO_ULL,          /* cvt float to unsigned double-word */
  EM_F_ROUND_LL_F,      /* round float to float */
  EM_F_TRUNC_LL_F,      /* trunc float to float */
  EM_D_TO_LL,           /* cvt double float to double-word */
  EM_D_TO_ULL,          /* cvt double float to unsigned double-word */
  EM_D_ROUND_LL_D,      /* round double to double */
  EM_D_TRUNC_LL_D,      /* trunc double to double */
  EM_LL_BIT_EXTRACT,    /* double-word bit-field extraction */
  EM_LL_BIT_INSERT,     /* double-word bit-field insertion */

  /* quad precision floating point support */
  EM_Q_ABS,             /* quad absolute value */
  EM_Q_SQRT,            /* quad square root */
  EM_Q_ADD,             /* quad plus */
  EM_Q_SUB,             /* quad minus */
  EM_Q_MPY,             /* quad multiply */
  EM_Q_DIV,             /* quad divide */
  EM_Q_MAX1,            /* quad max */
  EM_Q_MIN1,            /* quad min */
  EM_Q_EQ,              /* quad equal */
  EM_Q_NE,              /* quad not equal */
  EM_Q_GE,              /* quad greater equal */
  EM_Q_GT,              /* quad greater than */
  EM_Q_LE,              /* quad less equal */
  EM_Q_LT,              /* quad less than */
  EM_SNGL_Q,            /* convert quad to single */
  EM_DBLE_Q,            /* convert quad to double */
  EM_KI_QINT,           /* convert quad to 64 bits int */
  EM_JI_QINT,           /* convert quad to 32 bits int */
  EM_Q_EXT,             /* convert float to quad */
  EM_Q_EXTD,            /* convert double to quad */
  EM_Q_FLOTK,           /* convert to quad from 64 bits int */
  EM_Q_FLOTKU,          /* convert to quad from unsigned 64 bits int */
  EM_Q_FLOTJ,           /* convert to quad from 32 bits int */
  EM_Q_FLOTJU,          /* convert to quad from unsigned 32 bits int */
  EM_KIQNNT,            /* round quad to closest 64 bits int value */
  EM_JIQNNT,            /* round quad to closest 32 bits int value */

  /* complex support */
  EM_C4_SQRT,           /* float complex sqrt */
  EM_C8_SQRT,           /* double complex sqrt */
  EM_CQ_SQRT,           /* quad complex sqrt */
  EM_C4_RSQRT,          /* float complex reciprocal sqrt */
  EM_C8_RSQRT,          /* double complex reciprocal sqrt */
  EM_CQ_RSQRT,          /* quad complex reciprocal sqrt */
  EM_C4_ABS,            /* float complex abs */
  EM_C8_ABS,            /* double complex abs */
  EM_CQ_ABS,            /* quad complex abs */

  /* quad ceiling/floor (f90 only) */
  EM_KI_QCEIL,          /* ceil quad to 64 bits int (f90 only) */
  EM_JI_QCEIL,          /* ceil quad to 32 bits int (f90 only) */
  EM_KI_QFLOOR,         /* floor quad to 64 bits int (f90 only) */
  EM_JI_QFLOOR,         /* floor quad to 32 bits int (f90 only) */

  EM_LAST               /* sentinel */
} EMULATION;
/*
**  Calling semantics for FE and runtime intrinsics and expressions:
**  how an actual argument is handed to the runtime routine.
*/
typedef enum
{
  COERCE_none = 0,
  COERCE_by_reference,
  COERCE_by_value,
  COERCE_struct_by_value,
  COERCE_struct_by_reference,
  COERCE_split_complex
} COERCE, *COERCEp;
- typedef struct EM_ROUTINES
- {
- EMULATION id;
- const char *functionName;
- INT32 functionAttributes;
- COERCE runtimeArg0coercion;
- } EM_ROUTINES, *EM_ROUTINESp;
- #define EM_id(x) em_routines[x].id
- #define EM_rt_name(x) em_routines[x].functionName
- #define EM_attributes(x) em_routines[x].functionAttributes
- #define EM_coerce0(x) em_routines[x].runtimeArg0coercion
/*
**  Keeping track of intrinsic/emulation arguments.
**  The problems we are trying to solve:
**
**  COERCE_by_reference
**      These are (unfortunately) provided by the FE to match the
**      run time routine.  By the time we get the argument we might
**      only have an address (anonymous pointer) and hence have lost
**      the type needed to dereference it (if we are trying to inline
**      it).
**
**  COERCE_split_complex
**      Complex values are split into real/imaginary pairs, doubling
**      the number of arguments.
**
**  This entire mechanism should be provided by the FE as part of
**  wtable.h
*/

/* shorthands for the functionAttributes column of em_routines[] */
#define NSE       PU_NO_SIDE_EFFECTS
#define PURE_NSE  (PU_IS_PURE | NSE)

/* functionName value for table rows that have no runtime routine */
#define INVALID   NULL
- /*
- ** The emulation table may not yet be complete (or used)
- ** The fields are
- **
- ** EMULATION id;
- ** The table must be kept in order with the enumeration
- ** as it is a direct lookup
- **
- ** char *functionName;
- ** The exact external name, no underbars
- **
- ** INT32 functionAttributes;
- **
- ** COERCEp functionArgCoercion;
- ** Actual to runtime formal conversion
- ** The child of an expression/intrinsic WN needs to be
- ** converted to call it's runtime function.
- ** ex.
- ** complex routines are now split-by_value
- **
- ** These routines are all by value so we already know the
- ** argument type
- */
- #define NONE 0
- const EM_ROUTINES em_routines[]=
- {
- EM_TRAPUV, "__trapuv", PURE_NSE, COERCE_none,
- EM_RTS_CHECKSTACK,"_RtlCheckStack",PURE_NSE, COERCE_none,
- EM_LL_MUL, "__ll_mul", PURE_NSE, COERCE_none,
- EM_LL_DIV, "__ll_div", PURE_NSE, COERCE_none,
- EM_ULL_DIV, "__ull_div", PURE_NSE, COERCE_none,
- EM_LL_MOD, "__ll_mod", PURE_NSE, COERCE_none,
- EM_LL_REM, "__ll_rem", PURE_NSE, COERCE_none,
- EM_ULL_REM, "__ull_rem", PURE_NSE, COERCE_none,
- EM_LL_LSHIFT, "__ll_lshift", PURE_NSE, COERCE_none,
- EM_LL_RSHIFT, "__ll_rshift", PURE_NSE, COERCE_none,
- EM_ULL_RSHIFT, "__ull_rshift", PURE_NSE, COERCE_none,
- EM_LL_M3_DSLLV, "__dsllv", PURE_NSE, COERCE_none,
- EM_LL_M3_DSRAV, "__dsrav", PURE_NSE, COERCE_none,
- EM_LL_M3_DSRLV, "__dsrlv", PURE_NSE, COERCE_none,
- EM_LL_TO_F, "__ll_to_f", PURE_NSE, COERCE_none,
- EM_ULL_TO_F, "__ull_to_f", PURE_NSE, COERCE_none,
- EM_LL_TO_D, "__ll_to_d", PURE_NSE, COERCE_none,
- EM_ULL_TO_D, "__ull_to_d", PURE_NSE, COERCE_none,
- EM_F_TO_LL, "__f_to_ll", PURE_NSE, COERCE_none,
- EM_F_TO_ULL, "__f_to_ull", PURE_NSE, COERCE_none,
- EM_F_ROUND_LL_F, "__f_round_ll_f",PURE_NSE, COERCE_none,
- EM_F_TRUNC_LL_F, "__f_trunc_ll_f",PURE_NSE, COERCE_none,
- EM_D_TO_LL, "__d_to_ll", PURE_NSE, COERCE_none,
- EM_D_TO_ULL, "__d_to_ull", PURE_NSE, COERCE_none,
- EM_D_ROUND_LL_D, "__d_round_ll_d",PURE_NSE, COERCE_none,
- EM_D_TRUNC_LL_D, "__d_trunc_ll_d",PURE_NSE, COERCE_none,
- EM_LL_BIT_EXTRACT,"__ll_bit_extract",PURE_NSE,COERCE_none,
- EM_LL_BIT_INSERT, "__ll_bit_insert",PURE_NSE, COERCE_none,
- EM_Q_ABS, "__qabs", PURE_NSE, COERCE_none,
- EM_Q_SQRT, "__qsqrt", PURE_NSE, COERCE_none,
- EM_Q_ADD, "__q_add", PURE_NSE, COERCE_none,
- EM_Q_SUB, "__q_sub", PURE_NSE, COERCE_none,
- EM_Q_MPY, "__q_mul", PURE_NSE, COERCE_none,
- EM_Q_DIV, "__q_div", PURE_NSE, COERCE_none,
- EM_Q_MAX1, "__q_max1", PURE_NSE, COERCE_none,
- EM_Q_MIN1, "__q_min1", PURE_NSE, COERCE_none,
- EM_Q_EQ, "__q_eq", PURE_NSE, COERCE_none,
- EM_Q_NE, "__q_ne", PURE_NSE, COERCE_none,
- EM_Q_GE, "__q_ge", PURE_NSE, COERCE_none,
- EM_Q_GT, "__q_gt", PURE_NSE, COERCE_none,
- EM_Q_LE, "__q_le", PURE_NSE, COERCE_none,
- EM_Q_LT, "__q_lt", PURE_NSE, COERCE_none,
- EM_SNGL_Q, "__sngl_q", PURE_NSE, COERCE_none,
- EM_DBLE_Q, "__dble_q", PURE_NSE, COERCE_none,
- EM_KI_QINT, "__ki_qint", PURE_NSE, COERCE_none,
- EM_JI_QINT, "__ji_qint", PURE_NSE, COERCE_none,
- EM_Q_EXT, "__q_ext", PURE_NSE, COERCE_none,
- EM_Q_EXTD, "__q_extd", PURE_NSE, COERCE_none,
- EM_Q_FLOTK, "__q_flotk", PURE_NSE, COERCE_none,
- EM_Q_FLOTKU, "__q_flotku", PURE_NSE, COERCE_none,
- EM_Q_FLOTJ, "__q_flotj", PURE_NSE, COERCE_none,
- EM_Q_FLOTJU, "__q_flotju", PURE_NSE, COERCE_none,
- EM_KIQNNT, "__kiqnnt", PURE_NSE, COERCE_none,
- EM_JIQNNT, "__jiqnnt", PURE_NSE, COERCE_none,
- EM_C4_SQRT, "__csqrt", PURE_NSE, COERCE_split_complex,
- EM_C8_SQRT, "__zsqrt", PURE_NSE, COERCE_split_complex,
- EM_CQ_SQRT, "__cqsqrt", PURE_NSE, COERCE_split_complex,
- EM_C4_RSQRT, INVALID, NONE, COERCE_none,
- EM_C8_RSQRT, INVALID, NONE, COERCE_none,
- EM_CQ_RSQRT, INVALID, NONE, COERCE_none,
- EM_C4_ABS, INVALID, NONE, COERCE_none,
- EM_C8_ABS, INVALID, NONE, COERCE_none,
- EM_CQ_ABS, INVALID, NONE, COERCE_none,
- EM_KI_QCEIL, "_CEILING_16_8", PURE_NSE, COERCE_none,
- EM_JI_QCEIL, "_CEILING_16_4", PURE_NSE, COERCE_none,
- EM_KI_QFLOOR, "_FLOOR_16_8", PURE_NSE, COERCE_none,
- EM_JI_QFLOOR, "_FLOOR_16_4", PURE_NSE, COERCE_none,
- };
- typedef struct
- {
- INTRINSIC id;
- COERCE runtimeArg0;
- COERCE runtimeArg1;
- } INTRINSIC_RUNTIME_FORMALS;
- #define INTR_id(x) intrinsic_runtime_formals[(x)].id
- #define INTR_coerce0(x) intrinsic_runtime_formals[(x)].runtimeArg0
- #define INTR_coerce1(x) intrinsic_runtime_formals[(x)].runtimeArg1
- /*
- ** TODO
- ** eventually the FE will supply this information
- ** from the intrinsic table, when we finish the implementation
- */
- INTRINSIC_RUNTIME_FORMALS intrinsic_runtime_formals[]=
- {
- INTRN_C4I4EXPEXPR, COERCE_split_complex, COERCE_none,
- INTRN_C4I8EXPEXPR, COERCE_split_complex, COERCE_none,
- INTRN_C8I4EXPEXPR, COERCE_split_complex, COERCE_none,
- INTRN_C8I8EXPEXPR, COERCE_split_complex, COERCE_none,
- INTRN_CQI4EXPEXPR, COERCE_split_complex, COERCE_none,
- INTRN_CQI8EXPEXPR, COERCE_split_complex, COERCE_none,
- INTRN_C16I4EXPEXPR, COERCE_split_complex, COERCE_none,
- INTRN_C16I8EXPEXPR, COERCE_split_complex, COERCE_none,
- INTRN_C4EXPEXPR, COERCE_split_complex, COERCE_split_complex,
- INTRN_C8EXPEXPR, COERCE_split_complex, COERCE_split_complex,
- INTRN_CQEXPEXPR, COERCE_split_complex, COERCE_split_complex,
- INTRN_C16EXPEXPR, COERCE_split_complex, COERCE_split_complex,
- INTRN_F4C4ABS, COERCE_split_complex, COERCE_none,
- INTRN_F8C8ABS, COERCE_split_complex, COERCE_none,
- INTRN_FQCQABS, COERCE_split_complex, COERCE_none,
- INTRN_F16C16ABS, COERCE_split_complex, COERCE_none,
- INTRN_C4EXP, COERCE_split_complex, COERCE_none,
- INTRN_C8EXP, COERCE_split_complex, COERCE_none,
- INTRN_CQEXP, COERCE_split_complex, COERCE_none,
- INTRN_C16EXP, COERCE_split_complex, COERCE_none,
- INTRN_C4LOG, COERCE_split_complex, COERCE_none,
- INTRN_C8LOG, COERCE_split_complex, COERCE_none,
- INTRN_CQLOG, COERCE_split_complex, COERCE_none,
- INTRN_C16LOG, COERCE_split_complex, COERCE_none,
- INTRN_C4COS, COERCE_split_complex, COERCE_none,
- INTRN_C8COS, COERCE_split_complex, COERCE_none,
- INTRN_CQCOS, COERCE_split_complex, COERCE_none,
- INTRN_C16COS, COERCE_split_complex, COERCE_none,
- INTRN_C4SIN, COERCE_split_complex, COERCE_none,
- INTRN_C8SIN, COERCE_split_complex, COERCE_none,
- INTRN_CQSIN, COERCE_split_complex, COERCE_none,
- INTRN_C16SIN, COERCE_split_complex, COERCE_none
- };
- INT32 intrinsic_runtime_formals_size = sizeof(intrinsic_runtime_formals) /
- sizeof( INTRINSIC_RUNTIME_FORMALS);
- typedef struct
- {
- INTRINSIC id;
- TYPE_ID parameterType0;
- TYPE_ID parameterType1;
- TYPE_ID parameterType2;
- } INTRINSIC_PARAMETER_TYPE;
- #define INTR_parm_id(x) intrinsic_parameter_type[(x)].id
- #define INTR_parmtype0(x) intrinsic_parameter_type[(x)].parameterType0
- #define INTR_parmtype1(x) intrinsic_parameter_type[(x)].parameterType1
- #define INTR_parmtype2(x) intrinsic_parameter_type[(x)].parameterType2
- INTRINSIC_PARAMETER_TYPE intrinsic_parameter_type[]=
- {
- INTRN_I1DIM, MTYPE_I1, MTYPE_I1, MTYPE_V,
- INTRN_I2DIM, MTYPE_I2, MTYPE_I2, MTYPE_V,
- INTRN_I4DIM, MTYPE_I4, MTYPE_I4, MTYPE_V,
- INTRN_I8DIM, MTYPE_I8, MTYPE_I8, MTYPE_V,
- INTRN_F4DIM, MTYPE_F4, MTYPE_F4, MTYPE_V,
- INTRN_F8DIM, MTYPE_F8, MTYPE_F8, MTYPE_V,
- INTRN_FQDIM, MTYPE_FQ, MTYPE_FQ, MTYPE_V,
- INTRN_F16DIM, MTYPE_F16, MTYPE_F16, MTYPE_V,
- INTRN_F4MOD, MTYPE_F4, MTYPE_F4, MTYPE_V,
- INTRN_F8MOD, MTYPE_F8, MTYPE_F8, MTYPE_V,
- INTRN_FQMOD, MTYPE_FQ, MTYPE_FQ, MTYPE_V,
- INTRN_F16MOD, MTYPE_F16, MTYPE_F16, MTYPE_V,
- INTRN_F8F4PROD, MTYPE_F4, MTYPE_F4, MTYPE_V,
- INTRN_FQF8PROD, MTYPE_F8, MTYPE_F8, MTYPE_V,
- INTRN_F16F8PROD, MTYPE_F8, MTYPE_F8, MTYPE_V,
- INTRN_I1SIGN, MTYPE_I1, MTYPE_I1, MTYPE_V,
- INTRN_I2SIGN, MTYPE_I2, MTYPE_I2, MTYPE_V,
- INTRN_I4SIGN, MTYPE_I4, MTYPE_I4, MTYPE_V,
- INTRN_I8SIGN, MTYPE_I8, MTYPE_I8, MTYPE_V,
- INTRN_F4SIGN, MTYPE_F4, MTYPE_F4, MTYPE_V,
- INTRN_F8SIGN, MTYPE_F8, MTYPE_F8, MTYPE_V,
- INTRN_FQSIGN, MTYPE_FQ, MTYPE_FQ, MTYPE_V,
- INTRN_F16SIGN, MTYPE_F16, MTYPE_F16, MTYPE_V,
- INTRN_F4AINT, MTYPE_F4, MTYPE_V, MTYPE_V,
- INTRN_F8AINT, MTYPE_F8, MTYPE_V, MTYPE_V,
- INTRN_FQAINT, MTYPE_FQ, MTYPE_V, MTYPE_V,
- INTRN_F16AINT, MTYPE_F16, MTYPE_V, MTYPE_V,
- INTRN_I2F4NINT, MTYPE_F4, MTYPE_V, MTYPE_V,
- INTRN_I4F4NINT, MTYPE_F4, MTYPE_V, MTYPE_V,
- INTRN_I8F4NINT, MTYPE_F4, MTYPE_V, MTYPE_V,
- INTRN_I2F8IDNINT, MTYPE_F8, MTYPE_V, MTYPE_V,
- INTRN_I4F8IDNINT, MTYPE_F8, MTYPE_V, MTYPE_V,
- INTRN_I8F8IDNINT, MTYPE_F8, MTYPE_V, MTYPE_V,
- INTRN_I2FQIQNINT, MTYPE_FQ, MTYPE_V, MTYPE_V,
- INTRN_I4FQIQNINT, MTYPE_FQ, MTYPE_V, MTYPE_V,
- INTRN_I8FQIQNINT, MTYPE_FQ, MTYPE_V, MTYPE_V,
- INTRN_I2F16IQNINT, MTYPE_F16, MTYPE_V, MTYPE_V,
- INTRN_I4F16IQNINT, MTYPE_F16, MTYPE_V, MTYPE_V,
- INTRN_I8F16IQNINT, MTYPE_F16, MTYPE_V, MTYPE_V,
- INTRN_F4ANINT, MTYPE_F4, MTYPE_V, MTYPE_V,
- INTRN_F8ANINT, MTYPE_F8, MTYPE_V, MTYPE_V,
- INTRN_FQANINT, MTYPE_FQ, MTYPE_V, MTYPE_V,
- INTRN_F16ANINT, MTYPE_F16, MTYPE_V, MTYPE_V,
- INTRN_F4LOG10, MTYPE_F4, MTYPE_V, MTYPE_V,
- INTRN_F8LOG10, MTYPE_F8, MTYPE_V, MTYPE_V,
- INTRN_FQLOG10, MTYPE_FQ, MTYPE_V, MTYPE_V,
- INTRN_F16LOG10, MTYPE_F16, MTYPE_V, MTYPE_V,
- INTRN_I1BTEST, MTYPE_I1, MTYPE_I1, MTYPE_V,
- INTRN_I2BTEST, MTYPE_I2, MTYPE_I2, MTYPE_V,
- INTRN_I4BTEST, MTYPE_I4, MTYPE_I4, MTYPE_V,
- INTRN_I8BTEST, MTYPE_I8, MTYPE_I8, MTYPE_V,
- INTRN_I1BSET, MTYPE_I1, MTYPE_I1, MTYPE_V,
- INTRN_I2BSET, MTYPE_I2, MTYPE_I2, MTYPE_V,
- INTRN_I4BSET, MTYPE_I4, MTYPE_I4, MTYPE_V,
- INTRN_I8BSET, MTYPE_I8, MTYPE_I8, MTYPE_V,
- INTRN_I1BCLR, MTYPE_I1, MTYPE_I1, MTYPE_V,
- INTRN_I2BCLR, MTYPE_I2, MTYPE_I2, MTYPE_V,
- INTRN_I4BCLR, MTYPE_I4, MTYPE_I4, MTYPE_V,
- INTRN_I8BCLR, MTYPE_I8, MTYPE_I8, MTYPE_V,
- INTRN_I1BITS, MTYPE_I1, MTYPE_I1, MTYPE_I1,
- INTRN_I2BITS, MTYPE_I2, MTYPE_I2, MTYPE_I2,
- INTRN_I4BITS, MTYPE_I4, MTYPE_I4, MTYPE_I4,
- INTRN_I8BITS, MTYPE_I8, MTYPE_I8, MTYPE_I8,
- INTRN_I1SHL, MTYPE_I1, MTYPE_I1, MTYPE_V,
- INTRN_I2SHL, MTYPE_I2, MTYPE_I2, MTYPE_V,
- INTRN_I1SHR, MTYPE_I1, MTYPE_I1, MTYPE_V,
- INTRN_I2SHR, MTYPE_I2, MTYPE_I2, MTYPE_V,
- INTRN_I1SHFT, MTYPE_I1, MTYPE_I1, MTYPE_V,
- INTRN_I2SHFT, MTYPE_I2, MTYPE_I2, MTYPE_V,
- INTRN_I4SHFT, MTYPE_I4, MTYPE_I4, MTYPE_V,
- INTRN_I8SHFT, MTYPE_I8, MTYPE_I8, MTYPE_V,
- INTRN_I1SHFTC, MTYPE_I1, MTYPE_I1, MTYPE_I1,
- INTRN_I2SHFTC, MTYPE_I2, MTYPE_I2, MTYPE_I2,
- INTRN_I4SHFTC, MTYPE_I4, MTYPE_I4, MTYPE_I4,
- INTRN_I8SHFTC, MTYPE_I8, MTYPE_I8, MTYPE_I8,
- INTRN_I1MVBITS, MTYPE_I1, MTYPE_I1, MTYPE_I1,
- INTRN_I2MVBITS, MTYPE_I2, MTYPE_I2, MTYPE_I2,
- INTRN_I4MVBITS, MTYPE_I4, MTYPE_I4, MTYPE_I4,
- INTRN_I8MVBITS, MTYPE_I8, MTYPE_I8, MTYPE_I8,
- };
- INT32 intrinsic_parameter_type_size = sizeof(intrinsic_parameter_type) /
- sizeof( INTRINSIC_PARAMETER_TYPE);
/* TRUE when the opcode of WN x reports one or two TY operands */
#define WN_has_ty(x) \
        (OPCODE_has_1ty(WN_opcode(x)) || OPCODE_has_2ty(WN_opcode(x)))

/* TRUE when WN x has a TY and that TY is of pointer kind */
#define WN_is_pointer(x) \
        (WN_has_ty(x) && (TY_kind(WN_ty(x)) == KIND_POINTER))

/* operator tests for integer constant (INTCONST) and CONST nodes */
#define Is_Integer_Constant(x)  (WN_operator(x) == OPR_INTCONST)
#define Is_Constant(x)          (WN_operator(x) == OPR_CONST)

/* TRUE when op is the opcode of an intrinsic call or intrinsic op */
#define OPCODE_is_intrinsic(op)                          \
        ((OPCODE_operator((op)) == OPR_INTRINSIC_CALL) ||  \
         (OPCODE_operator((op)) == OPR_INTRINSIC_OP))

/* absolute value; note that the argument is evaluated more than once */
#define ABS(x)  (((x)<0) ? -(x) : (x))
- /* ====================================================================
- * Exported Functions
- * ====================================================================
- */
- extern const char * INTR_intrinsic_name(WN *tree);
- extern WN * make_pointer_to_node(WN *block, WN *tree);
- /* ====================================================================
- * Imported Functions
- * ====================================================================
- */
- extern PREG_NUM AssignExpr(WN *block, WN *tree, TYPE_ID type);
- extern TY_IDX compute_alignment_type(WN *tree, TY_IDX, INT64 offset);
- extern INT32 compute_copy_alignment(TY_IDX, TY_IDX, INT32 offset);
- extern BOOL lower_is_aliased(WN *wn1, WN *wn2, INT64 size);
- extern TYPE_ID compute_copy_quantum(INT32 );
- extern WN *WN_I1const(TYPE_ID type, INT64 con);
- extern void WN_annotate_call_flags(WN *call, ST *sym);
- extern BOOL CG_bcopy_cannot_overlap;
- extern BOOL CG_memcpy_cannot_overlap;
- extern BOOL CG_memmove_cannot_overlap;
- extern INT32 CG_memmove_inst_count;
- #ifdef KEY
- extern INT32 CG_memmove_align_inst_count;
- #endif
- /* ====================================================================
- * Forward Declarations
- * ====================================================================
- */
- static EMULATION WN_emulation(WN *tree);
- static WN *em_exp_int(WN *block, WN *x, WN *pow, TYPE_ID type);
- static WN *em_exp_float(WN *block, WN *x, WN *pow, TYPE_ID type);
- static WN *em_mod_float(WN *block, WN *x, WN *y);
- static WN *em_complex_exp(WN *block, WN *x);
- static WN *em_complex_cos(WN *block, WN *x);
- static COERCE INTR_coerce_runtime(WN *tree, INT32 arg);
- static TYPE_ID INTR_parameter_type(WN *tree, INT32 arg);
- static TY_IDX aux_compute_alignment(WN *tree);
- /* ====================================================================
- * private variables
- * ====================================================================
- */
- static INT32 em_exp_int_max = 256;
- #define MAX_INTRINSIC_ARGS 20
- /* ====================================================================
- *
- * TYPE_ID INTR_return_mtype(id)
- *
- *
- *
- * ==================================================================== */
- TYPE_ID INTR_return_mtype(INTRINSIC id)
- {
- INTRN_RETKIND rtype = INTRN_return_kind(id);
- switch(rtype)
- {
- case IRETURN_I1: return MTYPE_I1;
- case IRETURN_I2: return MTYPE_I2;
- case IRETURN_I4: return MTYPE_I4;
- case IRETURN_I8: return MTYPE_I8;
- case IRETURN_U1: return MTYPE_U1;
- case IRETURN_U2: return MTYPE_U2;
- case IRETURN_U4: return MTYPE_U4;
- case IRETURN_U8: return MTYPE_U8;
- case IRETURN_F4: return MTYPE_F4;
- case IRETURN_F8: return MTYPE_F8;
- case IRETURN_FQ: return MTYPE_FQ;
- case IRETURN_F16: return MTYPE_F16;
- case IRETURN_C4: return MTYPE_C4;
- case IRETURN_C8: return MTYPE_C8;
- case IRETURN_CQ: return MTYPE_CQ;
- case IRETURN_C16: return MTYPE_C16;
- case IRETURN_V: return MTYPE_V;
- case IRETURN_PV:
- case IRETURN_PU1:
- case IRETURN_DA1:
- case IRETURN_SZT:
- case IRETURN_PC :
- case IRETURN_UNKNOWN:
- return MTYPE_UNKNOWN;
- }
- return MTYPE_UNKNOWN;
- }
- /* ====================================================================
- *
- * EMULATION WN_emulation(WN *tree)
- *
- * Provide the correct emulation enum for a given WN
- *
- * TODO: cache most frequently used id's
- *
- * ==================================================================== */
- static EMULATION WN_emulation(WN *tree)
- {
- OPCODE op = WN_opcode(tree);
- TYPE_ID type = OPCODE_rtype(op);
- switch (WN_operator(tree)) {
- case OPR_SQRT:
- switch(type) {
- case MTYPE_C4: return EM_C4_SQRT;
- case MTYPE_C8: return EM_C8_SQRT;
- case MTYPE_CQ: return EM_CQ_SQRT;
- case MTYPE_C16: return EM_CQ_SQRT;
- case MTYPE_FQ: return EM_Q_SQRT;
- case MTYPE_F16: return EM_Q_SQRT;
- }
- break;
- case OPR_RSQRT:
- switch(type) {
- case MTYPE_C4: return EM_C4_RSQRT;
- case MTYPE_C8: return EM_C8_RSQRT;
- case MTYPE_CQ: return EM_CQ_RSQRT;
- case MTYPE_C16: return EM_CQ_RSQRT;
- }
- break;
- case OPR_CVT:
- {
- TYPE_ID desc = WN_desc(tree);
- if (desc == MTYPE_FQ || desc == MTYPE_F16)
- {
- switch(type) {
- case MTYPE_I4: return EM_JI_QINT;
- case MTYPE_I8: return EM_KI_QINT;
- case MTYPE_F4: return EM_SNGL_Q;
- case MTYPE_F8: return EM_DBLE_Q;
- }
- break;
- }
- else if (type == MTYPE_FQ || type == MTYPE_F16)
- {
- switch(desc) {
- case MTYPE_U4: return EM_Q_FLOTJU;
- case MTYPE_I4: return EM_Q_FLOTJ;
- case MTYPE_U8: return EM_Q_FLOTKU;
- case MTYPE_I8: return EM_Q_FLOTK;
- case MTYPE_F8: return EM_Q_EXTD;
- case MTYPE_F4: return EM_Q_EXT;
- }
- }
- }
- break;
- case OPR_RND:
- {
- TYPE_ID desc = WN_desc(tree);
- if (desc == MTYPE_FQ || desc == MTYPE_F16)
- {
- switch(type)
- {
- case MTYPE_I4: return EM_JIQNNT;
- case MTYPE_I8: return EM_KIQNNT;
- }
- break;
- }
- }
- break;
- default:
- if (type == MTYPE_FQ || type == MTYPE_F16)
- {
- switch(WN_operator(tree)) {
- case OPR_ISTORE:
- case OPR_ISTOREX:
- case OPR_STID:
- case OPR_ILOAD:
- case OPR_ILOADX:
- case OPR_SELECT:
- case OPR_LDID:
- case OPR_CONST:
- case OPR_NEG:
- break;
- case OPR_ABS: return EM_Q_ABS;
- case OPR_ADD: return EM_Q_ADD;
- case OPR_SUB: return EM_Q_SUB;
- case OPR_MPY: return EM_Q_MPY;
- case OPR_DIV: return EM_Q_DIV;
- case OPR_MAX: return EM_Q_MAX1;
- case OPR_MIN: return EM_Q_MIN1;
- case OPR_RECIP:
- case OPR_RSQRT:
- case OPR_MADD:
- case OPR_MSUB:
- case OPR_NMADD:
- case OPR_NMSUB:
- case OPR_RND:
- case OPR_TRUNC:
- case OPR_CVT:
- case OPR_SQRT:
- Is_True(FALSE, ("WN_emulation() %s should be already processed", OPCODE_name(WN_opcode(tree))));
- break;
- case OPR_CEIL:
- case OPR_FLOOR:
- case OPR_MOD:
- case OPR_REM:
- case OPR_CVTL:
- case OPR_CALL:
- case OPR_INTRINSIC_CALL:
- Is_True(FALSE, ("WN_emulation() %s invalid context for op", OPCODE_name(WN_opcode(tree))));
- }
- }
- else if (WN_desc(tree)== MTYPE_FQ || WN_desc(tree)== MTYPE_F16)
- {
- switch(WN_operator(tree)) {
- case OPR_EQ: return EM_Q_EQ;
- case OPR_NE: return EM_Q_NE;
- case OPR_GT: return EM_Q_GT;
- case OPR_GE: return EM_Q_GE;
- case OPR_LT: return EM_Q_LT;
- case OPR_LE: return EM_Q_LE;
- case OPR_TRUNC:
- switch(type)
- {
- case MTYPE_I4: return EM_JI_QINT;
- case MTYPE_I8: return EM_KI_QINT;
- }
- break;
- case OPR_CEIL:
- switch(type)
- {
- case MTYPE_I4: return EM_JI_QCEIL;
- case MTYPE_I8: return EM_KI_QCEIL;
- }
- break;
- case OPR_FLOOR:
- switch(type)
- {
- case MTYPE_I4: return EM_JI_QFLOOR;
- case MTYPE_I8: return EM_KI_QFLOOR;
- }
- break;
- }
- }
- break;
- }
- FmtAssert(FALSE, ("WN_emulation() %s not recognized", OPCODE_name(WN_opcode(tree))));
- return EM_LAST;
- }
- /* ====================================================================
- *
- * WN *checkForZero(WN *block, TYPE_ID type, PREG_NUM xN, WN *if_else, WN *value)
- *
- * Create test block for zero
- * if (x==0)
- * { ret = 0; }
- * else
- * { ret = value }
- * return ret;
- *
- * ==================================================================== */
- static WN *checkForZero(WN *block, TYPE_ID type, PREG_NUM xN, WN *if_else, WN *value)
- {
- TYPE_ID rtype = WN_rtype(value);
- WN *if_then;
- PREG_NUM retN;
- if_then = WN_CreateBlock();
- retN = AssignExpr(if_then, WN_Zerocon(rtype), rtype);
- {
- WN *st;
- st = WN_StidIntoPreg(rtype, retN, MTYPE_To_PREG(rtype), value);
- WN_INSERT_BlockLast(if_else, st);
- }
- {
- WN *cond, *IF;
- Is_True(MTYPE_is_float(type), ("unexpected type"));
- cond = WN_EQ(type,
- WN_LdidPreg(type, xN),
- WN_Zerocon(type));
- IF = WN_CreateIf(cond, if_then, if_else);
- WN_INSERT_BlockLast(block, IF);
- }
- return WN_LdidPreg(rtype, retN);
- }
- /* ====================================================================
- *
- * WN * WN_arg(WN *tree, INT32 arg)
- *
- * return Nth kid , skiping PARM
- * ==================================================================== */
- static WN *WN_arg(WN *tree, INT32 arg)
- {
- WN *child= WN_kid(tree, arg);
- if (WN_operator_is(child, OPR_PARM))
- {
- return WN_kid0(child);
- }
- return child;
- }
- static WN *em_clen(WN *block, WN *len)
- {
- return len;
- }
- /*
- **
- ** Auxillary routine to implement ( x + .5 * sign(x) )
- */
- static WN *aux_nearest(TYPE_ID rtype, PREG_NUM xN)
- {
- WN *rel, *select;
- rel = WN_GE(rtype, WN_LdidPreg(rtype, xN), WN_Zerocon(rtype));
- select = WN_Select(rtype,
- rel,
- WN_Floatconst(rtype, .5),
- WN_Floatconst(rtype, -.5));
- return WN_Add(rtype, WN_LdidPreg(rtype, xN), select);
- }
- /*
- ** Auxillary routine for Convert ( {Round,Trunc}(rtype) )
- */
- static WN *aux_CvtRnd(TYPE_ID rtype, WN *x)
- {
- WN *rnd;
- TYPE_ID intToFloat = (Slow_CVTDL) ? MTYPE_I4 : MTYPE_I8;
-
- // Needed for correctness, no matter how slow the truncate
- if (WN_rtype(x) != MTYPE_F4) {
- intToFloat = MTYPE_I8;
- }
- rnd = WN_Rnd(rtype, intToFloat, x);
- return WN_Cvt(intToFloat, rtype, rnd);
- }
- static WN *aux_CvtTrunc(TYPE_ID rtype, WN *x)
- {
- WN *trunc;
- TYPE_ID intToFloat = (Slow_CVTDL) ? MTYPE_I4 : MTYPE_I8;
- /*
- * this is em_aint()
- */
- // Needed for correctness, no matter how slow the truncate
- if (WN_rtype(x) != MTYPE_F4) {
- intToFloat = MTYPE_I8;
- }
- trunc = WN_Trunc(rtype, intToFloat, x);
- return WN_Cvt(intToFloat, rtype, trunc);
- }
- /*
- ** Optimizer cannot deal with zero length mstore so return BLOCK
- */
- static WN *aux_CreateMstore(WN_OFFSET offset, TY_IDX type, WN *value, WN *addr,
- WN *size)
- {
- if (Is_Integer_Constant(size) && WN_const_val(size) <= 0)
- {
- /* Cannot delete these nodes, since they are used later (bug 623566)
- WN_Delete(value);
- WN_Delete(addr);
- WN_Delete(size);
- */
- return WN_CreateBlock();
- }
- UINT64 ty_size = TY_size(TY_pointed(type));
- if (ty_size != 0 && WN_const_val (size) % ty_size != 0) {
- // size copied is not a multiple of the size of the type, which means
- // that we are copying part of the type. We then change the pointer
- // to (void*)
- static TY_IDX void_star = TY_IDX_ZERO;
- if (void_star == TY_IDX_ZERO)
- void_star = Make_Pointer_Type (MTYPE_To_TY (MTYPE_V));
- Set_TY_IDX_index (type, TY_IDX_index (void_star));
- }
- return WN_CreateMstore(offset, type, value, addr, size);
- }
/*
**
**  Notes for the following functions:
**
**  [1] Fast_trunc_Allowed (currently when Roundoff_Level >= ROUNDOFF_SIMPLE)
**      generate trunc. This will fail when (-2**63 <= |x| < 2**63-1)
**
**  [2] Test x against TWO_EXP
**      The floating point value is such that (x+1 == x), i.e. there is
**      no possible fractional value, e.g. for single precision
**              2**23 <= |x|            return x
**
**  It is possible (if necessary) to special case MTYPE_F4 and generate
**  a trunc to MTYPE_I4.
**/
#define TWO_EXP_23      8388608.0               /* 2**23, used for MTYPE_F4 */
#define TWO_EXP_52      4503599627370496.0      /* 2**52, used for MTYPE_F8 */
- /*
- **
- ** INTRN_I2F4NINT:
- ** INTRN_I4F4NINT:
- ** INTRN_I8F4NINT:
- ** INTRN_I2F8IDNINT:
- ** INTRN_I4F8IDNINT:
- ** INTRN_I8F8IDNINT:
- ** INTRN_I2FQIQNINT:
- ** INTRN_I4FQIQNINT:
- ** INTRN_I8FQIQNINT:
- ** INTRN_I2F16IQNINT:
- ** INTRN_I4F16IQNINT:
- ** INTRN_I8F16IQNINT:
- **
- ** change into
- ** rnd(x) roundoff >= 3
- ** trunc( x + .5 * sign(x) )
- */
- static WN *em_nearest_int(WN *block, TYPE_ID rtype, WN *x)
- {
- TYPE_ID type = WN_rtype(x);
- if (Fast_NINT_Allowed)
- {
- return WN_Rnd(type, rtype, x);
- }
- else if ((type == MTYPE_F4) || (type == MTYPE_F8))
- {
- WN *add;
- PREG_NUM xN;
- xN = AssignExpr(block, x, type);
- add = aux_nearest(type, xN);
- if (Fast_trunc_Allowed)
- {
- return WN_Trunc(type, rtype, add);
- }
- else
- {
- WN *rel, *select;
- double con= (type==MTYPE_F4) ? TWO_EXP_23 : TWO_EXP_52;
- rel = WN_GE(type,
- WN_Abs(type, WN_LdidPreg(type, xN)),
- WN_Floatconst(type, con));
- select = WN_Select(type, rel, WN_LdidPreg(type, xN), add);
- return WN_Trunc(type, rtype, select);
- }
- }
- else
- {
- return NULL;
- }
- }
- /*
- **
- ** INTRN_F4ANINT:
- ** INTRN_F8ANINT:
- ** INTRN_FQANINT:
- ** INTRN_F16ANINT:
- **
- ** change into
- ** cvt (float, trunc( x + .5 * sign(x) )) roundoff>= 3
- */
- static WN *em_nearest_aint(WN *block, TYPE_ID rtype, WN *x)
- {
- if (Fast_NINT_Allowed)
- {
- return aux_CvtRnd(rtype, x);
- }
- else if ((rtype == MTYPE_F4) || (rtype == MTYPE_F8))
- {
- PREG_NUM xN;
- WN *add, *cvt;
- xN = AssignExpr(block, x, rtype);
- add = aux_nearest(rtype, xN);
- /*
- * this is em_aint()
- */
- cvt = aux_CvtTrunc(rtype, add);
- if (Fast_trunc_Allowed)
- {
- return cvt;
- }
- else
- {
- WN *rel;
- double con= (rtype==MTYPE_F4) ? TWO_EXP_23 : TWO_EXP_52;
- rel = WN_GE(rtype,
- WN_Abs(rtype, WN_LdidPreg(rtype, xN)),
- WN_Floatconst(rtype, con));
- return WN_Select(rtype, rel, WN_LdidPreg(rtype, xN), cvt);
- }
- }
- return NULL;
- }
- /*
- **
- ** INTRN_F4AINT
- ** INTRN_F8AINT
- ** INTRN_FQAINT
- ** INTRN_F16AINT
- **
- ** change into
- ** cvt (float, trunc(x))
- */
- static WN *em_aint(WN *block, TYPE_ID rtype, WN *x)
- {
- if (Fast_trunc_Allowed)
- {
- return aux_CvtTrunc(rtype, x);
- }
- else if ((rtype == MTYPE_F4) || (rtype == MTYPE_F8))
- {
- PREG_NUM xN;
- WN *rel, *cvt;
- double con= (rtype==MTYPE_F4) ? TWO_EXP_23 : TWO_EXP_52;
- xN = AssignExpr(block, x, rtype);
- rel = WN_GE(rtype,
- WN_Abs(rtype, WN_LdidPreg(rtype, xN)),
- WN_Floatconst(rtype, con));
- cvt = aux_CvtTrunc(rtype, WN_LdidPreg(rtype, xN));
- return WN_Select(rtype, rel, WN_LdidPreg(rtype, xN), cvt);
- }
- else
- {
- return NULL;
- }
- }
- /*
- **
- ** change into
- ** | x | if y >= 0
- ** - | x | if y < 0
- **
- ** --> absN = | x |;
- ** --> (y>=0) ? absN : -absN;
- */
- static WN *em_sign(WN *block, WN *x, WN *y)
- {
- PREG_NUM absN;
- TYPE_ID type = WN_rtype(x);
- WN *abs, *select;
- #ifdef KEY // bug 9660
- if (MTYPE_is_integral(type) && ! MTYPE_signed(type))
- type = Mtype_TransferSign(MTYPE_I4, type);
- #endif
- #ifdef KEY // bug 12052
- if (MTYPE_is_integral(type) &&
- MTYPE_byte_size(type) < MTYPE_byte_size(WN_rtype(y)))
- type = Mtype_TransferSize(WN_rtype(y), type);
- #endif
- abs = WN_Abs(type, x);
- absN = AssignExpr(block, abs, type);
- select = WN_Select(type,
- WN_GE(type, y, WN_Zerocon(type)),
- WN_LdidPreg(type, absN),
- WN_Neg(type, WN_LdidPreg(type, absN)));
- return select;
- }
- /*
- **
- ** change into
- ** cvt (x) * cvt(y)
- */
- static WN *em_prod(WN *block, TYPE_ID rtype, WN *x, WN *y)
- {
- TYPE_ID type = WN_rtype(x);
- WN *mpy;
- mpy = WN_Mpy(rtype,
- WN_Cvt(type, rtype, x),
- WN_Cvt(type, rtype, y));
- return mpy;
- }
- /*
- **
- ** change into
- ** (x>y) ? (x-y) : 0
- */
- static WN *em_dim(WN *block, WN *x, WN *y)
- {
- PREG_NUM xN, yN;
- TYPE_ID type = WN_rtype(x);
- WN *rel, *sub, *select;
- xN = AssignExpr(block, x, type);
- yN = AssignExpr(block, y, type);
- rel = WN_GT(type,
- WN_LdidPreg(type, xN),
- WN_LdidPreg(type, yN));
- sub = WN_Sub(type,
- WN_LdidPreg(type, xN),
- WN_LdidPreg(type, yN));
- select = WN_Select(type,
- rel,
- sub,
- WN_Zerocon(type));
- return select;
- }
- /*
- **
- ** change into
- ** x - y * ( FLOAT ( |(x / y)| ))
- */
- static WN *em_mod_float(WN *block, WN *x, WN *y)
- {
- PREG_NUM xN, yN;
- TYPE_ID type = WN_rtype(x);
- WN *div, *cvt, *mpy, *sub;
- if ((type == MTYPE_F4) || (type == MTYPE_F8)) {
- xN = AssignExpr(block, x, type);
- yN = AssignExpr(block, y, type);
- div = WN_Div(type,
- WN_LdidPreg(type, xN),
- WN_LdidPreg(type, yN));
- cvt = em_aint(block, type, div);
- mpy = WN_Mpy(type,
- WN_LdidPreg(type, yN),
- cvt);
- sub = WN_Sub(type,
- WN_LdidPreg(type, xN),
- mpy);
- return sub;
- } else {
- return NULL;
- }
- }
- /*
- ** WN *build_mult_tree(block, TYPE_ID type, PREG_NUM xN, int pow)
- **
- ** Build a multiply tree to make shipiro happy.
- **
- ** Actually, create a series of temporaries to hold the powers that be.
- **
- ** ex. x ** 9 (= 1001)
- ** t0= x;
- ** t1= t0*t0; (x**2)
- ** t2= t1*t1; (x**4)
- ** t3= t2*t2; (x**8)
- ** ans = t3 * t0;
- **
- */
- #define BIT_IS_ON(x,i) ((x) & (1<<(i)))
- static WN *build_mult_tree(WN *block, TYPE_ID type, PREG_NUM xN, INT32 pow)
- {
- PREG_NUM powers[16]; /* could handle pow = 64k */
- INT32 i, n = 0;
- PREG_NUM xNm1;
- WN *tree = NULL;
- Is_True((pow>0), ("expected pow>0"));
- powers[n++] = xN;
- xNm1 = xN;
- for(i= 1; ((1<<i) <= pow); i++)
- {
- WN *mpy;
- mpy = WN_Mpy(type, WN_LdidPreg(type, xNm1), WN_LdidPreg(type, xNm1));
- xNm1 = AssignExpr(block, mpy, type);
- powers[n++] = xNm1;
- }
- for(i= 0; ((1<<i) <= pow); i++)
- {
- if (BIT_IS_ON(pow, i))
- {
- PREG_NUM powerN = powers[i];
- if (tree)
- {
- tree = WN_Mpy(type, tree, WN_LdidPreg(type, powerN));
- }
- else
- {
- tree = WN_LdidPreg(type, powerN);
- }
- }
- }
- return tree;
- }
- static WN *em_exp_float(WN *block, WN *x, WN *pow, TYPE_ID type)
- {
- if (Is_Constant(pow))
- {
- TCON con = Const_Val(pow);
- BOOL sqrt, rsqrt;
- #ifdef KEY
- BOOL sqrt_25, rsqrt_25, sqrt_75, rsqrt_75;
- BOOL cbrt_33, cbrt_66;
- #endif
- WN *tree, *x_copy;
- double n;
- /*
- * for complex x verify the power is a real number
- * (TODO) general complex ** complex
- */
- if (MTYPE_is_complex(type))
- {
- TCON Ipow;
- Ipow = Extract_Complex_Imag(con);
- if (Targ_To_Host_Float(Ipow) == 0.0)
- {
- con = Extract_Complex_Real(con);
- }
- else
- {
- return NULL;
- }
- }
- n = Targ_To_Host_Float(con);
- sqrt = rsqrt = FALSE;
- cbrt_33 = cbrt_66 = FALSE;
- #ifdef KEY
- sqrt_25 = rsqrt_25 = sqrt_75 = rsqrt_75 = FALSE;
- #endif
- if (trunc(n) == n)
- {
- ;
- }
- else if ((trunc(ABS(n))+.5) == ABS(n))
- {
- /*
- * if we need to multiply by sqrt we need a copy of x
- * as it might get changed underneath us.
- */
- if (n<0)
- rsqrt = TRUE;
- else
- sqrt = TRUE;
- x_copy = WN_COPY_Tree(x);
- }
- #ifdef KEY
- else if ((trunc(ABS(n))+.25) == ABS(n))
- {
- /*
- * if we need to multiply by sqrt we need a copy of x
- * as it might get changed underneath us.
- */
- if (n<0)
- rsqrt_25 = TRUE;
- else
- sqrt_25 = TRUE;
- x_copy = WN_COPY_Tree(x);
- }
- else if ((trunc(ABS(n))+.75) == ABS(n))
- {
- /*
- * if we need to multiply by sqrt we need a copy of x
- * as it might get changed underneath us.
- */
- if (n<0)
- rsqrt_75 = TRUE;
- else
- sqrt_75 = TRUE;
- x_copy = WN_COPY_Tree(x);
- }
- #ifdef TARG_X8664
- else if (ABS((trunc(n)+1.0/3) - n) < .0000001 &&
- ! (Is_Target_64bit() && !Is_Target_Anyx86() && OPT_Fast_Math))
- { // the pow in fast_math is faster than cbrt, so no point converting
- cbrt_33 = TRUE;
- x_copy = WN_COPY_Tree(x);
- }
- else if (ABS((trunc(n)+2.0/3) - n) < .0000001 &&
- ! (Is_Target_64bit() && !Is_Target_Anyx86() && OPT_Fast_Math))
- { // the pow in fast_math is faster than cbrt, so no point converting
- cbrt_66 = TRUE;
- x_copy = WN_COPY_Tree(x);
- }
- #endif
- #endif
- else
- {
- return NULL;
- }
- {
- WN *ipow = WN_Intconst(MTYPE_I4, (INT64) trunc(n));
- tree = em_exp_int(block, x, ipow, type);
- }
- if (sqrt || rsqrt)
- {
- #ifdef KEY
- // bug 4824: non-constant float x could be negative
- // bug 4990: Do the check only for C/C++ and if
- // -fmath-errno (-LANG:math_errno=on)
- if (!PU_f77_lang (Get_Current_PU()) &&
- !PU_f90_lang (Get_Current_PU()) && // ! Fortran
- LANG_Math_Errno && // -fmath-errno
- MTYPE_is_float (WN_rtype (x_copy)) &&
- (!Is_Constant (x_copy) ||
- Targ_To_Host_Float (Const_Val (x_copy)) < 0))
- return NULL;
- #endif // KEY
- #ifdef TARG_X8664
- // Bug 5935 - rsqrtsd or rsqrtpd is absent.
- if (rsqrt && (type == MTYPE_F8 || type == MTYPE_V16F8))
- return NULL;
- #endif
- if (tree)
- {
- /*
- * x ** n+.5 -> (x**n) * (x**.5)
- * where the function em_exp_int has already evaluated
- */
- PREG_NUM xN, treeN;
- WN *fractional;
- xN = AssignExpr(block, x_copy, type);
- treeN = AssignExpr(block, tree, type);
- fractional = (sqrt) ? WN_Sqrt(type, WN_LdidPreg(type, xN)) :
- WN_Rsqrt(type, WN_LdidPreg(type, xN));
- tree = WN_Mpy(type,
- WN_LdidPreg(type, treeN),
- fractional);
- }
- }
- #ifdef KEY // bug 6932
- // evaluate (x**0.25) as sqrt(sqrt(x))
- if (sqrt_25 || rsqrt_25)
- {
- if (!PU_f77_lang (Get_Current_PU()) &&
- !PU_f90_lang (Get_Current_PU()) && // ! Fortran
- LANG_Math_Errno && // -fmath-errno
- MTYPE_is_float (WN_rtype (x_copy)) &&
- (!Is_Constant (x_copy) ||
- Targ_To_Host_Float (Const_Val (x_copy)) < 0))
- return NULL;
- #ifdef TARG_X8664
- // rsqrtsd or rsqrtpd is absent.
- if (rsqrt_25 && (type == MTYPE_F8 || type == MTYPE_V16F8))
- return NULL;
- #endif
- if (tree)
- {
- /*
- * x ** n+.25 -> (x**n) * (x**.25)
- * where the function em_exp_int has already evaluated
- */
- PREG_NUM xN, treeN;
- WN *fractional;
- xN = AssignExpr(block, x_copy, type);
- treeN = AssignExpr(block, tree, type);
- if (sqrt_25)
- fractional = WN_Sqrt(type, WN_Sqrt(type, WN_LdidPreg(type, xN)));
- else
- fractional = WN_Sqrt(type, WN_Rsqrt(type, WN_LdidPreg(type, xN)));
- tree = WN_Mpy(type,
- WN_LdidPreg(type, treeN),
- fractional);
- }
- }
- // evaluate (x**0.75) as sqrt(x)*sqrt(sqrt(x))
- if (sqrt_75 || rsqrt_75)
- {
- if (!PU_f77_lang (Get_Current_PU()) &&
- !PU_f90_lang (Get_Current_PU()) && // ! Fortran
- LANG_Math_Errno && // -fmath-errno
- MTYPE_is_float (WN_rtype (x_copy)) &&
- (!Is_Constant (x_copy) ||
- Targ_To_Host_Float (Const_Val (x_copy)) < 0))
- return NULL;
- #ifdef TARG_X8664
- // rsqrtsd or rsqrtpd is absent.
- if (rsqrt_75 && (type == MTYPE_F8 || type == MTYPE_V16F8))
- return NULL;
- #endif
- if (tree)
- {
- /*
- * x ** n+.75 -> (x**n) * (x**.75)
- * where the function em_exp_int has already evaluated
- */
- PREG_NUM xN, treeN;
- WN *fractional;
- xN = AssignExpr(block, x_copy, type);
- treeN = AssignExpr(block, tree, type);
- if (sqrt_75)
- fractional = WN_Mpy(type,
- WN_Sqrt(type, WN_LdidPreg(type, xN)),
- WN_Sqrt(type,
- WN_Sqrt(type, WN_LdidPreg(type, xN))));
- else
- fractional = WN_Mpy(type,
- WN_Rsqrt(type, WN_LdidPreg(type, xN)),
- WN_Rsqrt(type,
- WN_Sqrt(type, WN_LdidPreg(type, xN))));
-
- tree = WN_Mpy(type,
- WN_LdidPreg(type, treeN),
- fractional);
- }
- }
- // evaluate (x**0.333333) by calling cbrt()/cbrtf()
- if (cbrt_33 || cbrt_66)
- {
- if (type != MTYPE_F4 && type != MTYPE_F8)
- return NULL;
- if (tree)
- {
- /*
- * x ** n+1/3 -> (x**n) * (x**1/3)
- * where the function em_exp_int has already evaluated
- */
- PREG_NUM xN = AssignExpr(block, x_copy, type);
- WN *kid = WN_CreateParm(type, WN_LdidPreg(type, xN), Be_Type_Tbl(type),
- WN_PARM_BY_VALUE | WN_PARM_READ_ONLY);
- WN* fraction = WN_Create_Intrinsic(
- OPCODE_make_op(OPR_INTRINSIC_OP, type, MTYPE_V),
- type == MTYPE_F4 ? INTRN_F4CBRT : INTRN_F8CBRT,
- 1, &kid);
- if (cbrt_66) {
- PREG_NUM x13 = AssignExpr(block, fraction, type);
- fraction = WN_Mpy(type, WN_LdidPreg(type, x13),
- WN_LdidPreg(type, x13));
- }
- tree = WN_Mpy(type, tree, fraction);
- }
- }
- #endif
- return tree;
- }
-
- return NULL;
- }
- static WN *em_exp_int(WN *block, WN *x, WN *pow, TYPE_ID type)
- {
- if (Is_Integer_Constant(pow))
- {
- INT32 n = WN_const_val(pow);
- INT32 absN = ABS(n);
- WN *exp= NULL;
- if (em_exp_int_max < absN)
- return NULL;
- switch(n) {
- case 1:
- exp = x;
- break;
- case -1:
- exp = WN_Inverse(type, x);
- break;
- case 0:
- if (MTYPE_type_class(type) & MTYPE_CLASS_INTEGER)
- exp = WN_Intconst(type, 1);
- else
- exp = WN_Floatconst(type, 1.0);
- break;
- case 2:
- {
- PREG_NUM xN;
- xN = AssignExpr(block, x, type);
-
- exp = WN_Mpy(type,
- WN_LdidPreg(type, xN),
- WN_LdidPreg(type, xN));
- break;
- }
- default:
- {
- PREG_NUM xN;
- if (Fast_Exp_Allowed)
- {
- xN = AssignExpr(block, x, type);
-
- exp = build_mult_tree(block, type, xN, absN);
- WN_Delete(pow);
- if (n < 0)
- exp = WN_Inverse(type, exp);
- }
- }
- }
- return exp;
- }
- else if (Is_Integer_Constant(x))
- {
- /*
- * Optimize {-2,-1,0,1,2} ** n
- */
- INT32 val = WN_const_val(x);
- switch(val)
- {
- case -2:
- {
- /*
- * (n>=0) ? ( (n&1) ? - (1<<n) : 1<<n ) : 0
- */
- PREG_NUM powN, shlN;
- WN *shl, *band, *cond, *select, *ge;
- powN = AssignExpr(block, pow, type);
- shl = WN_Shl(type,
- WN_Intconst(type, 1),
- WN_LdidPreg(type, powN));
- shlN = AssignExpr(block, shl, type);
- band = WN_Band(type,
- WN_LdidPreg(type, powN),
- WN_Intconst(type, 1));
- cond = WN_EQ(type, band, WN_Zerocon(type));
- select = WN_Select(type,
- cond,
- WN_LdidPreg(type, shlN),
- WN_Neg(type, WN_LdidPreg(type, shlN)));
- ge = WN_GE(type,
- WN_LdidPreg(type, powN),
- WN_Zerocon(type));
- return WN_Select(type,
- ge,
- select,
- WN_Zerocon(type));
- }
- case -1:
- {
- /*
- * (n&1) ? -1 : 1;
- */
- WN *band;
- band = WN_Band(type, pow, WN_Intconst(type, 1));
- return WN_Select(type,
- WN_EQ(type, band, WN_Zerocon(type)),
- WN_Intconst(type, 1),
- WN_Intconst(type, -1));
- }
- case 0:
- /*
- * (n==0) ? 1 : 0
- * simpler is (n==0)
- */
- return WN_EQ(type, pow, WN_Zerocon(type));
- case 1:
- /*
- * always and forever 1
- */
- return WN_Intconst(type, 1);
- case 2:
- {
- /*
- * (n>=0) ? 1<<n : 0
- * simpler is (n>=0) << n
- */
- WN *ge;
- PREG_NUM powN;
- powN = AssignExpr(block, pow, type);
- ge = WN_GE(type,
- WN_LdidPreg(type, powN),
- WN_Zerocon(type));
- return WN_Shl(type,
- ge,
- WN_LdidPreg(type, powN));
-
- }
- }
- }
-
- return NULL;
- }
- /*
- ** quad negate looks like complex negate
- **
- ** if q = (x,y) then
- ** -q = (-x, -y)
- **
- ** TODO nail down preg offset interface
- ** Bug 12895: MIPS quad represents ieee 128, so -q = (-x, y)
- */
- static WN *em_quad_neg(WN *block, WN *tree)
- {
- TYPE_ID newType;
- TYPE_ID type = WN_rtype(tree);
- PREG_NUM qN, qNlo;
- /*
- * assign a quad preg temp as we will be referencing twice (sortof)
- */
- qN = AssignExpr(block, WN_kid0(tree), type);
- if (MTYPE_is_complex(type))
- {
- newType = MTYPE_C8;
- qNlo = qN+2;
- }
- else /* assume MTYPE_FQ or MTYPE_F16 */
- {
- newType = MTYPE_F8;
- qNlo = qN+1;
- }
-
- {
- WN *wn, *st;
- ST *npreg = MTYPE_To_PREG(newType);
- #ifdef TARG_MIPS
- wn = WN_LdidPreg(newType, qN); // Bug 12895
- #else
- wn = WN_Neg(newType, WN_LdidPreg(newType, qN));
- #endif
- st = WN_StidIntoPreg(newType, qN, npreg, wn);
- WN_INSERT_BlockLast(block, st);
- wn = WN_Neg(newType, WN_LdidPreg(newType, qNlo));
- st = WN_StidIntoPreg(newType, qNlo, npreg, wn);
- WN_INSERT_BlockLast(block, st);
- }
- WN_Delete(tree);
- return WN_LdidPreg(type, qN);
- }
- static WN *em_quad_abs(WN *block, WN *tree)
- {
- TYPE_ID newType;
- TYPE_ID type = WN_rtype(tree);
- PREG_NUM qN, qNlo;
- /*
- * assign a quad preg temp as we will be referencing twice (sortof)
- */
- qN = AssignExpr(block, WN_kid0(tree), type);
- Is_True(! MTYPE_is_complex(type), ("em_quad_abs emulates FQ not CQ"));
- newType = MTYPE_F8;
- qNlo = qN+1;
- {
- WN *wn, *st;
- ST *npreg = MTYPE_To_PREG(newType);
- #ifdef TARG_MIPS
- wn = WN_LdidPreg(newType, qN); // Bug 12895
- #else
- wn = WN_Abs(newType, WN_LdidPreg(newType, qN));
- #endif
- st = WN_StidIntoPreg(newType, qN, npreg, wn);
- WN_INSERT_BlockLast(block, st);
- wn = WN_Abs(newType, WN_LdidPreg(newType, qNlo));
- st = WN_StidIntoPreg(newType, qNlo, npreg, wn);
- WN_INSERT_BlockLast(block, st);
- }
- WN_Delete(tree);
- return WN_LdidPreg(type, qN);
- }
- /*
- ** There is no no native quad select, so we must turn the
- ** expression back into an if/else block
- **
- ** select: (cond) ? exp1 : exp2
- **
- ** --> if (cond) qN = exp1;
- ** else qN = exp2;
- ** return qN
- **
- */
- static WN *em_split_select(WN *block, WN *tree)
- {
- TYPE_ID rtype = WN_rtype(tree);
- PREG_NUM qN;
- WN *if_then, *if_else;
- if_then = WN_CreateBlock();
- if_else = WN_CreateBlock();
- {
- WN *exp1 = WN_kid1(tree);
- qN = AssignExpr(if_then, exp1, rtype);
- }
- {
- WN *wn;
- WN *exp2 = WN_kid2(tree);
- ST *preg = MTYPE_To_PREG(rtype);
- wn = WN_StidIntoPreg(rtype, qN, preg, exp2);
- WN_INSERT_BlockLast(if_else, wn);
- }
- {
- WN *IF;
- WN *cond = WN_kid0(tree);
- IF = WN_CreateIf(cond, if_then, if_else);
- WN_INSERT_BlockLast(block, IF);
- }
- WN_Delete(tree);
- return WN_LdidPreg(rtype, qN);
- }
- /*
- ** Evaluate the following function
- **
- ** Definition
- ** x y INTRN_DIVFLOOR INTRN_DIVCEIL
- ** --- -------------- -------------
- ** + + x / y (x+y-1) / y
- **
- ** - - x / y (x+y+1) / y
- **
- ** + - (x+ -1-y)/y x / y
- **
- ** - + (x+ 1-y)/y x / y
- **
- **
- ** The issue was to evaluate (divfloor) without branch code.
- **
- ** Tricks
- ** f(x) = -1 (x<0)
- ** +1 (x>=0)
- ** {
- ** t= x>>31;
- ** f= t+t+1
- ** }
- ** MASK(x,y,v)= 0 (x>=0, y>=0), (x<0, y<0) ++, --
- ** v (x>=0, y<0), (x<0, y>=0) +-, -+
- ** {
- ** t= (x^y)>>31
- ** MASK= t & v
- ** }
- **
- ** The cleverness (shapiro's) was the composition of these functions
- ** to evaluate divfloor.
- **
- ** DIVFLOOR(x,y)=
- ** v = f(y) - y; (-1-y) [+-], (+1-y) [-+]
- ** (x + MASK(x,y,v)) / y
- **
- ** DIVCEIL(x,y) = -DIVFLOOR(-x,y)
- **
- ** x,y are assumed integral or we could just do a divide/floor
- **
- **
- */
- static WN *em_divfloor(WN *block, TYPE_ID type, WN *x, WN *y)
- {
- PREG_NUM xN, yN;
- WN *numer, *div;
- Is_True((MTYPE_is_integral(WN_rtype(x)) &&
- MTYPE_is_integral(WN_rtype(y))),
- ("em_divfloor() arguments should be type integral"));
- xN = AssignExpr(block, x, type);
- yN = AssignExpr(block, y, type);
-
- {
- /*
- * one = 1 (y >= 0)
- * -1 (y < 0)
- */
- TYPE_ID ytype = WN_rtype(y);
- WN *sra, *add, *one, *bxor, *mask, *sub, *band;
- #ifdef TARG_X8664
- // Bug 3264 - This algorithm requires that byte size be identical for
- // ytype and type, for zero-extended 64-bit target ISA.
- if (MTYPE_is_unsigned(ytype) &&
- MTYPE_byte_size(ytype) < MTYPE_byte_size(type))
- ytype = type;
- #endif
- sra = WN_Ashr(type,
- WN_LdidPreg(type, yN),
- WN_Intconst(type, MTYPE_size_reg(ytype)-1));
- add = WN_Add(type,
- sra,
- WN_COPY_Tree(sra));
- one = WN_Add(ytype,
- add,
- WN_Intconst(ytype, 1));
- /*
- * mask = 0 (x,y)= ++ --
- * mask = -1 (x,y)= +- +-
- */
- bxor = WN_Bxor(ytype,
- WN_LdidPreg(type, xN),
- WN_LdidPreg(type, yN));
- mask = WN_Ashr(type,
- bxor,
- WN_Intconst(type, MTYPE_size_reg(type)-1));
- /*
- * sub = 1 - y (y >= 0)
- * -1 - y (y < 0)
- */
- sub = WN_Sub(type, one, WN_LdidPreg…
Large files files are truncated, but you can click here to view the full file