
/pathscale/be/com/emulate.cxx

https://github.com/somian/Path64
C++ | 5500 lines | 3562 code | 761 blank | 1177 comment | 398 complexity | 585ce9277290458cdce41b68e1891e67 MD5
Possible License(s): GPL-2.0, LGPL-2.0

Large files are truncated; the full file is available in the repository.

  1. /*
  2. * Copyright (C) 2007, 2008, 2009 PathScale, LLC. All Rights Reserved.
  3. */
  4. /*
  5. * Copyright (C) 2006, 2007. QLogic Corporation. All Rights Reserved.
  6. */
  7. /*
  8. * Copyright 2003, 2004, 2005, 2006 PathScale, Inc. All Rights Reserved.
  9. */
  10. /*
  11. Copyright (C) 2000, 2001 Silicon Graphics, Inc. All Rights Reserved.
  12. Path64 is free software; you can redistribute it and/or modify it
  13. under the terms of the GNU General Public License as published by
  14. the Free Software Foundation; either version 3, or (at your option)
  15. any later version.
  16. Path64 is distributed in the hope that it will be useful, but WITHOUT
  17. ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  18. or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
  19. License for more details.
  20. You should have received a copy of the GNU General Public License
  21. along with Path64; see the file COPYING. If not, write to the Free
  22. Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  23. 02110-1301, USA.
  24. Special thanks goes to SGI for their continued support to open source
  25. */
  26. #ifdef USE_PCH
  27. #include "be_com_pch.h"
  28. #endif /* USE_PCH */
  29. #pragma hdrstop
  30. #include <math.h>
  31. #if defined(BUILD_OS_DARWIN)
  32. #include <limits.h>
  33. #else /* defined(BUILD_OS_DARWIN) */
  34. #include <values.h>
  35. #endif /* defined(BUILD_OS_DARWIN) */
  36. #include <alloca.h>
  37. #include "defs.h"
  38. #include "config.h"
  39. #include "config_debug.h"
  40. #include "config_opt.h"
  41. #include "config_targ_opt.h"
  42. #include "errors.h"
  43. #include "erglob.h"
  44. #include "tracing.h"
  45. #include "stab.h"
  46. #include "data_layout.h"
  47. #include "wn.h"
  48. #include "wn_util.h"
  49. #include "const.h"
  50. #include "targ_const.h"
  51. #include "targ_sim.h"
  52. #include "fb_whirl.h"
  53. #include "be_symtab.h"
  54. #include "intrn_info.h"
  55. #if (__GNUC__ == 2)
  56. //
  57. // Provide trunc(), which doesn't exist in the GNU library. This is a
  58. // quick and dirty hack, and should be handled elsehow.
  59. //
  60. static inline double trunc(double d)
  61. {
  62. if (d < 0.0) {
  63. return 1.0 + floor(d);
  64. }
  65. else {
  66. return floor(d);
  67. }
  68. }
  69. #endif
  70. /*
  71. ** For lack of a better word, these emulations are run time
  72. ** routines that supply functionality to whirl expression nodes
  73. **
  74. ** The list was ripped off from ragnarok and may be
  75. ** incomplete/NYI
  76. */
  77. typedef enum
  78. {
  79. EM_TRAPUV, /* sets fpc_csr to interrupt on NaN */
  80. EM_RTS_CHECKSTACK, /* checks for stack overflow */
  81. EM_LL_MUL, /* double-word multiply */
  82. EM_LL_DIV, /* double-word divide */
  83. EM_ULL_DIV, /* unsigned double-word divide */
  84. EM_LL_MOD, /* double-word mod */
  85. EM_LL_REM, /* double-word remainder */
  86. EM_ULL_REM, /* unsigned double-word remainder */
  87. EM_LL_LSHIFT, /* double-word left shift */
  88. EM_LL_RSHIFT, /* double-word right shift */
  89. EM_ULL_RSHIFT, /* unsigned double-word right shift */
  90. EM_LL_M3_DSLLV, /* mips 3 simulation of dsllv */
  91. EM_LL_M3_DSRAV, /* mips 3 simulation of dsrav */
  92. EM_LL_M3_DSRLV, /* mips 3 simulation of dsrlv */
  93. EM_LL_TO_F, /* cvt double-word to float */
  94. EM_ULL_TO_F, /* cvt unsigned double-word to float */
  95. EM_LL_TO_D, /* cvt double-word to double float */
  96. EM_ULL_TO_D, /* cvt unsigned double-word to double float */
  97. EM_F_TO_LL, /* cvt float to double-word */
  98. EM_F_TO_ULL, /* cvt float to unsigned double-word */
  99. EM_F_ROUND_LL_F, /* round float to float */
  100. EM_F_TRUNC_LL_F, /* trunc float to float */
  101. EM_D_TO_LL, /* cvt double float to double-word */
  102. EM_D_TO_ULL, /* cvt double float to unsigned double-word */
  103. EM_D_ROUND_LL_D, /* round double to double */
  104. EM_D_TRUNC_LL_D, /* trunc double to double */
  105. EM_LL_BIT_EXTRACT , /* double-word bit-field extraction */
  106. EM_LL_BIT_INSERT , /* double-word bit-field insertion */
  107. EM_Q_ABS, /* quad absolute value */
  108. EM_Q_SQRT, /* quad square root */
  109. EM_Q_ADD, /* quad plus */
  110. EM_Q_SUB, /* quad minus */
  111. EM_Q_MPY, /* quad multiply */
  112. EM_Q_DIV, /* quad divide */
  113. EM_Q_MAX1, /* quad max */
  114. EM_Q_MIN1, /* quad min */
  115. EM_Q_EQ, /* quad equal */
  116. EM_Q_NE, /* quad not equal */
  117. EM_Q_GE, /* quad greater equal */
  118. EM_Q_GT, /* quad greater than */
  119. EM_Q_LE, /* quad less equal */
  120. EM_Q_LT, /* quad less than */
  121. EM_SNGL_Q, /* convert quad to single */
  122. EM_DBLE_Q, /* convert quad to double */
  123. EM_KI_QINT, /* convert quad to 64 bits int */
  124. EM_JI_QINT, /* convert quad to 32 bits int */
  125. EM_Q_EXT, /* convert float to quad */
  126. EM_Q_EXTD, /* convert double to quad */
  127. EM_Q_FLOTK, /* convert to quad from 64 bits int */
  128. EM_Q_FLOTKU, /* convert to quad from unsigned 64 bits int */
  129. EM_Q_FLOTJ, /* convert to quad from 32 bits int */
  130. EM_Q_FLOTJU, /* convert to quad from unsigned 32 bits int */
  131. EM_KIQNNT, /* round quad to closest 64 bits int value */
  132. EM_JIQNNT, /* round quad to closest 32 bits int value */
  133. EM_C4_SQRT, /* float complex sqrt */
  134. EM_C8_SQRT, /* double complex sqrt */
  135. EM_CQ_SQRT, /* quad complex sqrt */
  136. EM_C4_RSQRT, /* float complex reciprocal sqrt */
  137. EM_C8_RSQRT, /* double complex reciprocal sqrt */
  138. EM_CQ_RSQRT, /* quad complex reciprocal sqrt */
  139. EM_C4_ABS, /* float complex abs */
  140. EM_C8_ABS, /* double complex abs */
  141. EM_CQ_ABS, /* quad complex abs */
  142. EM_KI_QCEIL, /* ceil quad to 64 bits int (f90 only) */
  143. EM_JI_QCEIL, /* ceil quad to 32 bits int (f90 only) */
  144. EM_KI_QFLOOR, /* floor quad to 64 bits int (f90 only) */
  145. EM_JI_QFLOOR, /* floor quad to 32 bits int (f90 only) */
  146. EM_LAST /* sentinel */
  147. } EMULATION;
  148. /*
  149. ** describe calling semantics for FE and runtime
  150. ** intrinsics and expression
  151. */
  152. typedef enum
  153. {
  154. COERCE_none,
  155. COERCE_by_reference,
  156. COERCE_by_value,
  157. COERCE_struct_by_value,
  158. COERCE_struct_by_reference,
  159. COERCE_split_complex
  160. } COERCE, *COERCEp;
  161. typedef struct EM_ROUTINES
  162. {
  163. EMULATION id;
  164. const char *functionName;
  165. INT32 functionAttributes;
  166. COERCE runtimeArg0coercion;
  167. } EM_ROUTINES, *EM_ROUTINESp;
  168. #define EM_id(x) em_routines[x].id
  169. #define EM_rt_name(x) em_routines[x].functionName
  170. #define EM_attributes(x) em_routines[x].functionAttributes
  171. #define EM_coerce0(x) em_routines[x].runtimeArg0coercion
  172. /*
  173. ** Keep track of intrinsic/emulation arguments
  174. ** Problems we are trying to solve
  175. **
  176. ** COERCE_by_reference
  177. ** are (unfortunately) provided by the FE to match the
  178. ** run time routine. When we get the argument we might have
  179. ** an address (anonymous pointer) and hence, lost the
  180. ** type to dereference (if we are trying to inline it)
  181. **
  182. ** COERCE_split_complex
  183. ** complex are split into real/imaginary pairs doubling
  184. ** the number of arguments
  185. **
  186. ** This entire mechanism should be provided by the FE
  187. ** as part of wtable.h
  188. */
  189. #define NSE PU_NO_SIDE_EFFECTS
  190. #define PURE_NSE (PU_IS_PURE | NSE)
  191. #define INVALID NULL
  192. /*
  193. ** The emulation table may not yet be complete (or used)
  194. ** The fields are
  195. **
  196. ** EMULATION id;
  197. ** The table must be kept in order with the enumeration
  198. ** as it is a direct lookup
  199. **
  200. ** char *functionName;
  201. ** The exact external name, no underbars
  202. **
  203. ** INT32 functionAttributes;
  204. **
  205. ** COERCEp functionArgCoercion;
  206. ** Actual to runtime formal conversion
  207. ** The child of an expression/intrinsic WN needs to be
  208. ** converted to call its runtime function.
  209. ** ex.
  210. ** complex routines are now split-by_value
  211. **
  212. ** These routines are all by value so we already know the
  213. ** argument type
  214. */
  215. #define NONE 0
  216. const EM_ROUTINES em_routines[]=
  217. {
  218. EM_TRAPUV, "__trapuv", PURE_NSE, COERCE_none,
  219. EM_RTS_CHECKSTACK,"_RtlCheckStack",PURE_NSE, COERCE_none,
  220. EM_LL_MUL, "__ll_mul", PURE_NSE, COERCE_none,
  221. EM_LL_DIV, "__ll_div", PURE_NSE, COERCE_none,
  222. EM_ULL_DIV, "__ull_div", PURE_NSE, COERCE_none,
  223. EM_LL_MOD, "__ll_mod", PURE_NSE, COERCE_none,
  224. EM_LL_REM, "__ll_rem", PURE_NSE, COERCE_none,
  225. EM_ULL_REM, "__ull_rem", PURE_NSE, COERCE_none,
  226. EM_LL_LSHIFT, "__ll_lshift", PURE_NSE, COERCE_none,
  227. EM_LL_RSHIFT, "__ll_rshift", PURE_NSE, COERCE_none,
  228. EM_ULL_RSHIFT, "__ull_rshift", PURE_NSE, COERCE_none,
  229. EM_LL_M3_DSLLV, "__dsllv", PURE_NSE, COERCE_none,
  230. EM_LL_M3_DSRAV, "__dsrav", PURE_NSE, COERCE_none,
  231. EM_LL_M3_DSRLV, "__dsrlv", PURE_NSE, COERCE_none,
  232. EM_LL_TO_F, "__ll_to_f", PURE_NSE, COERCE_none,
  233. EM_ULL_TO_F, "__ull_to_f", PURE_NSE, COERCE_none,
  234. EM_LL_TO_D, "__ll_to_d", PURE_NSE, COERCE_none,
  235. EM_ULL_TO_D, "__ull_to_d", PURE_NSE, COERCE_none,
  236. EM_F_TO_LL, "__f_to_ll", PURE_NSE, COERCE_none,
  237. EM_F_TO_ULL, "__f_to_ull", PURE_NSE, COERCE_none,
  238. EM_F_ROUND_LL_F, "__f_round_ll_f",PURE_NSE, COERCE_none,
  239. EM_F_TRUNC_LL_F, "__f_trunc_ll_f",PURE_NSE, COERCE_none,
  240. EM_D_TO_LL, "__d_to_ll", PURE_NSE, COERCE_none,
  241. EM_D_TO_ULL, "__d_to_ull", PURE_NSE, COERCE_none,
  242. EM_D_ROUND_LL_D, "__d_round_ll_d",PURE_NSE, COERCE_none,
  243. EM_D_TRUNC_LL_D, "__d_trunc_ll_d",PURE_NSE, COERCE_none,
  244. EM_LL_BIT_EXTRACT,"__ll_bit_extract",PURE_NSE,COERCE_none,
  245. EM_LL_BIT_INSERT, "__ll_bit_insert",PURE_NSE, COERCE_none,
  246. EM_Q_ABS, "__qabs", PURE_NSE, COERCE_none,
  247. EM_Q_SQRT, "__qsqrt", PURE_NSE, COERCE_none,
  248. EM_Q_ADD, "__q_add", PURE_NSE, COERCE_none,
  249. EM_Q_SUB, "__q_sub", PURE_NSE, COERCE_none,
  250. EM_Q_MPY, "__q_mul", PURE_NSE, COERCE_none,
  251. EM_Q_DIV, "__q_div", PURE_NSE, COERCE_none,
  252. EM_Q_MAX1, "__q_max1", PURE_NSE, COERCE_none,
  253. EM_Q_MIN1, "__q_min1", PURE_NSE, COERCE_none,
  254. EM_Q_EQ, "__q_eq", PURE_NSE, COERCE_none,
  255. EM_Q_NE, "__q_ne", PURE_NSE, COERCE_none,
  256. EM_Q_GE, "__q_ge", PURE_NSE, COERCE_none,
  257. EM_Q_GT, "__q_gt", PURE_NSE, COERCE_none,
  258. EM_Q_LE, "__q_le", PURE_NSE, COERCE_none,
  259. EM_Q_LT, "__q_lt", PURE_NSE, COERCE_none,
  260. EM_SNGL_Q, "__sngl_q", PURE_NSE, COERCE_none,
  261. EM_DBLE_Q, "__dble_q", PURE_NSE, COERCE_none,
  262. EM_KI_QINT, "__ki_qint", PURE_NSE, COERCE_none,
  263. EM_JI_QINT, "__ji_qint", PURE_NSE, COERCE_none,
  264. EM_Q_EXT, "__q_ext", PURE_NSE, COERCE_none,
  265. EM_Q_EXTD, "__q_extd", PURE_NSE, COERCE_none,
  266. EM_Q_FLOTK, "__q_flotk", PURE_NSE, COERCE_none,
  267. EM_Q_FLOTKU, "__q_flotku", PURE_NSE, COERCE_none,
  268. EM_Q_FLOTJ, "__q_flotj", PURE_NSE, COERCE_none,
  269. EM_Q_FLOTJU, "__q_flotju", PURE_NSE, COERCE_none,
  270. EM_KIQNNT, "__kiqnnt", PURE_NSE, COERCE_none,
  271. EM_JIQNNT, "__jiqnnt", PURE_NSE, COERCE_none,
  272. EM_C4_SQRT, "__csqrt", PURE_NSE, COERCE_split_complex,
  273. EM_C8_SQRT, "__zsqrt", PURE_NSE, COERCE_split_complex,
  274. EM_CQ_SQRT, "__cqsqrt", PURE_NSE, COERCE_split_complex,
  275. EM_C4_RSQRT, INVALID, NONE, COERCE_none,
  276. EM_C8_RSQRT, INVALID, NONE, COERCE_none,
  277. EM_CQ_RSQRT, INVALID, NONE, COERCE_none,
  278. EM_C4_ABS, INVALID, NONE, COERCE_none,
  279. EM_C8_ABS, INVALID, NONE, COERCE_none,
  280. EM_CQ_ABS, INVALID, NONE, COERCE_none,
  281. EM_KI_QCEIL, "_CEILING_16_8", PURE_NSE, COERCE_none,
  282. EM_JI_QCEIL, "_CEILING_16_4", PURE_NSE, COERCE_none,
  283. EM_KI_QFLOOR, "_FLOOR_16_8", PURE_NSE, COERCE_none,
  284. EM_JI_QFLOOR, "_FLOOR_16_4", PURE_NSE, COERCE_none,
  285. };
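/*
** A minimal sanity-check sketch for the ordering rule noted above (the
** table must stay in step with the EMULATION enum, since lookups are
** direct indexes); hypothetical helper, not part of this file:
**
**   static void verify_em_routines(void)
**   {
**     for (INT32 i = 0; i < EM_LAST; i++)
**       Is_True(EM_id(i) == (EMULATION) i,
**               ("em_routines[] out of order at entry %d", i));
**   }
*/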
  286. typedef struct
  287. {
  288. INTRINSIC id;
  289. COERCE runtimeArg0;
  290. COERCE runtimeArg1;
  291. } INTRINSIC_RUNTIME_FORMALS;
  292. #define INTR_id(x) intrinsic_runtime_formals[(x)].id
  293. #define INTR_coerce0(x) intrinsic_runtime_formals[(x)].runtimeArg0
  294. #define INTR_coerce1(x) intrinsic_runtime_formals[(x)].runtimeArg1
  295. /*
  296. ** TODO
  297. ** eventually the FE will supply this information
  298. ** from the intrinsic table, when we finish the implementation
  299. */
  300. INTRINSIC_RUNTIME_FORMALS intrinsic_runtime_formals[]=
  301. {
  302. INTRN_C4I4EXPEXPR, COERCE_split_complex, COERCE_none,
  303. INTRN_C4I8EXPEXPR, COERCE_split_complex, COERCE_none,
  304. INTRN_C8I4EXPEXPR, COERCE_split_complex, COERCE_none,
  305. INTRN_C8I8EXPEXPR, COERCE_split_complex, COERCE_none,
  306. INTRN_CQI4EXPEXPR, COERCE_split_complex, COERCE_none,
  307. INTRN_CQI8EXPEXPR, COERCE_split_complex, COERCE_none,
  308. INTRN_C16I4EXPEXPR, COERCE_split_complex, COERCE_none,
  309. INTRN_C16I8EXPEXPR, COERCE_split_complex, COERCE_none,
  310. INTRN_C4EXPEXPR, COERCE_split_complex, COERCE_split_complex,
  311. INTRN_C8EXPEXPR, COERCE_split_complex, COERCE_split_complex,
  312. INTRN_CQEXPEXPR, COERCE_split_complex, COERCE_split_complex,
  313. INTRN_C16EXPEXPR, COERCE_split_complex, COERCE_split_complex,
  314. INTRN_F4C4ABS, COERCE_split_complex, COERCE_none,
  315. INTRN_F8C8ABS, COERCE_split_complex, COERCE_none,
  316. INTRN_FQCQABS, COERCE_split_complex, COERCE_none,
  317. INTRN_F16C16ABS, COERCE_split_complex, COERCE_none,
  318. INTRN_C4EXP, COERCE_split_complex, COERCE_none,
  319. INTRN_C8EXP, COERCE_split_complex, COERCE_none,
  320. INTRN_CQEXP, COERCE_split_complex, COERCE_none,
  321. INTRN_C16EXP, COERCE_split_complex, COERCE_none,
  322. INTRN_C4LOG, COERCE_split_complex, COERCE_none,
  323. INTRN_C8LOG, COERCE_split_complex, COERCE_none,
  324. INTRN_CQLOG, COERCE_split_complex, COERCE_none,
  325. INTRN_C16LOG, COERCE_split_complex, COERCE_none,
  326. INTRN_C4COS, COERCE_split_complex, COERCE_none,
  327. INTRN_C8COS, COERCE_split_complex, COERCE_none,
  328. INTRN_CQCOS, COERCE_split_complex, COERCE_none,
  329. INTRN_C16COS, COERCE_split_complex, COERCE_none,
  330. INTRN_C4SIN, COERCE_split_complex, COERCE_none,
  331. INTRN_C8SIN, COERCE_split_complex, COERCE_none,
  332. INTRN_CQSIN, COERCE_split_complex, COERCE_none,
  333. INTRN_C16SIN, COERCE_split_complex, COERCE_none
  334. };
  335. INT32 intrinsic_runtime_formals_size = sizeof(intrinsic_runtime_formals) /
  336. sizeof( INTRINSIC_RUNTIME_FORMALS);
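/*
** Example of what COERCE_split_complex in the table above implies: a
** single complex actual is passed to the runtime routine as its real
** and imaginary scalar halves, so e.g. an MTYPE_C8 argument of
** INTRN_F8C8ABS arrives as two MTYPE_F8 values (re, im), doubling the
** argument count exactly as described in the earlier comment block.
*/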
  337. typedef struct
  338. {
  339. INTRINSIC id;
  340. TYPE_ID parameterType0;
  341. TYPE_ID parameterType1;
  342. TYPE_ID parameterType2;
  343. } INTRINSIC_PARAMETER_TYPE;
  344. #define INTR_parm_id(x) intrinsic_parameter_type[(x)].id
  345. #define INTR_parmtype0(x) intrinsic_parameter_type[(x)].parameterType0
  346. #define INTR_parmtype1(x) intrinsic_parameter_type[(x)].parameterType1
  347. #define INTR_parmtype2(x) intrinsic_parameter_type[(x)].parameterType2
  348. INTRINSIC_PARAMETER_TYPE intrinsic_parameter_type[]=
  349. {
  350. INTRN_I1DIM, MTYPE_I1, MTYPE_I1, MTYPE_V,
  351. INTRN_I2DIM, MTYPE_I2, MTYPE_I2, MTYPE_V,
  352. INTRN_I4DIM, MTYPE_I4, MTYPE_I4, MTYPE_V,
  353. INTRN_I8DIM, MTYPE_I8, MTYPE_I8, MTYPE_V,
  354. INTRN_F4DIM, MTYPE_F4, MTYPE_F4, MTYPE_V,
  355. INTRN_F8DIM, MTYPE_F8, MTYPE_F8, MTYPE_V,
  356. INTRN_FQDIM, MTYPE_FQ, MTYPE_FQ, MTYPE_V,
  357. INTRN_F16DIM, MTYPE_F16, MTYPE_F16, MTYPE_V,
  358. INTRN_F4MOD, MTYPE_F4, MTYPE_F4, MTYPE_V,
  359. INTRN_F8MOD, MTYPE_F8, MTYPE_F8, MTYPE_V,
  360. INTRN_FQMOD, MTYPE_FQ, MTYPE_FQ, MTYPE_V,
  361. INTRN_F16MOD, MTYPE_F16, MTYPE_F16, MTYPE_V,
  362. INTRN_F8F4PROD, MTYPE_F4, MTYPE_F4, MTYPE_V,
  363. INTRN_FQF8PROD, MTYPE_F8, MTYPE_F8, MTYPE_V,
  364. INTRN_F16F8PROD, MTYPE_F8, MTYPE_F8, MTYPE_V,
  365. INTRN_I1SIGN, MTYPE_I1, MTYPE_I1, MTYPE_V,
  366. INTRN_I2SIGN, MTYPE_I2, MTYPE_I2, MTYPE_V,
  367. INTRN_I4SIGN, MTYPE_I4, MTYPE_I4, MTYPE_V,
  368. INTRN_I8SIGN, MTYPE_I8, MTYPE_I8, MTYPE_V,
  369. INTRN_F4SIGN, MTYPE_F4, MTYPE_F4, MTYPE_V,
  370. INTRN_F8SIGN, MTYPE_F8, MTYPE_F8, MTYPE_V,
  371. INTRN_FQSIGN, MTYPE_FQ, MTYPE_FQ, MTYPE_V,
  372. INTRN_F16SIGN, MTYPE_F16, MTYPE_F16, MTYPE_V,
  373. INTRN_F4AINT, MTYPE_F4, MTYPE_V, MTYPE_V,
  374. INTRN_F8AINT, MTYPE_F8, MTYPE_V, MTYPE_V,
  375. INTRN_FQAINT, MTYPE_FQ, MTYPE_V, MTYPE_V,
  376. INTRN_F16AINT, MTYPE_F16, MTYPE_V, MTYPE_V,
  377. INTRN_I2F4NINT, MTYPE_F4, MTYPE_V, MTYPE_V,
  378. INTRN_I4F4NINT, MTYPE_F4, MTYPE_V, MTYPE_V,
  379. INTRN_I8F4NINT, MTYPE_F4, MTYPE_V, MTYPE_V,
  380. INTRN_I2F8IDNINT, MTYPE_F8, MTYPE_V, MTYPE_V,
  381. INTRN_I4F8IDNINT, MTYPE_F8, MTYPE_V, MTYPE_V,
  382. INTRN_I8F8IDNINT, MTYPE_F8, MTYPE_V, MTYPE_V,
  383. INTRN_I2FQIQNINT, MTYPE_FQ, MTYPE_V, MTYPE_V,
  384. INTRN_I4FQIQNINT, MTYPE_FQ, MTYPE_V, MTYPE_V,
  385. INTRN_I8FQIQNINT, MTYPE_FQ, MTYPE_V, MTYPE_V,
  386. INTRN_I2F16IQNINT, MTYPE_F16, MTYPE_V, MTYPE_V,
  387. INTRN_I4F16IQNINT, MTYPE_F16, MTYPE_V, MTYPE_V,
  388. INTRN_I8F16IQNINT, MTYPE_F16, MTYPE_V, MTYPE_V,
  389. INTRN_F4ANINT, MTYPE_F4, MTYPE_V, MTYPE_V,
  390. INTRN_F8ANINT, MTYPE_F8, MTYPE_V, MTYPE_V,
  391. INTRN_FQANINT, MTYPE_FQ, MTYPE_V, MTYPE_V,
  392. INTRN_F16ANINT, MTYPE_F16, MTYPE_V, MTYPE_V,
  393. INTRN_F4LOG10, MTYPE_F4, MTYPE_V, MTYPE_V,
  394. INTRN_F8LOG10, MTYPE_F8, MTYPE_V, MTYPE_V,
  395. INTRN_FQLOG10, MTYPE_FQ, MTYPE_V, MTYPE_V,
  396. INTRN_F16LOG10, MTYPE_F16, MTYPE_V, MTYPE_V,
  397. INTRN_I1BTEST, MTYPE_I1, MTYPE_I1, MTYPE_V,
  398. INTRN_I2BTEST, MTYPE_I2, MTYPE_I2, MTYPE_V,
  399. INTRN_I4BTEST, MTYPE_I4, MTYPE_I4, MTYPE_V,
  400. INTRN_I8BTEST, MTYPE_I8, MTYPE_I8, MTYPE_V,
  401. INTRN_I1BSET, MTYPE_I1, MTYPE_I1, MTYPE_V,
  402. INTRN_I2BSET, MTYPE_I2, MTYPE_I2, MTYPE_V,
  403. INTRN_I4BSET, MTYPE_I4, MTYPE_I4, MTYPE_V,
  404. INTRN_I8BSET, MTYPE_I8, MTYPE_I8, MTYPE_V,
  405. INTRN_I1BCLR, MTYPE_I1, MTYPE_I1, MTYPE_V,
  406. INTRN_I2BCLR, MTYPE_I2, MTYPE_I2, MTYPE_V,
  407. INTRN_I4BCLR, MTYPE_I4, MTYPE_I4, MTYPE_V,
  408. INTRN_I8BCLR, MTYPE_I8, MTYPE_I8, MTYPE_V,
  409. INTRN_I1BITS, MTYPE_I1, MTYPE_I1, MTYPE_I1,
  410. INTRN_I2BITS, MTYPE_I2, MTYPE_I2, MTYPE_I2,
  411. INTRN_I4BITS, MTYPE_I4, MTYPE_I4, MTYPE_I4,
  412. INTRN_I8BITS, MTYPE_I8, MTYPE_I8, MTYPE_I8,
  413. INTRN_I1SHL, MTYPE_I1, MTYPE_I1, MTYPE_V,
  414. INTRN_I2SHL, MTYPE_I2, MTYPE_I2, MTYPE_V,
  415. INTRN_I1SHR, MTYPE_I1, MTYPE_I1, MTYPE_V,
  416. INTRN_I2SHR, MTYPE_I2, MTYPE_I2, MTYPE_V,
  417. INTRN_I1SHFT, MTYPE_I1, MTYPE_I1, MTYPE_V,
  418. INTRN_I2SHFT, MTYPE_I2, MTYPE_I2, MTYPE_V,
  419. INTRN_I4SHFT, MTYPE_I4, MTYPE_I4, MTYPE_V,
  420. INTRN_I8SHFT, MTYPE_I8, MTYPE_I8, MTYPE_V,
  421. INTRN_I1SHFTC, MTYPE_I1, MTYPE_I1, MTYPE_I1,
  422. INTRN_I2SHFTC, MTYPE_I2, MTYPE_I2, MTYPE_I2,
  423. INTRN_I4SHFTC, MTYPE_I4, MTYPE_I4, MTYPE_I4,
  424. INTRN_I8SHFTC, MTYPE_I8, MTYPE_I8, MTYPE_I8,
  425. INTRN_I1MVBITS, MTYPE_I1, MTYPE_I1, MTYPE_I1,
  426. INTRN_I2MVBITS, MTYPE_I2, MTYPE_I2, MTYPE_I2,
  427. INTRN_I4MVBITS, MTYPE_I4, MTYPE_I4, MTYPE_I4,
  428. INTRN_I8MVBITS, MTYPE_I8, MTYPE_I8, MTYPE_I8,
  429. };
  430. INT32 intrinsic_parameter_type_size = sizeof(intrinsic_parameter_type) /
  431. sizeof( INTRINSIC_PARAMETER_TYPE);
  432. #define WN_has_ty(x) (OPCODE_has_1ty(WN_opcode(x)) || OPCODE_has_2ty(WN_opcode(x)))
  433. #define WN_is_pointer(x) (WN_has_ty(x) && (TY_kind(WN_ty(x)) == KIND_POINTER))
  434. #define Is_Integer_Constant(x) (WN_operator(x) == OPR_INTCONST)
  435. #define Is_Constant(x) (WN_operator(x) == OPR_CONST)
  436. #define OPCODE_is_intrinsic(op) \
  437. ((OPCODE_operator((op)) == OPR_INTRINSIC_CALL) || \
  438. (OPCODE_operator((op)) == OPR_INTRINSIC_OP))
  439. #define ABS(x) (((x)<0) ? -(x) : (x))
  440. /* ====================================================================
  441. * Exported Functions
  442. * ====================================================================
  443. */
  444. extern const char * INTR_intrinsic_name(WN *tree);
  445. extern WN * make_pointer_to_node(WN *block, WN *tree);
  446. /* ====================================================================
  447. * Imported Functions
  448. * ====================================================================
  449. */
  450. extern PREG_NUM AssignExpr(WN *block, WN *tree, TYPE_ID type);
  451. extern TY_IDX compute_alignment_type(WN *tree, TY_IDX, INT64 offset);
  452. extern INT32 compute_copy_alignment(TY_IDX, TY_IDX, INT32 offset);
  453. extern BOOL lower_is_aliased(WN *wn1, WN *wn2, INT64 size);
  454. extern TYPE_ID compute_copy_quantum(INT32 );
  455. extern WN *WN_I1const(TYPE_ID type, INT64 con);
  456. extern void WN_annotate_call_flags(WN *call, ST *sym);
  457. extern BOOL CG_bcopy_cannot_overlap;
  458. extern BOOL CG_memcpy_cannot_overlap;
  459. extern BOOL CG_memmove_cannot_overlap;
  460. extern INT32 CG_memmove_inst_count;
  461. #ifdef KEY
  462. extern INT32 CG_memmove_align_inst_count;
  463. #endif
  464. /* ====================================================================
  465. * Forward Declarations
  466. * ====================================================================
  467. */
  468. static EMULATION WN_emulation(WN *tree);
  469. static WN *em_exp_int(WN *block, WN *x, WN *pow, TYPE_ID type);
  470. static WN *em_exp_float(WN *block, WN *x, WN *pow, TYPE_ID type);
  471. static WN *em_mod_float(WN *block, WN *x, WN *y);
  472. static WN *em_complex_exp(WN *block, WN *x);
  473. static WN *em_complex_cos(WN *block, WN *x);
  474. static COERCE INTR_coerce_runtime(WN *tree, INT32 arg);
  475. static TYPE_ID INTR_parameter_type(WN *tree, INT32 arg);
  476. static TY_IDX aux_compute_alignment(WN *tree);
  477. /* ====================================================================
  478. * private variables
  479. * ====================================================================
  480. */
  481. static INT32 em_exp_int_max = 256;
  482. #define MAX_INTRINSIC_ARGS 20
  483. /* ====================================================================
  484. *
  485. * TYPE_ID INTR_return_mtype(id)
  486. *
  487. *
  488. *
  489. * ==================================================================== */
  490. TYPE_ID INTR_return_mtype(INTRINSIC id)
  491. {
  492. INTRN_RETKIND rtype = INTRN_return_kind(id);
  493. switch(rtype)
  494. {
  495. case IRETURN_I1: return MTYPE_I1;
  496. case IRETURN_I2: return MTYPE_I2;
  497. case IRETURN_I4: return MTYPE_I4;
  498. case IRETURN_I8: return MTYPE_I8;
  499. case IRETURN_U1: return MTYPE_U1;
  500. case IRETURN_U2: return MTYPE_U2;
  501. case IRETURN_U4: return MTYPE_U4;
  502. case IRETURN_U8: return MTYPE_U8;
  503. case IRETURN_F4: return MTYPE_F4;
  504. case IRETURN_F8: return MTYPE_F8;
  505. case IRETURN_FQ: return MTYPE_FQ;
  506. case IRETURN_F16: return MTYPE_F16;
  507. case IRETURN_C4: return MTYPE_C4;
  508. case IRETURN_C8: return MTYPE_C8;
  509. case IRETURN_CQ: return MTYPE_CQ;
  510. case IRETURN_C16: return MTYPE_C16;
  511. case IRETURN_V: return MTYPE_V;
  512. case IRETURN_PV:
  513. case IRETURN_PU1:
  514. case IRETURN_DA1:
  515. case IRETURN_SZT:
  516. case IRETURN_PC :
  517. case IRETURN_UNKNOWN:
  518. return MTYPE_UNKNOWN;
  519. }
  520. return MTYPE_UNKNOWN;
  521. }
  522. /* ====================================================================
  523. *
  524. * EMULATION WN_emulation(WN *tree)
  525. *
  526. * Provide the correct emulation enum for a given WN
  527. *
  528. * TODO: cache most frequently used id's
  529. *
  530. * ==================================================================== */
  531. static EMULATION WN_emulation(WN *tree)
  532. {
  533. OPCODE op = WN_opcode(tree);
  534. TYPE_ID type = OPCODE_rtype(op);
  535. switch (WN_operator(tree)) {
  536. case OPR_SQRT:
  537. switch(type) {
  538. case MTYPE_C4: return EM_C4_SQRT;
  539. case MTYPE_C8: return EM_C8_SQRT;
  540. case MTYPE_CQ: return EM_CQ_SQRT;
  541. case MTYPE_C16: return EM_CQ_SQRT;
  542. case MTYPE_FQ: return EM_Q_SQRT;
  543. case MTYPE_F16: return EM_Q_SQRT;
  544. }
  545. break;
  546. case OPR_RSQRT:
  547. switch(type) {
  548. case MTYPE_C4: return EM_C4_RSQRT;
  549. case MTYPE_C8: return EM_C8_RSQRT;
  550. case MTYPE_CQ: return EM_CQ_RSQRT;
  551. case MTYPE_C16: return EM_CQ_RSQRT;
  552. }
  553. break;
  554. case OPR_CVT:
  555. {
  556. TYPE_ID desc = WN_desc(tree);
  557. if (desc == MTYPE_FQ || desc == MTYPE_F16)
  558. {
  559. switch(type) {
  560. case MTYPE_I4: return EM_JI_QINT;
  561. case MTYPE_I8: return EM_KI_QINT;
  562. case MTYPE_F4: return EM_SNGL_Q;
  563. case MTYPE_F8: return EM_DBLE_Q;
  564. }
  565. break;
  566. }
  567. else if (type == MTYPE_FQ || type == MTYPE_F16)
  568. {
  569. switch(desc) {
  570. case MTYPE_U4: return EM_Q_FLOTJU;
  571. case MTYPE_I4: return EM_Q_FLOTJ;
  572. case MTYPE_U8: return EM_Q_FLOTKU;
  573. case MTYPE_I8: return EM_Q_FLOTK;
  574. case MTYPE_F8: return EM_Q_EXTD;
  575. case MTYPE_F4: return EM_Q_EXT;
  576. }
  577. }
  578. }
  579. break;
  580. case OPR_RND:
  581. {
  582. TYPE_ID desc = WN_desc(tree);
  583. if (desc == MTYPE_FQ || desc == MTYPE_F16)
  584. {
  585. switch(type)
  586. {
  587. case MTYPE_I4: return EM_JIQNNT;
  588. case MTYPE_I8: return EM_KIQNNT;
  589. }
  590. break;
  591. }
  592. }
  593. break;
  594. default:
  595. if (type == MTYPE_FQ || type == MTYPE_F16)
  596. {
  597. switch(WN_operator(tree)) {
  598. case OPR_ISTORE:
  599. case OPR_ISTOREX:
  600. case OPR_STID:
  601. case OPR_ILOAD:
  602. case OPR_ILOADX:
  603. case OPR_SELECT:
  604. case OPR_LDID:
  605. case OPR_CONST:
  606. case OPR_NEG:
  607. break;
  608. case OPR_ABS: return EM_Q_ABS;
  609. case OPR_ADD: return EM_Q_ADD;
  610. case OPR_SUB: return EM_Q_SUB;
  611. case OPR_MPY: return EM_Q_MPY;
  612. case OPR_DIV: return EM_Q_DIV;
  613. case OPR_MAX: return EM_Q_MAX1;
  614. case OPR_MIN: return EM_Q_MIN1;
  615. case OPR_RECIP:
  616. case OPR_RSQRT:
  617. case OPR_MADD:
  618. case OPR_MSUB:
  619. case OPR_NMADD:
  620. case OPR_NMSUB:
  621. case OPR_RND:
  622. case OPR_TRUNC:
  623. case OPR_CVT:
  624. case OPR_SQRT:
  625. Is_True(FALSE, ("WN_emulation() %s should be already processed", OPCODE_name(WN_opcode(tree))));
  626. break;
  627. case OPR_CEIL:
  628. case OPR_FLOOR:
  629. case OPR_MOD:
  630. case OPR_REM:
  631. case OPR_CVTL:
  632. case OPR_CALL:
  633. case OPR_INTRINSIC_CALL:
  634. Is_True(FALSE, ("WN_emulation() %s invalid context for op", OPCODE_name(WN_opcode(tree))));
  635. }
  636. }
  637. else if (WN_desc(tree)== MTYPE_FQ || WN_desc(tree)== MTYPE_F16)
  638. {
  639. switch(WN_operator(tree)) {
  640. case OPR_EQ: return EM_Q_EQ;
  641. case OPR_NE: return EM_Q_NE;
  642. case OPR_GT: return EM_Q_GT;
  643. case OPR_GE: return EM_Q_GE;
  644. case OPR_LT: return EM_Q_LT;
  645. case OPR_LE: return EM_Q_LE;
  646. case OPR_TRUNC:
  647. switch(type)
  648. {
  649. case MTYPE_I4: return EM_JI_QINT;
  650. case MTYPE_I8: return EM_KI_QINT;
  651. }
  652. break;
  653. case OPR_CEIL:
  654. switch(type)
  655. {
  656. case MTYPE_I4: return EM_JI_QCEIL;
  657. case MTYPE_I8: return EM_KI_QCEIL;
  658. }
  659. break;
  660. case OPR_FLOOR:
  661. switch(type)
  662. {
  663. case MTYPE_I4: return EM_JI_QFLOOR;
  664. case MTYPE_I8: return EM_KI_QFLOOR;
  665. }
  666. break;
  667. }
  668. }
  669. break;
  670. }
  671. FmtAssert(FALSE, ("WN_emulation() %s not recognized", OPCODE_name(WN_opcode(tree))));
  672. return EM_LAST;
  673. }
  674. /* ====================================================================
  675. *
  676. * WN *checkForZero(WN *block, TYPE_ID type, PREG_NUM xN, WN *if_else, WN *value)
  677. *
  678. * Create test block for zero
  679. * if (x==0)
  680. * { ret = 0; }
  681. * else
  682. * { ret = value }
  683. * return ret;
  684. *
  685. * ==================================================================== */
  686. static WN *checkForZero(WN *block, TYPE_ID type, PREG_NUM xN, WN *if_else, WN *value)
  687. {
  688. TYPE_ID rtype = WN_rtype(value);
  689. WN *if_then;
  690. PREG_NUM retN;
  691. if_then = WN_CreateBlock();
  692. retN = AssignExpr(if_then, WN_Zerocon(rtype), rtype);
  693. {
  694. WN *st;
  695. st = WN_StidIntoPreg(rtype, retN, MTYPE_To_PREG(rtype), value);
  696. WN_INSERT_BlockLast(if_else, st);
  697. }
  698. {
  699. WN *cond, *IF;
  700. Is_True(MTYPE_is_float(type), ("unexpected type"));
  701. cond = WN_EQ(type,
  702. WN_LdidPreg(type, xN),
  703. WN_Zerocon(type));
  704. IF = WN_CreateIf(cond, if_then, if_else);
  705. WN_INSERT_BlockLast(block, IF);
  706. }
  707. return WN_LdidPreg(rtype, retN);
  708. }
  709. /* ====================================================================
  710. *
  711. * WN * WN_arg(WN *tree, INT32 arg)
  712. *
  713. * return Nth kid, skipping PARM
  714. * ==================================================================== */
  715. static WN *WN_arg(WN *tree, INT32 arg)
  716. {
  717. WN *child= WN_kid(tree, arg);
  718. if (WN_operator_is(child, OPR_PARM))
  719. {
  720. return WN_kid0(child);
  721. }
  722. return child;
  723. }
  724. static WN *em_clen(WN *block, WN *len)
  725. {
  726. return len;
  727. }
  728. /*
  729. **
  730. ** Auxiliary routine to implement ( x + .5 * sign(x) )
  731. */
  732. static WN *aux_nearest(TYPE_ID rtype, PREG_NUM xN)
  733. {
  734. WN *rel, *select;
  735. rel = WN_GE(rtype, WN_LdidPreg(rtype, xN), WN_Zerocon(rtype));
  736. select = WN_Select(rtype,
  737. rel,
  738. WN_Floatconst(rtype, .5),
  739. WN_Floatconst(rtype, -.5));
  740. return WN_Add(rtype, WN_LdidPreg(rtype, xN), select);
  741. }
  742. /*
  743. ** Auxiliary routine for Convert ( {Round,Trunc}(rtype) )
  744. */
  745. static WN *aux_CvtRnd(TYPE_ID rtype, WN *x)
  746. {
  747. WN *rnd;
  748. TYPE_ID intToFloat = (Slow_CVTDL) ? MTYPE_I4 : MTYPE_I8;
  749. // Needed for correctness, no matter how slow the truncate
  750. if (WN_rtype(x) != MTYPE_F4) {
  751. intToFloat = MTYPE_I8;
  752. }
  753. rnd = WN_Rnd(rtype, intToFloat, x);
  754. return WN_Cvt(intToFloat, rtype, rnd);
  755. }
  756. static WN *aux_CvtTrunc(TYPE_ID rtype, WN *x)
  757. {
  758. WN *trunc;
  759. TYPE_ID intToFloat = (Slow_CVTDL) ? MTYPE_I4 : MTYPE_I8;
  760. /*
  761. * this is em_aint()
  762. */
  763. // Needed for correctness, no matter how slow the truncate
  764. if (WN_rtype(x) != MTYPE_F4) {
  765. intToFloat = MTYPE_I8;
  766. }
  767. trunc = WN_Trunc(rtype, intToFloat, x);
  768. return WN_Cvt(intToFloat, rtype, trunc);
  769. }
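/*
** Likely rationale for the intToFloat choice above (assuming IEEE-754
** operands): an MTYPE_F4 value that still carries a fractional part is
** necessarily smaller than 2**23 and round-trips safely through
** MTYPE_I4, while an MTYPE_F8 value can carry a fraction up to 2**52
** and must therefore go through MTYPE_I8 even when Slow_CVTDL is set;
** see also note [1] below on when a bare trunc can fail.
*/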
  770. /*
  771. ** Optimizer cannot deal with zero length mstore so return BLOCK
  772. */
  773. static WN *aux_CreateMstore(WN_OFFSET offset, TY_IDX type, WN *value, WN *addr,
  774. WN *size)
  775. {
  776. if (Is_Integer_Constant(size) && WN_const_val(size) <= 0)
  777. {
  778. /* Cannot delete these nodes, since they are used later (bug 623566)
  779. WN_Delete(value);
  780. WN_Delete(addr);
  781. WN_Delete(size);
  782. */
  783. return WN_CreateBlock();
  784. }
  785. UINT64 ty_size = TY_size(TY_pointed(type));
  786. if (ty_size != 0 && WN_const_val (size) % ty_size != 0) {
  787. // size copied is not a multiple of the size of the type, which means
  788. // that we are copying part of the type. We then change the pointer
  789. // to (void*)
  790. static TY_IDX void_star = TY_IDX_ZERO;
  791. if (void_star == TY_IDX_ZERO)
  792. void_star = Make_Pointer_Type (MTYPE_To_TY (MTYPE_V));
  793. Set_TY_IDX_index (type, TY_IDX_index (void_star));
  794. }
  795. return WN_CreateMstore(offset, type, value, addr, size);
  796. }
  797. /*
  798. **
  799. ** Notes for the following functions:
  800. **
  801. ** [1] Fast_trunc_Allowed (currently when Roundoff_Level >= ROUNDOFF_SIMPLE)
  802. ** generate trunc. This will fail when (-2**63 <= |x| < 2**63-1)
  803. **
  804. ** [2] Test x against TWO_EXP
  805. ** Floating point value is such that (x+1 == x), ie. there is no
  806. ** possible fractional value ie.
  807. ** 2**23 <= |x| return x
  808. **
  809. ** It is possible (if necessary) to special case MTYPE_F4 and generate
  810. ** a trunc to MTYPE_I4.
  811. **/
  812. #define TWO_EXP_23 8388608.0
  813. #define TWO_EXP_52 4503599627370496.0
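/*
** Worked example for the bounds above, assuming IEEE-754 formats: single
** precision has a 24-bit significand, so once |x| >= 2**23 (8388608.0)
** adjacent floats are at least 1.0 apart and every representable value
** is already an integer; the double-precision analogue with a 53-bit
** significand is 2**52 (4503599627370496.0).
*/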
  814. /*
  815. **
  816. ** INTRN_I2F4NINT:
  817. ** INTRN_I4F4NINT:
  818. ** INTRN_I8F4NINT:
  819. ** INTRN_I2F8IDNINT:
  820. ** INTRN_I4F8IDNINT:
  821. ** INTRN_I8F8IDNINT:
  822. ** INTRN_I2FQIQNINT:
  823. ** INTRN_I4FQIQNINT:
  824. ** INTRN_I8FQIQNINT:
  825. ** INTRN_I2F16IQNINT:
  826. ** INTRN_I4F16IQNINT:
  827. ** INTRN_I8F16IQNINT:
  828. **
  829. ** change into
  830. ** rnd(x) roundoff >= 3
  831. ** trunc( x + .5 * sign(x) )
  832. */
  833. static WN *em_nearest_int(WN *block, TYPE_ID rtype, WN *x)
  834. {
  835. TYPE_ID type = WN_rtype(x);
  836. if (Fast_NINT_Allowed)
  837. {
  838. return WN_Rnd(type, rtype, x);
  839. }
  840. else if ((type == MTYPE_F4) || (type == MTYPE_F8))
  841. {
  842. WN *add;
  843. PREG_NUM xN;
  844. xN = AssignExpr(block, x, type);
  845. add = aux_nearest(type, xN);
  846. if (Fast_trunc_Allowed)
  847. {
  848. return WN_Trunc(type, rtype, add);
  849. }
  850. else
  851. {
  852. WN *rel, *select;
  853. double con= (type==MTYPE_F4) ? TWO_EXP_23 : TWO_EXP_52;
  854. rel = WN_GE(type,
  855. WN_Abs(type, WN_LdidPreg(type, xN)),
  856. WN_Floatconst(type, con));
  857. select = WN_Select(type, rel, WN_LdidPreg(type, xN), add);
  858. return WN_Trunc(type, rtype, select);
  859. }
  860. }
  861. else
  862. {
  863. return NULL;
  864. }
  865. }
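/*
** Scalar sketch of the guarded path above (hypothetical helper, not in
** this file), for a double operand rounded to a 64-bit integer:
**
**   static INT64 nearest_int(double x)
**   {
**     double add = (x >= 0.0) ? x + 0.5 : x - 0.5;
**     return (INT64) (fabs(x) >= TWO_EXP_52 ? x : add);
**   }
*/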
  866. /*
  867. **
  868. ** INTRN_F4ANINT:
  869. ** INTRN_F8ANINT:
  870. ** INTRN_FQANINT:
  871. ** INTRN_F16ANINT:
  872. **
  873. ** change into
  874. ** cvt (float, trunc( x + .5 * sign(x) )) roundoff>= 3
  875. */
  876. static WN *em_nearest_aint(WN *block, TYPE_ID rtype, WN *x)
  877. {
  878. if (Fast_NINT_Allowed)
  879. {
  880. return aux_CvtRnd(rtype, x);
  881. }
  882. else if ((rtype == MTYPE_F4) || (rtype == MTYPE_F8))
  883. {
  884. PREG_NUM xN;
  885. WN *add, *cvt;
  886. xN = AssignExpr(block, x, rtype);
  887. add = aux_nearest(rtype, xN);
  888. /*
  889. * this is em_aint()
  890. */
  891. cvt = aux_CvtTrunc(rtype, add);
  892. if (Fast_trunc_Allowed)
  893. {
  894. return cvt;
  895. }
  896. else
  897. {
  898. WN *rel;
  899. double con= (rtype==MTYPE_F4) ? TWO_EXP_23 : TWO_EXP_52;
  900. rel = WN_GE(rtype,
  901. WN_Abs(rtype, WN_LdidPreg(rtype, xN)),
  902. WN_Floatconst(rtype, con));
  903. return WN_Select(rtype, rel, WN_LdidPreg(rtype, xN), cvt);
  904. }
  905. }
  906. return NULL;
  907. }
  908. /*
  909. **
  910. ** INTRN_F4AINT
  911. ** INTRN_F8AINT
  912. ** INTRN_FQAINT
  913. ** INTRN_F16AINT
  914. **
  915. ** change into
  916. ** cvt (float, trunc(x))
  917. */
  918. static WN *em_aint(WN *block, TYPE_ID rtype, WN *x)
  919. {
  920. if (Fast_trunc_Allowed)
  921. {
  922. return aux_CvtTrunc(rtype, x);
  923. }
  924. else if ((rtype == MTYPE_F4) || (rtype == MTYPE_F8))
  925. {
  926. PREG_NUM xN;
  927. WN *rel, *cvt;
  928. double con= (rtype==MTYPE_F4) ? TWO_EXP_23 : TWO_EXP_52;
  929. xN = AssignExpr(block, x, rtype);
  930. rel = WN_GE(rtype,
  931. WN_Abs(rtype, WN_LdidPreg(rtype, xN)),
  932. WN_Floatconst(rtype, con));
  933. cvt = aux_CvtTrunc(rtype, WN_LdidPreg(rtype, xN));
  934. return WN_Select(rtype, rel, WN_LdidPreg(rtype, xN), cvt);
  935. }
  936. else
  937. {
  938. return NULL;
  939. }
  940. }
  941. /*
  942. **
  943. ** change into
  944. ** | x | if y >= 0
  945. ** - | x | if y < 0
  946. **
  947. ** --> absN = | x |;
  948. ** --> (y>=0) ? absN : -absN;
  949. */
  950. static WN *em_sign(WN *block, WN *x, WN *y)
  951. {
  952. PREG_NUM absN;
  953. TYPE_ID type = WN_rtype(x);
  954. WN *abs, *select;
  955. #ifdef KEY // bug 9660
  956. if (MTYPE_is_integral(type) && ! MTYPE_signed(type))
  957. type = Mtype_TransferSign(MTYPE_I4, type);
  958. #endif
  959. #ifdef KEY // bug 12052
  960. if (MTYPE_is_integral(type) &&
  961. MTYPE_byte_size(type) < MTYPE_byte_size(WN_rtype(y)))
  962. type = Mtype_TransferSize(WN_rtype(y), type);
  963. #endif
  964. abs = WN_Abs(type, x);
  965. absN = AssignExpr(block, abs, type);
  966. select = WN_Select(type,
  967. WN_GE(type, y, WN_Zerocon(type)),
  968. WN_LdidPreg(type, absN),
  969. WN_Neg(type, WN_LdidPreg(type, absN)));
  970. return select;
  971. }
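/*
** Scalar equivalent of the expansion above (Fortran SIGN(x,y) transfer),
** illustrative only:
**
**   static double sign_xfer(double x, double y)
**   {
**     double a = fabs(x);
**     return (y >= 0.0) ? a : -a;
**   }
*/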
  972. /*
  973. **
  974. ** change into
  975. ** cvt (x) * cvt(y)
  976. */
  977. static WN *em_prod(WN *block, TYPE_ID rtype, WN *x, WN *y)
  978. {
  979. TYPE_ID type = WN_rtype(x);
  980. WN *mpy;
  981. mpy = WN_Mpy(rtype,
  982. WN_Cvt(type, rtype, x),
  983. WN_Cvt(type, rtype, y));
  984. return mpy;
  985. }
  986. /*
  987. **
  988. ** change into
  989. ** (x>y) ? (x-y) : 0
  990. */
  991. static WN *em_dim(WN *block, WN *x, WN *y)
  992. {
  993. PREG_NUM xN, yN;
  994. TYPE_ID type = WN_rtype(x);
  995. WN *rel, *sub, *select;
  996. xN = AssignExpr(block, x, type);
  997. yN = AssignExpr(block, y, type);
  998. rel = WN_GT(type,
  999. WN_LdidPreg(type, xN),
  1000. WN_LdidPreg(type, yN));
  1001. sub = WN_Sub(type,
  1002. WN_LdidPreg(type, xN),
  1003. WN_LdidPreg(type, yN));
  1004. select = WN_Select(type,
  1005. rel,
  1006. sub,
  1007. WN_Zerocon(type));
  1008. return select;
  1009. }
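/*
** Fortran DIM semantics for reference: DIM(x,y) = (x > y) ? x - y : 0,
** so DIM(7,3) == 4 and DIM(3,7) == 0.
*/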
  1010. /*
  1011. **
  1012. ** change into
  1013. ** x - y * ( FLOAT ( |(x / y)| ))
  1014. */
  1015. static WN *em_mod_float(WN *block, WN *x, WN *y)
  1016. {
  1017. PREG_NUM xN, yN;
  1018. TYPE_ID type = WN_rtype(x);
  1019. WN *div, *cvt, *mpy, *sub;
  1020. if ((type == MTYPE_F4) || (type == MTYPE_F8)) {
  1021. xN = AssignExpr(block, x, type);
  1022. yN = AssignExpr(block, y, type);
  1023. div = WN_Div(type,
  1024. WN_LdidPreg(type, xN),
  1025. WN_LdidPreg(type, yN));
  1026. cvt = em_aint(block, type, div);
  1027. mpy = WN_Mpy(type,
  1028. WN_LdidPreg(type, yN),
  1029. cvt);
  1030. sub = WN_Sub(type,
  1031. WN_LdidPreg(type, xN),
  1032. mpy);
  1033. return sub;
  1034. } else {
  1035. return NULL;
  1036. }
  1037. }
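/*
** Worked example of the expansion above (Fortran MOD, truncation toward
** zero): MOD(7.5, 2.0) = 7.5 - 2.0 * AINT(7.5 / 2.0)
**                      = 7.5 - 2.0 * 3.0 = 1.5,
** and MOD(-7.5, 2.0) = -7.5 - 2.0 * (-3.0) = -1.5.
*/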
  1038. /*
  1039. ** WN *build_mult_tree(block, TYPE_ID type, PREG_NUM xN, int pow)
  1040. **
  1041. ** Build a multiply tree to make shapiro happy.
  1042. **
  1043. ** Actually, create a series of temporaries to hold the powers that be.
  1044. **
  1045. ** ex. x ** 9 (= 1001)
  1046. ** t0= x;
  1047. ** t1= t0*t0; (x**2)
  1048. ** t2= t1*t1; (x**4)
  1049. ** t3= t2*t2; (x**8)
  1050. ** ans = t3 * t0;
  1051. **
  1052. */
  1053. #define BIT_IS_ON(x,i) ((x) & (1<<(i)))
  1054. static WN *build_mult_tree(WN *block, TYPE_ID type, PREG_NUM xN, INT32 pow)
  1055. {
  1056. PREG_NUM powers[16]; /* could handle pow = 64k */
  1057. INT32 i, n = 0;
  1058. PREG_NUM xNm1;
  1059. WN *tree = NULL;
  1060. Is_True((pow>0), ("expected pow>0"));
  1061. powers[n++] = xN;
  1062. xNm1 = xN;
  1063. for(i= 1; ((1<<i) <= pow); i++)
  1064. {
  1065. WN *mpy;
  1066. mpy = WN_Mpy(type, WN_LdidPreg(type, xNm1), WN_LdidPreg(type, xNm1));
  1067. xNm1 = AssignExpr(block, mpy, type);
  1068. powers[n++] = xNm1;
  1069. }
  1070. for(i= 0; ((1<<i) <= pow); i++)
  1071. {
  1072. if (BIT_IS_ON(pow, i))
  1073. {
  1074. PREG_NUM powerN = powers[i];
  1075. if (tree)
  1076. {
  1077. tree = WN_Mpy(type, tree, WN_LdidPreg(type, powerN));
  1078. }
  1079. else
  1080. {
  1081. tree = WN_LdidPreg(type, powerN);
  1082. }
  1083. }
  1084. }
  1085. return tree;
  1086. }
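/*
** The same exponentiation-by-squaring idea in plain C (illustrative
** sketch, assuming pow > 0, not part of this file):
**
**   static double pow_uint(double x, INT32 pow)
**   {
**     double result = 1.0;
**     for (double p = x; pow != 0; pow >>= 1, p *= p)
**       if (pow & 1)
**         result *= p;
**     return result;
**   }
*/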
  1087. static WN *em_exp_float(WN *block, WN *x, WN *pow, TYPE_ID type)
  1088. {
  1089. if (Is_Constant(pow))
  1090. {
  1091. TCON con = Const_Val(pow);
  1092. BOOL sqrt, rsqrt;
  1093. #ifdef KEY
  1094. BOOL sqrt_25, rsqrt_25, sqrt_75, rsqrt_75;
  1095. BOOL cbrt_33, cbrt_66;
  1096. #endif
  1097. WN *tree, *x_copy;
  1098. double n;
  1099. /*
  1100. * for complex x verify the power is a real number
  1101. * (TODO) general complex ** complex
  1102. */
  1103. if (MTYPE_is_complex(type))
  1104. {
  1105. TCON Ipow;
  1106. Ipow = Extract_Complex_Imag(con);
  1107. if (Targ_To_Host_Float(Ipow) == 0.0)
  1108. {
  1109. con = Extract_Complex_Real(con);
  1110. }
  1111. else
  1112. {
  1113. return NULL;
  1114. }
  1115. }
  1116. n = Targ_To_Host_Float(con);
  1117. sqrt = rsqrt = FALSE;
  1118. #ifdef KEY
  1119. cbrt_33 = cbrt_66 = FALSE;
  1120. sqrt_25 = rsqrt_25 = sqrt_75 = rsqrt_75 = FALSE;
  1121. #endif
  1122. if (trunc(n) == n)
  1123. {
  1124. ;
  1125. }
  1126. else if ((trunc(ABS(n))+.5) == ABS(n))
  1127. {
  1128. /*
  1129. * if we need to multiply by sqrt we need a copy of x
  1130. * as it might get changed underneath us.
  1131. */
  1132. if (n<0)
  1133. rsqrt = TRUE;
  1134. else
  1135. sqrt = TRUE;
  1136. x_copy = WN_COPY_Tree(x);
  1137. }
  1138. #ifdef KEY
  1139. else if ((trunc(ABS(n))+.25) == ABS(n))
  1140. {
  1141. /*
  1142. * if we need to multiply by sqrt we need a copy of x
  1143. * as it might get changed underneath us.
  1144. */
  1145. if (n<0)
  1146. rsqrt_25 = TRUE;
  1147. else
  1148. sqrt_25 = TRUE;
  1149. x_copy = WN_COPY_Tree(x);
  1150. }
  1151. else if ((trunc(ABS(n))+.75) == ABS(n))
  1152. {
  1153. /*
  1154. * if we need to multiply by sqrt we need a copy of x
  1155. * as it might get changed underneath us.
  1156. */
  1157. if (n<0)
  1158. rsqrt_75 = TRUE;
  1159. else
  1160. sqrt_75 = TRUE;
  1161. x_copy = WN_COPY_Tree(x);
  1162. }
  1163. #ifdef TARG_X8664
  1164. else if (ABS((trunc(n)+1.0/3) - n) < .0000001 &&
  1165. ! (Is_Target_64bit() && !Is_Target_Anyx86() && OPT_Fast_Math))
  1166. { // the pow in fast_math is faster than cbrt, so no point converting
  1167. cbrt_33 = TRUE;
  1168. x_copy = WN_COPY_Tree(x);
  1169. }
  1170. else if (ABS((trunc(n)+2.0/3) - n) < .0000001 &&
  1171. ! (Is_Target_64bit() && !Is_Target_Anyx86() && OPT_Fast_Math))
  1172. { // the pow in fast_math is faster than cbrt, so no point converting
  1173. cbrt_66 = TRUE;
  1174. x_copy = WN_COPY_Tree(x);
  1175. }
  1176. #endif
  1177. #endif
  1178. else
  1179. {
  1180. return NULL;
  1181. }
  1182. {
  1183. WN *ipow = WN_Intconst(MTYPE_I4, (INT64) trunc(n));
  1184. tree = em_exp_int(block, x, ipow, type);
  1185. }
  1186. if (sqrt || rsqrt)
  1187. {
  1188. #ifdef KEY
  1189. // bug 4824: non-constant float x could be negative
  1190. // bug 4990: Do the check only for C/C++ and if
  1191. // -fmath-errno (-LANG:math_errno=on)
  1192. if (!PU_f77_lang (Get_Current_PU()) &&
  1193. !PU_f90_lang (Get_Current_PU()) && // ! Fortran
  1194. LANG_Math_Errno && // -fmath-errno
  1195. MTYPE_is_float (WN_rtype (x_copy)) &&
  1196. (!Is_Constant (x_copy) ||
  1197. Targ_To_Host_Float (Const_Val (x_copy)) < 0))
  1198. return NULL;
  1199. #endif // KEY
  1200. #ifdef TARG_X8664
  1201. // Bug 5935 - rsqrtsd or rsqrtpd is absent.
  1202. if (rsqrt && (type == MTYPE_F8 || type == MTYPE_V16F8))
  1203. return NULL;
  1204. #endif
  1205. if (tree)
  1206. {
  1207. /*
  1208. * x ** n+.5 -> (x**n) * (x**.5)
  1209. * where the function em_exp_int has already evaluated
  1210. */
  1211. PREG_NUM xN, treeN;
  1212. WN *fractional;
  1213. xN = AssignExpr(block, x_copy, type);
  1214. treeN = AssignExpr(block, tree, type);
  1215. fractional = (sqrt) ? WN_Sqrt(type, WN_LdidPreg(type, xN)) :
  1216. WN_Rsqrt(type, WN_LdidPreg(type, xN));
  1217. tree = WN_Mpy(type,
  1218. WN_LdidPreg(type, treeN),
  1219. fractional);
  1220. }
  1221. }
  1222. #ifdef KEY // bug 6932
  1223. // evaluate (x**0.25) as sqrt(sqrt(x))
  1224. if (sqrt_25 || rsqrt_25)
  1225. {
  1226. if (!PU_f77_lang (Get_Current_PU()) &&
  1227. !PU_f90_lang (Get_Current_PU()) && // ! Fortran
  1228. LANG_Math_Errno && // -fmath-errno
  1229. MTYPE_is_float (WN_rtype (x_copy)) &&
  1230. (!Is_Constant (x_copy) ||
  1231. Targ_To_Host_Float (Const_Val (x_copy)) < 0))
  1232. return NULL;
  1233. #ifdef TARG_X8664
  1234. // rsqrtsd or rsqrtpd is absent.
  1235. if (rsqrt_25 && (type == MTYPE_F8 || type == MTYPE_V16F8))
  1236. return NULL;
  1237. #endif
  1238. if (tree)
  1239. {
  1240. /*
  1241. * x ** n+.25 -> (x**n) * (x**.25)
  1242. * where the function em_exp_int has already evaluated
  1243. */
  1244. PREG_NUM xN, treeN;
  1245. WN *fractional;
  1246. xN = AssignExpr(block, x_copy, type);
  1247. treeN = AssignExpr(block, tree, type);
  1248. if (sqrt_25)
  1249. fractional = WN_Sqrt(type, WN_Sqrt(type, WN_LdidPreg(type, xN)));
  1250. else
  1251. fractional = WN_Sqrt(type, WN_Rsqrt(type, WN_LdidPreg(type, xN)));
  1252. tree = WN_Mpy(type,
  1253. WN_LdidPreg(type, treeN),
  1254. fractional);
  1255. }
  1256. }
  1257. // evaluate (x**0.75) as sqrt(x)*sqrt(sqrt(x))
  1258. if (sqrt_75 || rsqrt_75)
  1259. {
  1260. if (!PU_f77_lang (Get_Current_PU()) &&
  1261. !PU_f90_lang (Get_Current_PU()) && // ! Fortran
  1262. LANG_Math_Errno && // -fmath-errno
  1263. MTYPE_is_float (WN_rtype (x_copy)) &&
  1264. (!Is_Constant (x_copy) ||
  1265. Targ_To_Host_Float (Const_Val (x_copy)) < 0))
  1266. return NULL;
  1267. #ifdef TARG_X8664
  1268. // rsqrtsd or rsqrtpd is absent.
  1269. if (rsqrt_75 && (type == MTYPE_F8 || type == MTYPE_V16F8))
  1270. return NULL;
  1271. #endif
  1272. if (tree)
  1273. {
  1274. /*
  1275. * x ** n+.75 -> (x**n) * (x**.75)
  1276. * where the function em_exp_int has already evaluated
  1277. */
  1278. PREG_NUM xN, treeN;
  1279. WN *fractional;
  1280. xN = AssignExpr(block, x_copy, type);
  1281. treeN = AssignExpr(block, tree, type);
  1282. if (sqrt_75)
  1283. fractional = WN_Mpy(type,
  1284. WN_Sqrt(type, WN_LdidPreg(type, xN)),
  1285. WN_Sqrt(type,
  1286. WN_Sqrt(type, WN_LdidPreg(type, xN))));
  1287. else
  1288. fractional = WN_Mpy(type,
  1289. WN_Rsqrt(type, WN_LdidPreg(type, xN)),
  1290. WN_Rsqrt(type,
  1291. WN_Sqrt(type, WN_LdidPreg(type, xN))));
  1292. tree = WN_Mpy(type,
  1293. WN_LdidPreg(type, treeN),
  1294. fractional);
  1295. }
  1296. }
  1297. // evaluate (x**0.333333) by calling cbrt()/cbrtf()
  1298. if (cbrt_33 || cbrt_66)
  1299. {
  1300. if (type != MTYPE_F4 && type != MTYPE_F8)
  1301. return NULL;
  1302. if (tree)
  1303. {
  1304. /*
  1305. * x ** n+1/3 -> (x**n) * (x**1/3)
  1306. * where the function em_exp_int has already evaluated
  1307. */
  1308. PREG_NUM xN = AssignExpr(block, x_copy, type);
  1309. WN *kid = WN_CreateParm(type, WN_LdidPreg(type, xN), Be_Type_Tbl(type),
  1310. WN_PARM_BY_VALUE | WN_PARM_READ_ONLY);
  1311. WN* fraction = WN_Create_Intrinsic(
  1312. OPCODE_make_op(OPR_INTRINSIC_OP, type, MTYPE_V),
  1313. type == MTYPE_F4 ? INTRN_F4CBRT : INTRN_F8CBRT,
  1314. 1, &kid);
  1315. if (cbrt_66) {
  1316. PREG_NUM x13 = AssignExpr(block, fraction, type);
  1317. fraction = WN_Mpy(type, WN_LdidPreg(type, x13),
  1318. WN_LdidPreg(type, x13));
  1319. }
  1320. tree = WN_Mpy(type, tree, fraction);
  1321. }
  1322. }
  1323. #endif
  1324. return tree;
  1325. }
  1326. return NULL;
  1327. }
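/*
** The fractional-exponent cases handled above reduce to these identities
** (valid for x >= 0), which is what the expansions compute:
**   x**(n + 0.50) = x**n * sqrt(x)
**   x**(n + 0.25) = x**n * sqrt(sqrt(x))
**   x**(n + 0.75) = x**n * sqrt(x) * sqrt(sqrt(x))
**   x**(n + 1/3)  = x**n * cbrt(x)
**   x**(n + 2/3)  = x**n * cbrt(x) * cbrt(x)
** with the negative-exponent variants using reciprocal square roots.
*/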
  1328. static WN *em_exp_int(WN *block, WN *x, WN *pow, TYPE_ID type)
  1329. {
  1330. if (Is_Integer_Constant(pow))
  1331. {
  1332. INT32 n = WN_const_val(pow);
  1333. INT32 absN = ABS(n);
  1334. WN *exp= NULL;
  1335. if (em_exp_int_max < absN)
  1336. return NULL;
  1337. switch(n) {
  1338. case 1:
  1339. exp = x;
  1340. break;
  1341. case -1:
  1342. exp = WN_Inverse(type, x);
  1343. break;
  1344. case 0:
  1345. if (MTYPE_type_class(type) & MTYPE_CLASS_INTEGER)
  1346. exp = WN_Intconst(type, 1);
  1347. else
  1348. exp = WN_Floatconst(type, 1.0);
  1349. break;
  1350. case 2:
  1351. {
  1352. PREG_NUM xN;
  1353. xN = AssignExpr(block, x, type);
  1354. exp = WN_Mpy(type,
  1355. WN_LdidPreg(type, xN),
  1356. WN_LdidPreg(type, xN));
  1357. break;
  1358. }
  1359. default:
  1360. {
  1361. PREG_NUM xN;
  1362. if (Fast_Exp_Allowed)
  1363. {
  1364. xN = AssignExpr(block, x, type);
  1365. exp = build_mult_tree(block, type, xN, absN);
  1366. WN_Delete(pow);
  1367. if (n < 0)
  1368. exp = WN_Inverse(type, exp);
  1369. }
  1370. }
  1371. }
  1372. return exp;
  1373. }
  1374. else if (Is_Integer_Constant(x))
  1375. {
  1376. /*
  1377. * Optimize {-2,-1,0,1,2} ** n
  1378. */
  1379. INT32 val = WN_const_val(x);
  1380. switch(val)
  1381. {
  1382. case -2:
  1383. {
  1384. /*
  1385. * (n>=0) ? ( (n&1) ? - (1<<n) : 1<<n ) : 0
  1386. */
  1387. PREG_NUM powN, shlN;
  1388. WN *shl, *band, *cond, *select, *ge;
  1389. powN = AssignExpr(block, pow, type);
  1390. shl = WN_Shl(type,
  1391. WN_Intconst(type, 1),
  1392. WN_LdidPreg(type, powN));
  1393. shlN = AssignExpr(block, shl, type);
  1394. band = WN_Band(type,
  1395. WN_LdidPreg(type, powN),
  1396. WN_Intconst(type, 1));
  1397. cond = WN_EQ(type, band, WN_Zerocon(type));
  1398. select = WN_Select(type,
  1399. cond,
  1400. WN_LdidPreg(type, shlN),
  1401. WN_Neg(type, WN_LdidPreg(type, shlN)));
  1402. ge = WN_GE(type,
  1403. WN_LdidPreg(type, powN),
  1404. WN_Zerocon(type));
  1405. return WN_Select(type,
  1406. ge,
  1407. select,
  1408. WN_Zerocon(type));
  1409. }
  1410. case -1:
  1411. {
  1412. /*
  1413. * (n&1) ? -1 : 1;
  1414. */
  1415. WN *band;
  1416. band = WN_Band(type, pow, WN_Intconst(type, 1));
  1417. return WN_Select(type,
  1418. WN_EQ(type, band, WN_Zerocon(type)),
  1419. WN_Intconst(type, 1),
  1420. WN_Intconst(type, -1));
  1421. }
  1422. case 0:
  1423. /*
  1424. * (n==0) ? 1 : 0
  1425. * simpler is (n==0)
  1426. */
  1427. return WN_EQ(type, pow, WN_Zerocon(type));
  1428. case 1:
  1429. /*
  1430. * always and forever 1
  1431. */
  1432. return WN_Intconst(type, 1);
  1433. case 2:
  1434. {
  1435. /*
  1436. * (n>=0) ? 1<<n : 0
  1437. * simpler is (n>=0) << n
  1438. */
  1439. WN *ge;
  1440. PREG_NUM powN;
  1441. powN = AssignExpr(block, pow, type);
  1442. ge = WN_GE(type,
  1443. WN_LdidPreg(type, powN),
  1444. WN_Zerocon(type));
  1445. return WN_Shl(type,
  1446. ge,
  1447. WN_LdidPreg(type, powN));
  1448. }
  1449. }
  1450. }
  1451. return NULL;
  1452. }
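/*
** Worked example for the constant-base cases above: with x == -2 and
** n >= 0 the select computes (n & 1) ? -(1 << n) : (1 << n), so
** (-2)**3 == -8 and (-2)**4 == 16; for n < 0 the integer result is 0,
** which is what the outer (n >= 0) select returns.
*/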
  1453. /*
  1454. ** quad negate looks like complex negate
  1455. **
  1456. ** if q = (x,y) then
  1457. ** -q = (-x, -y)
  1458. **
  1459. ** TODO nail down preg offset interface
  1460. ** Bug 12895: MIPS quad represents ieee 128, so -q = (-x, y)
  1461. */
  1462. static WN *em_quad_neg(WN *block, WN *tree)
  1463. {
  1464. TYPE_ID newType;
  1465. TYPE_ID type = WN_rtype(tree);
  1466. PREG_NUM qN, qNlo;
  1467. /*
  1468. * assign a quad preg temp as we will be referencing twice (sortof)
  1469. */
  1470. qN = AssignExpr(block, WN_kid0(tree), type);
  1471. if (MTYPE_is_complex(type))
  1472. {
  1473. newType = MTYPE_C8;
  1474. qNlo = qN+2;
  1475. }
  1476. else /* assume MTYPE_FQ or MTYPE_F16 */
  1477. {
  1478. newType = MTYPE_F8;
  1479. qNlo = qN+1;
  1480. }
  1481. {
  1482. WN *wn, *st;
  1483. ST *npreg = MTYPE_To_PREG(newType);
  1484. #ifdef TARG_MIPS
  1485. wn = WN_LdidPreg(newType, qN); // Bug 12895
  1486. #else
  1487. wn = WN_Neg(newType, WN_LdidPreg(newType, qN));
  1488. #endif
  1489. st = WN_StidIntoPreg(newType, qN, npreg, wn);
  1490. WN_INSERT_BlockLast(block, st);
  1491. wn = WN_Neg(newType, WN_LdidPreg(newType, qNlo));
  1492. st = WN_StidIntoPreg(newType, qNlo, npreg, wn);
  1493. WN_INSERT_BlockLast(block, st);
  1494. }
  1495. WN_Delete(tree);
  1496. return WN_LdidPreg(type, qN);
  1497. }
  1498. static WN *em_quad_abs(WN *block, WN *tree)
  1499. {
  1500. TYPE_ID newType;
  1501. TYPE_ID type = WN_rtype(tree);
  1502. PREG_NUM qN, qNlo;
  1503. /*
  1504. * assign a quad preg temp as we will be referencing twice (sortof)
  1505. */
  1506. qN = AssignExpr(block, WN_kid0(tree), type);
  1507. Is_True(! MTYPE_is_complex(type), ("em_quad_abs emulates FQ not CQ"));
  1508. newType = MTYPE_F8;
  1509. qNlo = qN+1;
  1510. {
  1511. WN *wn, *st;
  1512. ST *npreg = MTYPE_To_PREG(newType);
  1513. #ifdef TARG_MIPS
  1514. wn = WN_LdidPreg(newType, qN); // Bug 12895
  1515. #else
  1516. wn = WN_Abs(newType, WN_LdidPreg(newType, qN));
  1517. #endif
  1518. st = WN_StidIntoPreg(newType, qN, npreg, wn);
  1519. WN_INSERT_BlockLast(block, st);
  1520. wn = WN_Abs(newType, WN_LdidPreg(newType, qNlo));
  1521. st = WN_StidIntoPreg(newType, qNlo, npreg, wn);
  1522. WN_INSERT_BlockLast(block, st);
  1523. }
  1524. WN_Delete(tree);
  1525. return WN_LdidPreg(type, qN);
  1526. }
  1527. /*
  1528. ** There is no native quad select, so we must turn the
  1529. ** expression back into an if/else block
  1530. **
  1531. ** select: (cond) ? exp1 : exp2
  1532. **
  1533. ** --> if (cond) qN = exp1;
  1534. ** else qN = exp2;
  1535. ** return qN
  1536. **
  1537. */
  1538. static WN *em_split_select(WN *block, WN *tree)
  1539. {
  1540. TYPE_ID rtype = WN_rtype(tree);
  1541. PREG_NUM qN;
  1542. WN *if_then, *if_else;
  1543. if_then = WN_CreateBlock();
  1544. if_else = WN_CreateBlock();
  1545. {
  1546. WN *exp1 = WN_kid1(tree);
  1547. qN = AssignExpr(if_then, exp1, rtype);
  1548. }
  1549. {
  1550. WN *wn;
  1551. WN *exp2 = WN_kid2(tree);
  1552. ST *preg = MTYPE_To_PREG(rtype);
  1553. wn = WN_StidIntoPreg(rtype, qN, preg, exp2);
  1554. WN_INSERT_BlockLast(if_else, wn);
  1555. }
  1556. {
  1557. WN *IF;
  1558. WN *cond = WN_kid0(tree);
  1559. IF = WN_CreateIf(cond, if_then, if_else);
  1560. WN_INSERT_BlockLast(block, IF);
  1561. }
  1562. WN_Delete(tree);
  1563. return WN_LdidPreg(rtype, qN);
  1564. }
  1565. /*
  1566. ** Evaluate the following function
  1567. **
  1568. ** Definition
  1569. ** x y INTRN_DIVFLOOR INTRN_DIVCEIL
  1570. ** --- -------------- -------------
  1571. ** + + x / y (x+y-1) / y
  1572. **
  1573. ** - - x / y (x+y+1) / y
  1574. **
  1575. ** + - (x+ -1-y)/y x / y
  1576. **
  1577. ** - + (x+ 1-y)/y x / y
  1578. **
  1579. **
  1580. ** The issue was to evaluate (divfloor) without branch code.
  1581. **
  1582. ** Tricks
  1583. ** f(x) = -1 (x<0)
  1584. ** +1 (x>=0)
  1585. ** {
  1586. ** t= x>>31;
  1587. ** f= t+t+1
  1588. ** }
  1589. ** MASK(x,y,v)= 0 (x>=0, y>=0), (x<0, y<0) ++, --
  1590. ** v (x>=0, y<0), (x<0, y>=0) +-, -+
  1591. ** {
  1592. ** t= (x^y)>>31
  1593. ** MASK= t & v
  1594. ** }
  1595. **
  1596. ** The cleverness (shapiro's) was the composition of these functions
  1597. ** to evaluate divfloor.
  1598. **
  1599. ** DIVFLOOR(x,y)=
  1600. ** v = f(y) - y; (-1-y) [+-], (+1-y) [-+]
  1601. ** (x + MASK(x,y,v)) / y
  1602. **
  1603. ** DIVCEIL(x,y) = -DIVFLOOR(-x,y)
  1604. **
  1605. ** x,y are assumed integral or we could just do a divide/floor
  1606. **
  1607. **
  1608. */
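/*
** A minimal scalar sketch of the branch-free DIVFLOOR described above,
** assuming 32-bit two's-complement operands and an arithmetic right
** shift (hypothetical helper, not part of this file):
**
**   static INT32 divfloor(INT32 x, INT32 y)
**   {
**     INT32 t    = y >> 31;          // -1 if y < 0, else 0
**     INT32 one  = t + t + 1;        // f(y): -1 or +1
**     INT32 v    = one - y;          // (-1-y) or (+1-y)
**     INT32 mask = (x ^ y) >> 31;    // -1 if signs differ, else 0
**     return (x + (mask & v)) / y;   // floor division
**   }
**
** DIVCEIL(x, y) then follows as -divfloor(-x, y).
*/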
  1609. static WN *em_divfloor(WN *block, TYPE_ID type, WN *x, WN *y)
  1610. {
  1611. PREG_NUM xN, yN;
  1612. WN *numer, *div;
  1613. Is_True((MTYPE_is_integral(WN_rtype(x)) &&
  1614. MTYPE_is_integral(WN_rtype(y))),
  1615. ("em_divfloor() arguments should be type integral"));
  1616. xN = AssignExpr(block, x, type);
  1617. yN = AssignExpr(block, y, type);
  1618. {
  1619. /*
  1620. * one = 1 (y >= 0)
  1621. * -1 (y < 0)
  1622. */
  1623. TYPE_ID ytype = WN_rtype(y);
  1624. WN *sra, *add, *one, *bxor, *mask, *sub, *band;
  1625. #ifdef TARG_X8664
  1626. // Bug 3264 - This algorithm requires that byte size be identical for
  1627. // ytype and type, for zero-extended 64-bit target ISA.
  1628. if (MTYPE_is_unsigned(ytype) &&
  1629. MTYPE_byte_size(ytype) < MTYPE_byte_size(type))
  1630. ytype = type;
  1631. #endif
  1632. sra = WN_Ashr(type,
  1633. WN_LdidPreg(type, yN),
  1634. WN_Intconst(type, MTYPE_size_reg(ytype)-1));
  1635. add = WN_Add(type,
  1636. sra,
  1637. WN_COPY_Tree(sra));
  1638. one = WN_Add(ytype,
  1639. add,
  1640. WN_Intconst(ytype, 1));
  1641. /*
  1642. * mask = 0 (x,y)= ++ --
  1643. * mask = -1 (x,y)= +- +-
  1644. */
  1645. bxor = WN_Bxor(ytype,
  1646. WN_LdidPreg(type, xN),
  1647. WN_LdidPreg(type, yN));
  1648. mask = WN_Ashr(type,
  1649. bxor,
  1650. WN_Intconst(type, MTYPE_size_reg(type)-1));
  1651. /*
  1652. * sub = 1 - y (y >= 0)
  1653. * -1 - y (y < 0)
  1654. */
  1655. sub = WN_Sub(type, one, WN_LdidPreg

Listing truncated here; see the repository for the remainder of the file.