
/mingw-w64-v2.0.999/gcc/src/gcc/config/alpha/alpha.c


Large files are truncated; only the beginning of this file is shown below.

  1. /* Subroutines used for code generation on the DEC Alpha.
  2. Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
  3. 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
  4. Free Software Foundation, Inc.
  5. Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
  6. This file is part of GCC.
  7. GCC is free software; you can redistribute it and/or modify
  8. it under the terms of the GNU General Public License as published by
  9. the Free Software Foundation; either version 3, or (at your option)
  10. any later version.
  11. GCC is distributed in the hope that it will be useful,
  12. but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. GNU General Public License for more details.
  15. You should have received a copy of the GNU General Public License
  16. along with GCC; see the file COPYING3. If not see
  17. <http://www.gnu.org/licenses/>. */
  18. #include "config.h"
  19. #include "system.h"
  20. #include "coretypes.h"
  21. #include "tm.h"
  22. #include "rtl.h"
  23. #include "tree.h"
  24. #include "regs.h"
  25. #include "hard-reg-set.h"
  26. #include "insn-config.h"
  27. #include "conditions.h"
  28. #include "output.h"
  29. #include "insn-attr.h"
  30. #include "flags.h"
  31. #include "recog.h"
  32. #include "expr.h"
  33. #include "optabs.h"
  34. #include "reload.h"
  35. #include "obstack.h"
  36. #include "except.h"
  37. #include "function.h"
  38. #include "diagnostic-core.h"
  39. #include "ggc.h"
  40. #include "tm_p.h"
  41. #include "target.h"
  42. #include "target-def.h"
  43. #include "common/common-target.h"
  44. #include "debug.h"
  45. #include "langhooks.h"
  46. #include "splay-tree.h"
  47. #include "gimple.h"
  48. #include "tree-flow.h"
  49. #include "tree-stdarg.h"
  50. #include "tm-constrs.h"
  51. #include "df.h"
  52. #include "libfuncs.h"
  53. #include "opts.h"
  54. #include "params.h"
  55. /* Specify which cpu to schedule for. */
  56. enum processor_type alpha_tune;
  57. /* Which cpu we're generating code for. */
  58. enum processor_type alpha_cpu;
  59. static const char * const alpha_cpu_name[] =
  60. {
  61. "ev4", "ev5", "ev6"
  62. };
  63. /* Specify how accurate floating-point traps need to be. */
  64. enum alpha_trap_precision alpha_tp;
  65. /* Specify the floating-point rounding mode. */
  66. enum alpha_fp_rounding_mode alpha_fprm;
  67. /* Specify which things cause traps. */
  68. enum alpha_fp_trap_mode alpha_fptm;
  69. /* Nonzero if inside of a function, because the Alpha asm can't
  70. handle .files inside of functions. */
  71. static int inside_function = FALSE;
  72. /* The number of cycles of latency we should assume on memory reads. */
  73. int alpha_memory_latency = 3;
  74. /* Whether the function needs the GP. */
  75. static int alpha_function_needs_gp;
  76. /* The assembler name of the current function. */
  77. static const char *alpha_fnname;
  78. /* The next explicit relocation sequence number. */
  79. extern GTY(()) int alpha_next_sequence_number;
  80. int alpha_next_sequence_number = 1;
  81. /* The literal and gpdisp sequence numbers for this insn, as printed
  82. by %# and %* respectively. */
  83. extern GTY(()) int alpha_this_literal_sequence_number;
  84. extern GTY(()) int alpha_this_gpdisp_sequence_number;
  85. int alpha_this_literal_sequence_number;
  86. int alpha_this_gpdisp_sequence_number;
  87. /* Costs of various operations on the different architectures. */
  88. struct alpha_rtx_cost_data
  89. {
  90. unsigned char fp_add;
  91. unsigned char fp_mult;
  92. unsigned char fp_div_sf;
  93. unsigned char fp_div_df;
  94. unsigned char int_mult_si;
  95. unsigned char int_mult_di;
  96. unsigned char int_shift;
  97. unsigned char int_cmov;
  98. unsigned short int_div;
  99. };
  100. static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] =
  101. {
  102. { /* EV4 */
  103. COSTS_N_INSNS (6), /* fp_add */
  104. COSTS_N_INSNS (6), /* fp_mult */
  105. COSTS_N_INSNS (34), /* fp_div_sf */
  106. COSTS_N_INSNS (63), /* fp_div_df */
  107. COSTS_N_INSNS (23), /* int_mult_si */
  108. COSTS_N_INSNS (23), /* int_mult_di */
  109. COSTS_N_INSNS (2), /* int_shift */
  110. COSTS_N_INSNS (2), /* int_cmov */
  111. COSTS_N_INSNS (97), /* int_div */
  112. },
  113. { /* EV5 */
  114. COSTS_N_INSNS (4), /* fp_add */
  115. COSTS_N_INSNS (4), /* fp_mult */
  116. COSTS_N_INSNS (15), /* fp_div_sf */
  117. COSTS_N_INSNS (22), /* fp_div_df */
  118. COSTS_N_INSNS (8), /* int_mult_si */
  119. COSTS_N_INSNS (12), /* int_mult_di */
  120. COSTS_N_INSNS (1) + 1, /* int_shift */
  121. COSTS_N_INSNS (1), /* int_cmov */
  122. COSTS_N_INSNS (83), /* int_div */
  123. },
  124. { /* EV6 */
  125. COSTS_N_INSNS (4), /* fp_add */
  126. COSTS_N_INSNS (4), /* fp_mult */
  127. COSTS_N_INSNS (12), /* fp_div_sf */
  128. COSTS_N_INSNS (15), /* fp_div_df */
  129. COSTS_N_INSNS (7), /* int_mult_si */
  130. COSTS_N_INSNS (7), /* int_mult_di */
  131. COSTS_N_INSNS (1), /* int_shift */
  132. COSTS_N_INSNS (2), /* int_cmov */
  133. COSTS_N_INSNS (86), /* int_div */
  134. },
  135. };
  136. /* Similar but tuned for code size instead of execution latency. The
  137. extra +N is fractional cost tuning based on latency. It's used to
  138. encourage use of cheaper insns like shift, but only if there's just
  139. one of them. */
  140. static struct alpha_rtx_cost_data const alpha_rtx_cost_size =
  141. {
  142. COSTS_N_INSNS (1), /* fp_add */
  143. COSTS_N_INSNS (1), /* fp_mult */
  144. COSTS_N_INSNS (1), /* fp_div_sf */
  145. COSTS_N_INSNS (1) + 1, /* fp_div_df */
  146. COSTS_N_INSNS (1) + 1, /* int_mult_si */
  147. COSTS_N_INSNS (1) + 2, /* int_mult_di */
  148. COSTS_N_INSNS (1), /* int_shift */
  149. COSTS_N_INSNS (1), /* int_cmov */
  150. COSTS_N_INSNS (6), /* int_div */
  151. };
  152. /* Get the number of args of a function in one of two ways. */
  153. #if TARGET_ABI_OPEN_VMS
  154. #define NUM_ARGS crtl->args.info.num_args
  155. #else
  156. #define NUM_ARGS crtl->args.info
  157. #endif
  158. #define REG_PV 27
  159. #define REG_RA 26
  160. /* Declarations of static functions. */
  161. static struct machine_function *alpha_init_machine_status (void);
  162. static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx);
  163. #if TARGET_ABI_OPEN_VMS
  164. static void alpha_write_linkage (FILE *, const char *);
  165. static bool vms_valid_pointer_mode (enum machine_mode);
  166. #else
  167. #define vms_patch_builtins() gcc_unreachable()
  168. #endif
  169. #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
  170. /* Implement TARGET_MANGLE_TYPE. */
  171. static const char *
  172. alpha_mangle_type (const_tree type)
  173. {
  174. if (TYPE_MAIN_VARIANT (type) == long_double_type_node
  175. && TARGET_LONG_DOUBLE_128)
  176. return "g";
  177. /* For all other types, use normal C++ mangling. */
  178. return NULL;
  179. }
  180. #endif
  181. /* Parse target option strings. */
  182. static void
  183. alpha_option_override (void)
  184. {
  185. static const struct cpu_table {
  186. const char *const name;
  187. const enum processor_type processor;
  188. const int flags;
  189. const unsigned short line_size; /* in bytes */
  190. const unsigned short l1_size; /* in kb. */
  191. const unsigned short l2_size; /* in kb. */
  192. } cpu_table[] = {
  193. /* EV4/LCA45 had 8k L1 caches; EV45 had 16k L1 caches.
  194. EV4/EV45 had 128k to 16M 32-byte direct Bcache. LCA45
  195. had 64k to 8M 8-byte direct Bcache. */
  196. { "ev4", PROCESSOR_EV4, 0, 32, 8, 8*1024 },
  197. { "21064", PROCESSOR_EV4, 0, 32, 8, 8*1024 },
  198. { "ev45", PROCESSOR_EV4, 0, 32, 16, 16*1024 },
  199. /* EV5 or EV56 had 8k 32 byte L1, 96k 32 or 64 byte L2,
  200. and 1M to 16M 64 byte L3 (not modeled).
  201. PCA56 had 16k 64-byte cache; PCA57 had 32k Icache.
  202. PCA56 had 8k 64-byte cache; PCA57 had 16k Dcache. */
  203. { "ev5", PROCESSOR_EV5, 0, 32, 8, 96 },
  204. { "21164", PROCESSOR_EV5, 0, 32, 8, 96 },
  205. { "ev56", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
  206. { "21164a", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
  207. { "pca56", PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
  208. { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
  209. { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
  210. /* EV6 had 64k 64 byte L1, 1M to 16M Bcache. */
  211. { "ev6", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
  212. { "21264", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
  213. { "ev67", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
  214. 64, 64, 16*1024 },
  215. { "21264a", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
  216. 64, 64, 16*1024 }
  217. };
  218. int const ct_size = ARRAY_SIZE (cpu_table);
  219. int line_size = 0, l1_size = 0, l2_size = 0;
  220. int i;
  221. #ifdef SUBTARGET_OVERRIDE_OPTIONS
  222. SUBTARGET_OVERRIDE_OPTIONS;
  223. #endif
  224. /* Default to full IEEE compliance mode for Go language. */
  225. if (strcmp (lang_hooks.name, "GNU Go") == 0
  226. && !(target_flags_explicit & MASK_IEEE))
  227. target_flags |= MASK_IEEE;
  228. alpha_fprm = ALPHA_FPRM_NORM;
  229. alpha_tp = ALPHA_TP_PROG;
  230. alpha_fptm = ALPHA_FPTM_N;
  231. if (TARGET_IEEE)
  232. {
  233. alpha_tp = ALPHA_TP_INSN;
  234. alpha_fptm = ALPHA_FPTM_SU;
  235. }
  236. if (TARGET_IEEE_WITH_INEXACT)
  237. {
  238. alpha_tp = ALPHA_TP_INSN;
  239. alpha_fptm = ALPHA_FPTM_SUI;
  240. }
  241. if (alpha_tp_string)
  242. {
  243. if (! strcmp (alpha_tp_string, "p"))
  244. alpha_tp = ALPHA_TP_PROG;
  245. else if (! strcmp (alpha_tp_string, "f"))
  246. alpha_tp = ALPHA_TP_FUNC;
  247. else if (! strcmp (alpha_tp_string, "i"))
  248. alpha_tp = ALPHA_TP_INSN;
  249. else
  250. error ("bad value %qs for -mtrap-precision switch", alpha_tp_string);
  251. }
  252. if (alpha_fprm_string)
  253. {
  254. if (! strcmp (alpha_fprm_string, "n"))
  255. alpha_fprm = ALPHA_FPRM_NORM;
  256. else if (! strcmp (alpha_fprm_string, "m"))
  257. alpha_fprm = ALPHA_FPRM_MINF;
  258. else if (! strcmp (alpha_fprm_string, "c"))
  259. alpha_fprm = ALPHA_FPRM_CHOP;
  260. else if (! strcmp (alpha_fprm_string,"d"))
  261. alpha_fprm = ALPHA_FPRM_DYN;
  262. else
  263. error ("bad value %qs for -mfp-rounding-mode switch",
  264. alpha_fprm_string);
  265. }
  266. if (alpha_fptm_string)
  267. {
  268. if (strcmp (alpha_fptm_string, "n") == 0)
  269. alpha_fptm = ALPHA_FPTM_N;
  270. else if (strcmp (alpha_fptm_string, "u") == 0)
  271. alpha_fptm = ALPHA_FPTM_U;
  272. else if (strcmp (alpha_fptm_string, "su") == 0)
  273. alpha_fptm = ALPHA_FPTM_SU;
  274. else if (strcmp (alpha_fptm_string, "sui") == 0)
  275. alpha_fptm = ALPHA_FPTM_SUI;
  276. else
  277. error ("bad value %qs for -mfp-trap-mode switch", alpha_fptm_string);
  278. }
  279. if (alpha_cpu_string)
  280. {
  281. for (i = 0; i < ct_size; i++)
  282. if (! strcmp (alpha_cpu_string, cpu_table [i].name))
  283. {
  284. alpha_tune = alpha_cpu = cpu_table[i].processor;
  285. line_size = cpu_table[i].line_size;
  286. l1_size = cpu_table[i].l1_size;
  287. l2_size = cpu_table[i].l2_size;
  288. target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX);
  289. target_flags |= cpu_table[i].flags;
  290. break;
  291. }
  292. if (i == ct_size)
  293. error ("bad value %qs for -mcpu switch", alpha_cpu_string);
  294. }
  295. if (alpha_tune_string)
  296. {
  297. for (i = 0; i < ct_size; i++)
  298. if (! strcmp (alpha_tune_string, cpu_table [i].name))
  299. {
  300. alpha_tune = cpu_table[i].processor;
  301. line_size = cpu_table[i].line_size;
  302. l1_size = cpu_table[i].l1_size;
  303. l2_size = cpu_table[i].l2_size;
  304. break;
  305. }
  306. if (i == ct_size)
  307. error ("bad value %qs for -mtune switch", alpha_tune_string);
  308. }
  309. if (line_size)
  310. maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, line_size,
  311. global_options.x_param_values,
  312. global_options_set.x_param_values);
  313. if (l1_size)
  314. maybe_set_param_value (PARAM_L1_CACHE_SIZE, l1_size,
  315. global_options.x_param_values,
  316. global_options_set.x_param_values);
  317. if (l2_size)
  318. maybe_set_param_value (PARAM_L2_CACHE_SIZE, l2_size,
  319. global_options.x_param_values,
  320. global_options_set.x_param_values);
  321. /* Do some sanity checks on the above options. */
  322. if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
  323. && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6)
  324. {
  325. warning (0, "fp software completion requires -mtrap-precision=i");
  326. alpha_tp = ALPHA_TP_INSN;
  327. }
  328. if (alpha_cpu == PROCESSOR_EV6)
  329. {
  330. /* Except for EV6 pass 1 (not released), we always have precise
  331. arithmetic traps. Which means we can do software completion
  332. without minding trap shadows. */
  333. alpha_tp = ALPHA_TP_PROG;
  334. }
  335. if (TARGET_FLOAT_VAX)
  336. {
  337. if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
  338. {
  339. warning (0, "rounding mode not supported for VAX floats");
  340. alpha_fprm = ALPHA_FPRM_NORM;
  341. }
  342. if (alpha_fptm == ALPHA_FPTM_SUI)
  343. {
  344. warning (0, "trap mode not supported for VAX floats");
  345. alpha_fptm = ALPHA_FPTM_SU;
  346. }
  347. if (target_flags_explicit & MASK_LONG_DOUBLE_128)
  348. warning (0, "128-bit long double not supported for VAX floats");
  349. target_flags &= ~MASK_LONG_DOUBLE_128;
  350. }
  351. {
  352. char *end;
  353. int lat;
  354. if (!alpha_mlat_string)
  355. alpha_mlat_string = "L1";
  356. if (ISDIGIT ((unsigned char)alpha_mlat_string[0])
  357. && (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
  358. ;
  359. else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
  360. && ISDIGIT ((unsigned char)alpha_mlat_string[1])
  361. && alpha_mlat_string[2] == '\0')
  362. {
  363. static int const cache_latency[][4] =
  364. {
  365. { 3, 30, -1 }, /* ev4 -- Bcache is a guess */
  366. { 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */
  367. { 3, 12, 30 }, /* ev6 -- Bcache from DS20 LMbench. */
  368. };
  369. lat = alpha_mlat_string[1] - '0';
  370. if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1)
  371. {
  372. warning (0, "L%d cache latency unknown for %s",
  373. lat, alpha_cpu_name[alpha_tune]);
  374. lat = 3;
  375. }
  376. else
  377. lat = cache_latency[alpha_tune][lat-1];
  378. }
  379. else if (! strcmp (alpha_mlat_string, "main"))
  380. {
  381. /* Most current memories have about 370ns latency. This is
  382. a reasonable guess for a fast cpu. */
  383. lat = 150;
  384. }
  385. else
  386. {
  387. warning (0, "bad value %qs for -mmemory-latency", alpha_mlat_string);
  388. lat = 3;
  389. }
  390. alpha_memory_latency = lat;
  391. }
  392. /* Default the definition of "small data" to 8 bytes. */
  393. if (!global_options_set.x_g_switch_value)
  394. g_switch_value = 8;
  395. /* Infer TARGET_SMALL_DATA from -fpic/-fPIC. */
  396. if (flag_pic == 1)
  397. target_flags |= MASK_SMALL_DATA;
  398. else if (flag_pic == 2)
  399. target_flags &= ~MASK_SMALL_DATA;
  400. /* Align labels and loops for optimal branching. */
  401. /* ??? Kludge these by not doing anything if we don't optimize. */
  402. if (optimize > 0)
  403. {
  404. if (align_loops <= 0)
  405. align_loops = 16;
  406. if (align_jumps <= 0)
  407. align_jumps = 16;
  408. }
  409. if (align_functions <= 0)
  410. align_functions = 16;
  411. /* Register variables and functions with the garbage collector. */
  412. /* Set up function hooks. */
  413. init_machine_status = alpha_init_machine_status;
  414. /* Tell the compiler when we're using VAX floating point. */
  415. if (TARGET_FLOAT_VAX)
  416. {
  417. REAL_MODE_FORMAT (SFmode) = &vax_f_format;
  418. REAL_MODE_FORMAT (DFmode) = &vax_g_format;
  419. REAL_MODE_FORMAT (TFmode) = NULL;
  420. }
  421. #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  422. if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
  423. target_flags |= MASK_LONG_DOUBLE_128;
  424. #endif
  425. }
  426. /* Returns 1 if VALUE is a mask that contains full bytes of zero or ones. */
  427. int
  428. zap_mask (HOST_WIDE_INT value)
  429. {
  430. int i;
  431. for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
  432. i++, value >>= 8)
  433. if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
  434. return 0;
  435. return 1;
  436. }
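/* For example, zap_mask (0xffffff00) is 1, since every byte is either
   0x00 or 0xff and thus corresponds to a byte-select mask usable with the
   ZAP/ZAPNOT instructions, while zap_mask (0x180) is 0 because the low
   byte is 0x80.  */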
  437. /* Return true if OP is valid for a particular TLS relocation.
  438. We are already guaranteed that OP is a CONST. */
  439. int
  440. tls_symbolic_operand_1 (rtx op, int size, int unspec)
  441. {
  442. op = XEXP (op, 0);
  443. if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec)
  444. return 0;
  445. op = XVECEXP (op, 0, 0);
  446. if (GET_CODE (op) != SYMBOL_REF)
  447. return 0;
  448. switch (SYMBOL_REF_TLS_MODEL (op))
  449. {
  450. case TLS_MODEL_LOCAL_DYNAMIC:
  451. return unspec == UNSPEC_DTPREL && size == alpha_tls_size;
  452. case TLS_MODEL_INITIAL_EXEC:
  453. return unspec == UNSPEC_TPREL && size == 64;
  454. case TLS_MODEL_LOCAL_EXEC:
  455. return unspec == UNSPEC_TPREL && size == alpha_tls_size;
  456. default:
  457. gcc_unreachable ();
  458. }
  459. }
  460. /* Used by aligned_memory_operand and unaligned_memory_operand to
  461. resolve what reload is going to do with OP if it's a register. */
  462. rtx
  463. resolve_reload_operand (rtx op)
  464. {
  465. if (reload_in_progress)
  466. {
  467. rtx tmp = op;
  468. if (GET_CODE (tmp) == SUBREG)
  469. tmp = SUBREG_REG (tmp);
  470. if (REG_P (tmp)
  471. && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
  472. {
  473. op = reg_equiv_memory_loc (REGNO (tmp));
  474. if (op == 0)
  475. return 0;
  476. }
  477. }
  478. return op;
  479. }
  480. /* The scalar modes supported differs from the default check-what-c-supports
  481. version in that sometimes TFmode is available even when long double
  482. indicates only DFmode. */
  483. static bool
  484. alpha_scalar_mode_supported_p (enum machine_mode mode)
  485. {
  486. switch (mode)
  487. {
  488. case QImode:
  489. case HImode:
  490. case SImode:
  491. case DImode:
  492. case TImode: /* via optabs.c */
  493. return true;
  494. case SFmode:
  495. case DFmode:
  496. return true;
  497. case TFmode:
  498. return TARGET_HAS_XFLOATING_LIBS;
  499. default:
  500. return false;
  501. }
  502. }
  503. /* Alpha implements a couple of integer vector mode operations when
  504. TARGET_MAX is enabled. We do not check TARGET_MAX here, however,
  505. which allows the vectorizer to operate on e.g. move instructions,
  506. or when expand_vector_operations can do something useful. */
  507. static bool
  508. alpha_vector_mode_supported_p (enum machine_mode mode)
  509. {
  510. return mode == V8QImode || mode == V4HImode || mode == V2SImode;
  511. }
  512. /* Return 1 if this function can directly return via $26. */
  513. int
  514. direct_return (void)
  515. {
  516. return (TARGET_ABI_OSF
  517. && reload_completed
  518. && alpha_sa_size () == 0
  519. && get_frame_size () == 0
  520. && crtl->outgoing_args_size == 0
  521. && crtl->args.pretend_args_size == 0);
  522. }
  523. /* Return the TLS model to use for SYMBOL. */
  524. static enum tls_model
  525. tls_symbolic_operand_type (rtx symbol)
  526. {
  527. enum tls_model model;
  528. if (GET_CODE (symbol) != SYMBOL_REF)
  529. return TLS_MODEL_NONE;
  530. model = SYMBOL_REF_TLS_MODEL (symbol);
  531. /* Local-exec with a 64-bit size is the same code as initial-exec. */
  532. if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64)
  533. model = TLS_MODEL_INITIAL_EXEC;
  534. return model;
  535. }
  536. /* Return true if the function DECL will share the same GP as any
  537. function in the current unit of translation. */
  538. static bool
  539. decl_has_samegp (const_tree decl)
  540. {
  541. /* Functions that are not local can be overridden, and thus may
  542. not share the same gp. */
  543. if (!(*targetm.binds_local_p) (decl))
  544. return false;
  545. /* If -msmall-data is in effect, assume that there is only one GP
  546. for the module, and so any local symbol has this property. We
  547. need explicit relocations to be able to enforce this for symbols
  548. not defined in this unit of translation, however. */
  549. if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA)
  550. return true;
  551. /* Functions that are not external are defined in this UoT. */
  552. /* ??? Irritatingly, static functions not yet emitted are still
  553. marked "external". Apply this to non-static functions only. */
  554. return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl);
  555. }
  556. /* Return true if EXP should be placed in the small data section. */
  557. static bool
  558. alpha_in_small_data_p (const_tree exp)
  559. {
  560. /* We want to merge strings, so we never consider them small data. */
  561. if (TREE_CODE (exp) == STRING_CST)
  562. return false;
  563. /* Functions are never in the small data area. Duh. */
  564. if (TREE_CODE (exp) == FUNCTION_DECL)
  565. return false;
  566. if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
  567. {
  568. const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
  569. if (strcmp (section, ".sdata") == 0
  570. || strcmp (section, ".sbss") == 0)
  571. return true;
  572. }
  573. else
  574. {
  575. HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
  576. /* If this is an incomplete type with size 0, then we can't put it
  577. in sdata because it might be too big when completed. */
  578. if (size > 0 && size <= g_switch_value)
  579. return true;
  580. }
  581. return false;
  582. }
  583. #if TARGET_ABI_OPEN_VMS
  584. static bool
  585. vms_valid_pointer_mode (enum machine_mode mode)
  586. {
  587. return (mode == SImode || mode == DImode);
  588. }
  589. static bool
  590. alpha_linkage_symbol_p (const char *symname)
  591. {
  592. int symlen = strlen (symname);
  593. if (symlen > 4)
  594. return strcmp (&symname [symlen - 4], "..lk") == 0;
  595. return false;
  596. }
  597. #define LINKAGE_SYMBOL_REF_P(X) \
  598. ((GET_CODE (X) == SYMBOL_REF \
  599. && alpha_linkage_symbol_p (XSTR (X, 0))) \
  600. || (GET_CODE (X) == CONST \
  601. && GET_CODE (XEXP (X, 0)) == PLUS \
  602. && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \
  603. && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0))))
  604. #endif
  605. /* legitimate_address_p recognizes an RTL expression that is a valid
  606. memory address for an instruction. The MODE argument is the
  607. machine mode for the MEM expression that wants to use this address.
  608. For Alpha, we have either a constant address or the sum of a
  609. register and a constant address, or just a register. For DImode,
  610. any of those forms can be surrounded with an AND that clear the
  611. low-order three bits; this is an "unaligned" access. */
  612. static bool
  613. alpha_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
  614. {
  615. /* If this is an ldq_u type address, discard the outer AND. */
  616. if (mode == DImode
  617. && GET_CODE (x) == AND
  618. && CONST_INT_P (XEXP (x, 1))
  619. && INTVAL (XEXP (x, 1)) == -8)
  620. x = XEXP (x, 0);
  621. /* Discard non-paradoxical subregs. */
  622. if (GET_CODE (x) == SUBREG
  623. && (GET_MODE_SIZE (GET_MODE (x))
  624. < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
  625. x = SUBREG_REG (x);
  626. /* Unadorned general registers are valid. */
  627. if (REG_P (x)
  628. && (strict
  629. ? STRICT_REG_OK_FOR_BASE_P (x)
  630. : NONSTRICT_REG_OK_FOR_BASE_P (x)))
  631. return true;
  632. /* Constant addresses (i.e. +/- 32k) are valid. */
  633. if (CONSTANT_ADDRESS_P (x))
  634. return true;
  635. #if TARGET_ABI_OPEN_VMS
  636. if (LINKAGE_SYMBOL_REF_P (x))
  637. return true;
  638. #endif
  639. /* Register plus a small constant offset is valid. */
  640. if (GET_CODE (x) == PLUS)
  641. {
  642. rtx ofs = XEXP (x, 1);
  643. x = XEXP (x, 0);
  644. /* Discard non-paradoxical subregs. */
  645. if (GET_CODE (x) == SUBREG
  646. && (GET_MODE_SIZE (GET_MODE (x))
  647. < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
  648. x = SUBREG_REG (x);
  649. if (REG_P (x))
  650. {
  651. if (! strict
  652. && NONSTRICT_REG_OK_FP_BASE_P (x)
  653. && CONST_INT_P (ofs))
  654. return true;
  655. if ((strict
  656. ? STRICT_REG_OK_FOR_BASE_P (x)
  657. : NONSTRICT_REG_OK_FOR_BASE_P (x))
  658. && CONSTANT_ADDRESS_P (ofs))
  659. return true;
  660. }
  661. }
  662. /* If we're managing explicit relocations, LO_SUM is valid, as are small
  663. data symbols. Avoid explicit relocations of modes larger than word
  664. mode since i.e. $LC0+8($1) can fold around +/- 32k offset. */
  665. else if (TARGET_EXPLICIT_RELOCS
  666. && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
  667. {
  668. if (small_symbolic_operand (x, Pmode))
  669. return true;
  670. if (GET_CODE (x) == LO_SUM)
  671. {
  672. rtx ofs = XEXP (x, 1);
  673. x = XEXP (x, 0);
  674. /* Discard non-paradoxical subregs. */
  675. if (GET_CODE (x) == SUBREG
  676. && (GET_MODE_SIZE (GET_MODE (x))
  677. < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
  678. x = SUBREG_REG (x);
  679. /* Must have a valid base register. */
  680. if (! (REG_P (x)
  681. && (strict
  682. ? STRICT_REG_OK_FOR_BASE_P (x)
  683. : NONSTRICT_REG_OK_FOR_BASE_P (x))))
  684. return false;
  685. /* The symbol must be local. */
  686. if (local_symbolic_operand (ofs, Pmode)
  687. || dtp32_symbolic_operand (ofs, Pmode)
  688. || tp32_symbolic_operand (ofs, Pmode))
  689. return true;
  690. }
  691. }
  692. return false;
  693. }
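/* As an illustration of the ldq_u case above: for DImode,
   (and (plus (reg) (const_int 6)) (const_int -8)) is accepted.  Stripping
   the outer AND leaves reg+6, a valid register-plus-offset address, and
   the AND with -8 merely clears the low three bits the same way an
   ldq_u/stq_u access does.  */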
  694. /* Build the SYMBOL_REF for __tls_get_addr. */
  695. static GTY(()) rtx tls_get_addr_libfunc;
  696. static rtx
  697. get_tls_get_addr (void)
  698. {
  699. if (!tls_get_addr_libfunc)
  700. tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  701. return tls_get_addr_libfunc;
  702. }
  703. /* Try machine-dependent ways of modifying an illegitimate address
  704. to be legitimate. If we find one, return the new, valid address. */
  705. static rtx
  706. alpha_legitimize_address_1 (rtx x, rtx scratch, enum machine_mode mode)
  707. {
  708. HOST_WIDE_INT addend;
  709. /* If the address is (plus reg const_int) and the CONST_INT is not a
  710. valid offset, compute the high part of the constant and add it to
  711. the register. Then our address is (plus temp low-part-const). */
  712. if (GET_CODE (x) == PLUS
  713. && REG_P (XEXP (x, 0))
  714. && CONST_INT_P (XEXP (x, 1))
  715. && ! CONSTANT_ADDRESS_P (XEXP (x, 1)))
  716. {
  717. addend = INTVAL (XEXP (x, 1));
  718. x = XEXP (x, 0);
  719. goto split_addend;
  720. }
  721. /* If the address is (const (plus FOO const_int)), find the low-order
  722. part of the CONST_INT. Then load FOO plus any high-order part of the
  723. CONST_INT into a register. Our address is (plus reg low-part-const).
  724. This is done to reduce the number of GOT entries. */
  725. if (can_create_pseudo_p ()
  726. && GET_CODE (x) == CONST
  727. && GET_CODE (XEXP (x, 0)) == PLUS
  728. && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
  729. {
  730. addend = INTVAL (XEXP (XEXP (x, 0), 1));
  731. x = force_reg (Pmode, XEXP (XEXP (x, 0), 0));
  732. goto split_addend;
  733. }
  734. /* If we have a (plus reg const), emit the load as in (2), then add
  735. the two registers, and finally generate (plus reg low-part-const) as
  736. our address. */
  737. if (can_create_pseudo_p ()
  738. && GET_CODE (x) == PLUS
  739. && REG_P (XEXP (x, 0))
  740. && GET_CODE (XEXP (x, 1)) == CONST
  741. && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS
  742. && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1)))
  743. {
  744. addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1));
  745. x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0),
  746. XEXP (XEXP (XEXP (x, 1), 0), 0),
  747. NULL_RTX, 1, OPTAB_LIB_WIDEN);
  748. goto split_addend;
  749. }
  750. /* If this is a local symbol, split the address into HIGH/LO_SUM parts.
  751. Avoid modes larger than word mode since i.e. $LC0+8($1) can fold
  752. around +/- 32k offset. */
  753. if (TARGET_EXPLICIT_RELOCS
  754. && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
  755. && symbolic_operand (x, Pmode))
  756. {
  757. rtx r0, r16, eqv, tga, tp, insn, dest, seq;
  758. switch (tls_symbolic_operand_type (x))
  759. {
  760. case TLS_MODEL_NONE:
  761. break;
  762. case TLS_MODEL_GLOBAL_DYNAMIC:
  763. start_sequence ();
  764. r0 = gen_rtx_REG (Pmode, 0);
  765. r16 = gen_rtx_REG (Pmode, 16);
  766. tga = get_tls_get_addr ();
  767. dest = gen_reg_rtx (Pmode);
  768. seq = GEN_INT (alpha_next_sequence_number++);
  769. emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq));
  770. insn = gen_call_value_osf_tlsgd (r0, tga, seq);
  771. insn = emit_call_insn (insn);
  772. RTL_CONST_CALL_P (insn) = 1;
  773. use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
  774. insn = get_insns ();
  775. end_sequence ();
  776. emit_libcall_block (insn, dest, r0, x);
  777. return dest;
  778. case TLS_MODEL_LOCAL_DYNAMIC:
  779. start_sequence ();
  780. r0 = gen_rtx_REG (Pmode, 0);
  781. r16 = gen_rtx_REG (Pmode, 16);
  782. tga = get_tls_get_addr ();
  783. scratch = gen_reg_rtx (Pmode);
  784. seq = GEN_INT (alpha_next_sequence_number++);
  785. emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq));
  786. insn = gen_call_value_osf_tlsldm (r0, tga, seq);
  787. insn = emit_call_insn (insn);
  788. RTL_CONST_CALL_P (insn) = 1;
  789. use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
  790. insn = get_insns ();
  791. end_sequence ();
  792. eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
  793. UNSPEC_TLSLDM_CALL);
  794. emit_libcall_block (insn, scratch, r0, eqv);
  795. eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL);
  796. eqv = gen_rtx_CONST (Pmode, eqv);
  797. if (alpha_tls_size == 64)
  798. {
  799. dest = gen_reg_rtx (Pmode);
  800. emit_insn (gen_rtx_SET (VOIDmode, dest, eqv));
  801. emit_insn (gen_adddi3 (dest, dest, scratch));
  802. return dest;
  803. }
  804. if (alpha_tls_size == 32)
  805. {
  806. insn = gen_rtx_HIGH (Pmode, eqv);
  807. insn = gen_rtx_PLUS (Pmode, scratch, insn);
  808. scratch = gen_reg_rtx (Pmode);
  809. emit_insn (gen_rtx_SET (VOIDmode, scratch, insn));
  810. }
  811. return gen_rtx_LO_SUM (Pmode, scratch, eqv);
  812. case TLS_MODEL_INITIAL_EXEC:
  813. eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
  814. eqv = gen_rtx_CONST (Pmode, eqv);
  815. tp = gen_reg_rtx (Pmode);
  816. scratch = gen_reg_rtx (Pmode);
  817. dest = gen_reg_rtx (Pmode);
  818. emit_insn (gen_load_tp (tp));
  819. emit_insn (gen_rtx_SET (VOIDmode, scratch, eqv));
  820. emit_insn (gen_adddi3 (dest, tp, scratch));
  821. return dest;
  822. case TLS_MODEL_LOCAL_EXEC:
  823. eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
  824. eqv = gen_rtx_CONST (Pmode, eqv);
  825. tp = gen_reg_rtx (Pmode);
  826. emit_insn (gen_load_tp (tp));
  827. if (alpha_tls_size == 32)
  828. {
  829. insn = gen_rtx_HIGH (Pmode, eqv);
  830. insn = gen_rtx_PLUS (Pmode, tp, insn);
  831. tp = gen_reg_rtx (Pmode);
  832. emit_insn (gen_rtx_SET (VOIDmode, tp, insn));
  833. }
  834. return gen_rtx_LO_SUM (Pmode, tp, eqv);
  835. default:
  836. gcc_unreachable ();
  837. }
  838. if (local_symbolic_operand (x, Pmode))
  839. {
  840. if (small_symbolic_operand (x, Pmode))
  841. return x;
  842. else
  843. {
  844. if (can_create_pseudo_p ())
  845. scratch = gen_reg_rtx (Pmode);
  846. emit_insn (gen_rtx_SET (VOIDmode, scratch,
  847. gen_rtx_HIGH (Pmode, x)));
  848. return gen_rtx_LO_SUM (Pmode, scratch, x);
  849. }
  850. }
  851. }
  852. return NULL;
  853. split_addend:
  854. {
  855. HOST_WIDE_INT low, high;
  856. low = ((addend & 0xffff) ^ 0x8000) - 0x8000;
  857. addend -= low;
  858. high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000;
  859. addend -= high;
  860. if (addend)
  861. x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend),
  862. (!can_create_pseudo_p () ? scratch : NULL_RTX),
  863. 1, OPTAB_LIB_WIDEN);
  864. if (high)
  865. x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high),
  866. (!can_create_pseudo_p () ? scratch : NULL_RTX),
  867. 1, OPTAB_LIB_WIDEN);
  868. return plus_constant (Pmode, x, low);
  869. }
  870. }
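/* In split_addend above, ((v & 0xffff) ^ 0x8000) - 0x8000 sign-extends the
   low 16 bits of V.  E.g. for addend == 0x12348000 this yields
   low == -0x8000 and high == 0x12350000; high is a single ldah-style add
   (0x1235 << 16) and low fits the signed 16-bit displacement of the final
   address, so high + low reconstructs 0x12348000 exactly.  */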
  871. /* Try machine-dependent ways of modifying an illegitimate address
  872. to be legitimate. Return X or the new, valid address. */
  873. static rtx
  874. alpha_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
  875. enum machine_mode mode)
  876. {
  877. rtx new_x = alpha_legitimize_address_1 (x, NULL_RTX, mode);
  878. return new_x ? new_x : x;
  879. }
  880. /* Primarily this is required for TLS symbols, but given that our move
  881. patterns *ought* to be able to handle any symbol at any time, we
  882. should never be spilling symbolic operands to the constant pool, ever. */
  883. static bool
  884. alpha_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
  885. {
  886. enum rtx_code code = GET_CODE (x);
  887. return code == SYMBOL_REF || code == LABEL_REF || code == CONST;
  888. }
  889. /* We do not allow indirect calls to be optimized into sibling calls, nor
  890. can we allow a call to a function with a different GP to be optimized
  891. into a sibcall. */
  892. static bool
  893. alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
  894. {
  895. /* Can't do indirect tail calls, since we don't know if the target
  896. uses the same GP. */
  897. if (!decl)
  898. return false;
  899. /* Otherwise, we can make a tail call if the target function shares
  900. the same GP. */
  901. return decl_has_samegp (decl);
  902. }
  903. int
  904. some_small_symbolic_operand_int (rtx *px, void *data ATTRIBUTE_UNUSED)
  905. {
  906. rtx x = *px;
  907. /* Don't re-split. */
  908. if (GET_CODE (x) == LO_SUM)
  909. return -1;
  910. return small_symbolic_operand (x, Pmode) != 0;
  911. }
  912. static int
  913. split_small_symbolic_operand_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
  914. {
  915. rtx x = *px;
  916. /* Don't re-split. */
  917. if (GET_CODE (x) == LO_SUM)
  918. return -1;
  919. if (small_symbolic_operand (x, Pmode))
  920. {
  921. x = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x);
  922. *px = x;
  923. return -1;
  924. }
  925. return 0;
  926. }
  927. rtx
  928. split_small_symbolic_operand (rtx x)
  929. {
  930. x = copy_insn (x);
  931. for_each_rtx (&x, split_small_symbolic_operand_1, NULL);
  932. return x;
  933. }
  934. /* Indicate that INSN cannot be duplicated. This is true for any insn
  935. that we've marked with gpdisp relocs, since those have to stay in
  936. 1-1 correspondence with one another.
  937. Technically we could copy them if we could set up a mapping from one
  938. sequence number to another, across the set of insns to be duplicated.
  939. This seems overly complicated and error-prone since interblock motion
  940. from sched-ebb could move one of the pair of insns to a different block.
  941. Also cannot allow jsr insns to be duplicated. If they throw exceptions,
  942. then they'll be in a different block from their ldgp. Which could lead
  943. the bb reorder code to think that it would be ok to copy just the block
  944. containing the call and branch to the block containing the ldgp. */
  945. static bool
  946. alpha_cannot_copy_insn_p (rtx insn)
  947. {
  948. if (!reload_completed || !TARGET_EXPLICIT_RELOCS)
  949. return false;
  950. if (recog_memoized (insn) >= 0)
  951. return get_attr_cannot_copy (insn);
  952. else
  953. return false;
  954. }
  955. /* Try a machine-dependent way of reloading an illegitimate address
  956. operand. If we find one, push the reload and return the new rtx. */
  957. rtx
  958. alpha_legitimize_reload_address (rtx x,
  959. enum machine_mode mode ATTRIBUTE_UNUSED,
  960. int opnum, int type,
  961. int ind_levels ATTRIBUTE_UNUSED)
  962. {
  963. /* We must recognize output that we have already generated ourselves. */
  964. if (GET_CODE (x) == PLUS
  965. && GET_CODE (XEXP (x, 0)) == PLUS
  966. && REG_P (XEXP (XEXP (x, 0), 0))
  967. && CONST_INT_P (XEXP (XEXP (x, 0), 1))
  968. && CONST_INT_P (XEXP (x, 1)))
  969. {
  970. push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
  971. BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
  972. opnum, (enum reload_type) type);
  973. return x;
  974. }
  975. /* We wish to handle large displacements off a base register by
  976. splitting the addend across an ldah and the mem insn. This
  977. cuts number of extra insns needed from 3 to 1. */
  978. if (GET_CODE (x) == PLUS
  979. && REG_P (XEXP (x, 0))
  980. && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
  981. && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0)))
  982. && GET_CODE (XEXP (x, 1)) == CONST_INT)
  983. {
  984. HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
  985. HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
  986. HOST_WIDE_INT high
  987. = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
  988. /* Check for 32-bit overflow. */
  989. if (high + low != val)
  990. return NULL_RTX;
  991. /* Reload the high part into a base reg; leave the low part
  992. in the mem directly. */
  993. x = gen_rtx_PLUS (GET_MODE (x),
  994. gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
  995. GEN_INT (high)),
  996. GEN_INT (low));
  997. push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
  998. BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
  999. opnum, (enum reload_type) type);
  1000. return x;
  1001. }
  1002. return NULL_RTX;
  1003. }
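/* For example, reg + 0x12348 is rewritten as
   (plus (plus reg 0x10000) 0x2348): the inner sum is reloaded into a base
   register (roughly a single ldah of 1), and 0x2348 remains as the signed
   16-bit displacement of the memory reference itself.  */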
  1004. /* Compute a (partial) cost for rtx X. Return true if the complete
  1005. cost has been computed, and false if subexpressions should be
  1006. scanned. In either case, *TOTAL contains the cost result. */
  1007. static bool
  1008. alpha_rtx_costs (rtx x, int code, int outer_code, int opno, int *total,
  1009. bool speed)
  1010. {
  1011. enum machine_mode mode = GET_MODE (x);
  1012. bool float_mode_p = FLOAT_MODE_P (mode);
  1013. const struct alpha_rtx_cost_data *cost_data;
  1014. if (!speed)
  1015. cost_data = &alpha_rtx_cost_size;
  1016. else
  1017. cost_data = &alpha_rtx_cost_data[alpha_tune];
  1018. switch (code)
  1019. {
  1020. case CONST_INT:
  1021. /* If this is an 8-bit constant, return zero since it can be used
  1022. nearly anywhere with no cost. If it is a valid operand for an
  1023. ADD or AND, likewise return 0 if we know it will be used in that
  1024. context. Otherwise, return 2 since it might be used there later.
  1025. All other constants take at least two insns. */
  1026. if (INTVAL (x) >= 0 && INTVAL (x) < 256)
  1027. {
  1028. *total = 0;
  1029. return true;
  1030. }
  1031. /* FALLTHRU */
  1032. case CONST_DOUBLE:
  1033. if (x == CONST0_RTX (mode))
  1034. *total = 0;
  1035. else if ((outer_code == PLUS && add_operand (x, VOIDmode))
  1036. || (outer_code == AND && and_operand (x, VOIDmode)))
  1037. *total = 0;
  1038. else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode))
  1039. *total = 2;
  1040. else
  1041. *total = COSTS_N_INSNS (2);
  1042. return true;
  1043. case CONST:
  1044. case SYMBOL_REF:
  1045. case LABEL_REF:
  1046. if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode))
  1047. *total = COSTS_N_INSNS (outer_code != MEM);
  1048. else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode))
  1049. *total = COSTS_N_INSNS (1 + (outer_code != MEM));
  1050. else if (tls_symbolic_operand_type (x))
  1051. /* Estimate of cost for call_pal rduniq. */
  1052. /* ??? How many insns do we emit here? More than one... */
  1053. *total = COSTS_N_INSNS (15);
  1054. else
  1055. /* Otherwise we do a load from the GOT. */
  1056. *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
  1057. return true;
  1058. case HIGH:
  1059. /* This is effectively an add_operand. */
  1060. *total = 2;
  1061. return true;
  1062. case PLUS:
  1063. case MINUS:
  1064. if (float_mode_p)
  1065. *total = cost_data->fp_add;
  1066. else if (GET_CODE (XEXP (x, 0)) == MULT
  1067. && const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
  1068. {
  1069. *total = (rtx_cost (XEXP (XEXP (x, 0), 0),
  1070. (enum rtx_code) outer_code, opno, speed)
  1071. + rtx_cost (XEXP (x, 1),
  1072. (enum rtx_code) outer_code, opno, speed)
  1073. + COSTS_N_INSNS (1));
  1074. return true;
  1075. }
  1076. return false;
  1077. case MULT:
  1078. if (float_mode_p)
  1079. *total = cost_data->fp_mult;
  1080. else if (mode == DImode)
  1081. *total = cost_data->int_mult_di;
  1082. else
  1083. *total = cost_data->int_mult_si;
  1084. return false;
  1085. case ASHIFT:
  1086. if (CONST_INT_P (XEXP (x, 1))
  1087. && INTVAL (XEXP (x, 1)) <= 3)
  1088. {
  1089. *total = COSTS_N_INSNS (1);
  1090. return false;
  1091. }
  1092. /* FALLTHRU */
  1093. case ASHIFTRT:
  1094. case LSHIFTRT:
  1095. *total = cost_data->int_shift;
  1096. return false;
  1097. case IF_THEN_ELSE:
  1098. if (float_mode_p)
  1099. *total = cost_data->fp_add;
  1100. else
  1101. *total = cost_data->int_cmov;
  1102. return false;
  1103. case DIV:
  1104. case UDIV:
  1105. case MOD:
  1106. case UMOD:
  1107. if (!float_mode_p)
  1108. *total = cost_data->int_div;
  1109. else if (mode == SFmode)
  1110. *total = cost_data->fp_div_sf;
  1111. else
  1112. *total = cost_data->fp_div_df;
  1113. return false;
  1114. case MEM:
  1115. *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
  1116. return true;
  1117. case NEG:
  1118. if (! float_mode_p)
  1119. {
  1120. *total = COSTS_N_INSNS (1);
  1121. return false;
  1122. }
  1123. /* FALLTHRU */
  1124. case ABS:
  1125. if (! float_mode_p)
  1126. {
  1127. *total = COSTS_N_INSNS (1) + cost_data->int_cmov;
  1128. return false;
  1129. }
  1130. /* FALLTHRU */
  1131. case FLOAT:
  1132. case UNSIGNED_FLOAT:
  1133. case FIX:
  1134. case UNSIGNED_FIX:
  1135. case FLOAT_TRUNCATE:
  1136. *total = cost_data->fp_add;
  1137. return false;
  1138. case FLOAT_EXTEND:
  1139. if (MEM_P (XEXP (x, 0)))
  1140. *total = 0;
  1141. else
  1142. *total = cost_data->fp_add;
  1143. return false;
  1144. default:
  1145. return false;
  1146. }
  1147. }
  1148. /* REF is an alignable memory location. Place an aligned SImode
  1149. reference into *PALIGNED_MEM and the number of bits to shift into
  1150. *PBITNUM. Out-of-range stack slot addresses are handled below via
  1151. find_replacement during reload; no scratch register is needed. */
  1152. void
  1153. get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum)
  1154. {
  1155. rtx base;
  1156. HOST_WIDE_INT disp, offset;
  1157. gcc_assert (MEM_P (ref));
  1158. if (reload_in_progress
  1159. && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
  1160. {
  1161. base = find_replacement (&XEXP (ref, 0));
  1162. gcc_assert (memory_address_p (GET_MODE (ref), base));
  1163. }
  1164. else
  1165. base = XEXP (ref, 0);
  1166. if (GET_CODE (base) == PLUS)
  1167. disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
  1168. else
  1169. disp = 0;
  1170. /* Find the byte offset within an aligned word. If the memory itself is
  1171. claimed to be aligned, believe it. Otherwise, aligned_memory_operand
  1172. will have examined the base register and determined it is aligned, and
  1173. thus displacements from it are naturally alignable. */
  1174. if (MEM_ALIGN (ref) >= 32)
  1175. offset = 0;
  1176. else
  1177. offset = disp & 3;
  1178. /* The location should not cross aligned word boundary. */
  1179. gcc_assert (offset + GET_MODE_SIZE (GET_MODE (ref))
  1180. <= GET_MODE_SIZE (SImode));
  1181. /* Access the entire aligned word. */
  1182. *paligned_mem = widen_memory_access (ref, SImode, -offset);
  1183. /* Convert the byte offset within the word to a bit offset. */
  1184. offset *= BITS_PER_UNIT;
  1185. *pbitnum = GEN_INT (offset);
  1186. }
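/* For instance, an HImode reference 6 bytes past an aligned base register
   comes back as one SImode access covering bytes 4..7 (*PALIGNED_MEM) with
   *PBITNUM == 16, the bit position of that halfword within the aligned
   word on this little-endian target.  */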
  1187. /* Similar, but just get the address. Handle the two reload cases.
  1188. The unaligned byte address itself is returned. */
  1189. rtx
  1190. get_unaligned_address (rtx ref)
  1191. {
  1192. rtx base;
  1193. HOST_WIDE_INT offset = 0;
  1194. gcc_assert (MEM_P (ref));
  1195. if (reload_in_progress
  1196. && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
  1197. {
  1198. base = find_replacement (&XEXP (ref, 0));
  1199. gcc_assert (memory_address_p (GET_MODE (ref), base));
  1200. }
  1201. else
  1202. base = XEXP (ref, 0);
  1203. if (GET_CODE (base) == PLUS)
  1204. offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
  1205. return plus_constant (Pmode, base, offset);
  1206. }
  1207. /* Compute a value X, such that X & 7 == (ADDR + OFS) & 7.
  1208. X is always returned in a register. */
  1209. rtx
  1210. get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs)
  1211. {
  1212. if (GET_CODE (addr) == PLUS)
  1213. {
  1214. ofs += INTVAL (XEXP (addr, 1));
  1215. addr = XEXP (addr, 0);
  1216. }
  1217. return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7),
  1218. NULL_RTX, 1, OPTAB_LIB_WIDEN);
  1219. }
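/* E.g. for ADDR == (plus reg 13) and OFS == 2, this returns reg + 7, and
   indeed (reg + 7) & 7 == (reg + 13 + 2) & 7 for any value of reg.  */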
  1220. /* On the Alpha, all (non-symbolic) constants except zero go into
  1221. a floating-point register via memory. Note that we cannot
  1222. return anything that is not a subset of RCLASS, and that some
  1223. symbolic constants cannot be dropped to memory. */
  1224. enum reg_class
  1225. alpha_preferred_reload_class(rtx x, enum reg_class rclass)
  1226. {
  1227. /* Zero is present in any register class. */
  1228. if (x == CONST0_RTX (GET_MODE (x)))
  1229. return rclass;
  1230. /* These sorts of constants we can easily drop to memory. */
  1231. if (CONST_INT_P (x)
  1232. || GET_CODE (x) == CONST_DOUBLE
  1233. || GET_CODE (x) == CONST_VECTOR)
  1234. {
  1235. if (rclass == FLOAT_REGS)
  1236. return NO_REGS;
  1237. if (rclass == ALL_REGS)
  1238. return GENERAL_REGS;
  1239. return rclass;
  1240. }
  1241. /* All other kinds of constants should not (and in the case of HIGH
  1242. cannot) be dropped to memory -- instead we use a GENERAL_REGS
  1243. secondary reload. */
  1244. if (CONSTANT_P (x))
  1245. return (rclass == ALL_REGS ? GENERAL_REGS : rclass);
  1246. return rclass;
  1247. }
  1248. /* Inform reload about cases where moving X with a mode MODE to a register in
  1249. RCLASS requires an extra scratch or immediate register. Return the class
  1250. needed for the immediate register. */
  1251. static reg_class_t
  1252. alpha_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
  1253. enum machine_mode mode, secondary_reload_info *sri)
  1254. {
  1255. enum reg_class rclass = (enum reg_class) rclass_i;
  1256. /* Loading and storing HImode or QImode values to and from memory
  1257. usually requires a scratch register. */
  1258. if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode))
  1259. {
  1260. if (any_memory_operand (x, mode))
  1261. {
  1262. if (in_p)
  1263. {
  1264. if (!aligned_memory_operand (x, mode))
  1265. sri->icode = direct_optab_handler (reload_in_optab, mode);
  1266. }
  1267. else
  1268. sri->icode = direct_optab_handler (reload_out_optab, mode);
  1269. return NO_REGS;
  1270. }
  1271. }
  1272. /* We also cannot do integral arithmetic into FP regs, as might result
  1273. from register elimination into a DImode fp register. */
  1274. if (rclass == FLOAT_REGS)
  1275. {
  1276. if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
  1277. return GENERAL_REGS;
  1278. if (in_p && INTEGRAL_MODE_P (mode)
  1279. && !MEM_P (x) && !REG_P (x) && !CONST_INT_P (x))
  1280. return GENERAL_REGS;
  1281. }
  1282. return NO_REGS;
  1283. }
  1284. /* Subfunction of the following function. Update the flags of any MEM
  1285. found in part of X. */
  1286. static int
  1287. alpha_set_memflags_1 (rtx *xp, void *data)
  1288. {
  1289. rtx x = *xp, orig = (rtx) data;
  1290. if (!MEM_P (x))
  1291. return 0;
  1292. MEM_VOLATILE_P (x) = MEM_VOLATILE_P (orig);
  1293. MEM_NOTRAP_P (x) = MEM_NOTRAP_P (orig);
  1294. MEM_READONLY_P (x) = MEM_READONLY_P (orig);
  1295. /* Sadly, we cannot use alias sets because the extra aliasing
  1296. produced by the AND interferes. Given that two-byte quantities
  1297. are the only thing we would be able to differentiate anyway,
  1298. there does not seem to be any point in convoluting the early
  1299. out of the alias check. */
  1300. return -1;
  1301. }
  1302. /* Given SEQ, which is an INSN list, look for any MEMs in either
  1303. a SET_DEST or a SET_SRC and copy the volatile, no-trap, and
  1304. read-only flags from REF into each of the MEMs found. If REF is not
  1305. a MEM, don't do anything. */
  1306. void
  1307. alpha_set_memflags (rtx seq, rtx ref)
  1308. {
  1309. rtx insn;
  1310. if (!MEM_P (ref))
  1311. return;
  1312. /* This is only called from alpha.md, after having had something
  1313. generated from one of the insn patterns. So if everything is
  1314. zero, the pattern is already up-to-date. */
  1315. if (!MEM_VOLATILE_P (ref)
  1316. && !MEM_NOTRAP_P (ref)
  1317. && !MEM_READONLY_P (ref))
  1318. return;
  1319. for (insn = seq; insn; insn = NEXT_INSN (insn))
  1320. if (INSN_P (insn))
  1321. for_each_rtx (&PATTERN (insn), alpha_set_memflags_1, (void *) ref);
  1322. else
  1323. gcc_unreachable ();
  1324. }
  1325. static rtx alpha_emit_set_const (rtx, enum machine_mode, HOST_WIDE_INT,
  1326. int, bool);
  1327. /* Internal routine for alpha_emit_set_const to check for N or below insns.
  1328. If NO_OUTPUT is true, then we only check to see if N insns are possible,
  1329. and return pc_rtx if successful. */
  1330. static rtx
  1331. alpha_emit_set_const_1 (rtx target, enum machine_mode mode,
  1332. HOST_WIDE_INT c, int n, bool no_output)
  1333. {
  1334. HOST_WIDE_INT new_const;
  1335. int i, bits;
  1336. /* Use a pseudo if highly optimizing and still generating RTL. */
  1337. rtx subtarget
  1338. = (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target);
  1339. rtx temp, insn;
  1340. /* If this is a sign-extended 32-bit constant, we can do this in at most
  1341. three insns, so do it if we have enough insns left. We always have
  1342. a sign-extended 32-bit constant when compiling on a narrow machine. */
  1343. if (HOST_BITS_PER_WIDE_INT != 64
  1344. || c >> 31 == -1 || c >> 31 == 0)
  1345. {
  1346. HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
  1347. HOST_WIDE_INT tmp1 = c - low;
  1348. HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
  1349. HOST_WIDE_INT extra = 0;
  1350. /* If HIGH will be interpreted as negative but the constant is
  1351. positive, we must adjust it to do two ldha insns. */
  1352. if ((high & 0x8000) != 0 && c >= 0)
  1353. {
  1354. extra = 0x4000;
  1355. tmp1 -= 0x40000000;
  1356. high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
  1357. }
  1358. if (c == low || (low == 0 && extra == 0))
  1359. {
  1360. /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
  1361. but that meant that we can't handle INT_MIN on 32-bit machines
  1362. (like NT/Alpha), because we recurse indefinitely through
  1363. emit_move_insn to gen_movdi. So instead, since we know exactly
  1364. what we want, create it explicitly. */
  1365. if (no_output)
  1366. return pc_rtx;
  1367. if (target == NULL)
  1368. target = gen_reg_rtx (mode);
  1369. emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (c)));
  1370. return target;
  1371. }
  1372. else if (n >= 2 + (extra != 0))
  1373. {
  1374. if (no_output)
  1375. return pc_rtx;
  1376. if (!can_create_pseudo_p ())
  1377. {
  1378. emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (high << 16)));
  1379. temp = target;
  1380. }
  1381. else
  1382. temp = copy_to_suggested_reg (GEN_INT (high << 16),
  1383. subtarget, mode);
  1384. /* As of 2002-02-23, addsi3 is only available when not optimizing.
  1385. This means that if we go through expand_binop, we'll try to
  1386. generate extensions, etc, which will require new pseudos, which
  1387. will fail during some split phases. The SImode add patterns
  1388. still exist, but are not named. So build the insns by hand. */
  1389. if (extra != 0)
  1390. {
  1391. if (! subtarget)
  1392. subtarget = gen_reg_rtx (mode);
  1393. insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16));
  1394. insn = gen_rtx_SET (VOIDmode, subtarget, insn);
  1395. emit_insn (insn);
  1396. temp = subtarget;
  1397. }
  1398. if (target == NULL)
  1399. target = gen_reg_rtx (mode);
  1400. insn = gen_rtx_PLUS (mode, temp, GEN_INT (low));
  1401. insn = gen_rtx_SET (VOIDmode, target, insn);
  1402. emit_insn (insn);
  1403. return target;
  1404. }
  1405. }
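/* Worked example of the two-insn case above: c == 0x12345678 splits into
   high == 0x1234 and low == 0x5678, i.e. roughly "ldah $t,0x1234($31)"
   followed by "lda $r,0x5678($t)".  For a positive constant such as
   c == 0x7fff8000, high would come out negative, so extra == 0x4000 is
   used instead and the value is rebuilt as
   (0x4000 << 16) + (0x4000 << 16) - 0x8000.  */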
  1406. /* If we couldn't do it that way, try some other methods. But if we have
  1407. no instructions left, don't bother. Likewise, if this is SImode and
  1408. we can't make pseudos, we can't do anything since the expand_binop
  1409. and expand_unop calls will widen and try to make pseudos. */
  1410. if (n == 1 || (mode == SImode && !can_create_pseudo_p ()))
  1411. return 0;
  1412. /* Next, see if we can load a related constant and then shift and possibly
  1413. negate it to get the constant we want. Try this once each increasing
  1414. numbers of insns. */
  1415. for (i = 1; i < n; i++)
  1416. {
  1417. /* First, see if minus some low bits, we've an easy load of
  1418. high bits. */
  1419. new_const = ((c & 0xffff) ^ 0x8000) - 0x8000;
  1420. if (new_const != 0)
  1421. {
  1422. temp = alpha_emit_set_const (subtarget, mode, c - new_const, i, no_output);
  1423. if (temp)
  1424. {
  1425. if (no_output)
  1426. return temp;
  1427. return expand_binop (mode, add_optab, temp, GEN_INT (new_const),
  1428. target, 0, OPTAB_WIDEN);
  1429. }
  1430. }
  1431. /* Next try complementing. */
  1432. temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output);
  1433. if (temp)
  1434. {
  1435. if (no_output)
  1436. return temp;
  1437. return expand_unop (mode, one_cmpl_optab, temp, target, 0);
  1438. }
  1439. /* Next try to form a constant and do a left shift. We can do this
  1440. if some low-order bits are zero; the exact_log2 call below tells
  1441. us that information. The bits we are shifting out could be any
  1442. value, but here we'll just try the 0- and sign-extended forms of
  1443. the constant. To try to increase the chance of having the same
  1444. constant in more than one insn, start at the highest number of
  1445. bits to shift, but try all possibilities in case a ZAPNOT will
  1446. be useful. */
  1447. bits = exact_log2 (c & -c);
  1448. if (bits > 0)
  1449. for (; bits > 0; bits--)
  1450. {
  1451. new_const = c >> bits;
  1452. temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
  1453. if (!temp && c < 0)
  1454. {
  1455. new_const = (unsigned HOST_WIDE_INT)c >> bits;
  1456. temp = alpha_emit_set_const (subtarget, mode, new_const,
  1457. i, no_output);
  1458. }
  1459. if (temp)
  1460. {
  1461. if (no_output)
  1462. return temp;
  1463. return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
  1464. target, 0, OPTAB_WIDEN);
  1465. }
  1466. }
  1467. /* Now try high-order zero bits. Here we try the shifted-in bits as
  1468. all zero and all ones. Be careful to avoid shifting outside the
  1469. mode and to avoid shifting outside the host wide int…
