diff options
author | Mike Frysinger <vapier@gentoo.org> | 2007-05-24 02:48:30 +0000 |
---|---|---|
committer | Mike Frysinger <vapier@gentoo.org> | 2007-05-24 02:48:30 +0000 |
commit | ad7c44fc22a259441b1a3a2972b52fd8d723794a (patch) | |
tree | e2386bffdbcbe1e09188f76ce2575f4b117146ef /4.2.0 | |
parent | fix from upstream to work with newer binutils #175900 (diff) | |
download | gcc-patches-ad7c44fc22a259441b1a3a2972b52fd8d723794a.tar.gz gcc-patches-ad7c44fc22a259441b1a3a2972b52fd8d723794a.tar.bz2 gcc-patches-ad7c44fc22a259441b1a3a2972b52fd8d723794a.zip |
add updated mips patches #178957
Diffstat (limited to '4.2.0')
-rw-r--r-- | 4.2.0/gentoo/90_all_mips-add-march-r10k.patch | 390 | ||||
-rw-r--r-- | 4.2.0/gentoo/91_all_mips-ip28_cache_barriers-v4.patch | 340 | ||||
-rw-r--r-- | 4.2.0/gentoo/README.history | 4 |
3 files changed, 734 insertions, 0 deletions
diff --git a/4.2.0/gentoo/90_all_mips-add-march-r10k.patch b/4.2.0/gentoo/90_all_mips-add-march-r10k.patch new file mode 100644 index 0000000..1c15bc3 --- /dev/null +++ b/4.2.0/gentoo/90_all_mips-add-march-r10k.patch @@ -0,0 +1,390 @@ +--- gcc-4.2.0/gcc/config/mips/10000.md ++++ gcc-4.2.0/gcc/config/mips/10000.md +@@ -0,0 +1,248 @@ ++;; VR1x000 pipeline description. ++;; Copyright (C) 2005, 2006 Free Software Foundation, Inc. ++;; ++;; This file is part of GCC. ++ ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published ++;; by the Free Software Foundation; either version 2, or (at your ++;; option) any later version. ++ ++;; GCC is distributed in the hope that it will be useful, but WITHOUT ++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++;; License for more details. ++ ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING. If not, write to the ++;; Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston, ++;; MA 02110-1301, USA. ++ ++ ++;; This file overrides parts of generic.md. It is derived from the ++;; old define_function_unit description. 
++ ++ ++ ++;; R12K/R14K/R16K are derivatives of R10K, thus copy its description ++;; until specific tuning for each is added ++ ++ ++;; R10000 has int queue, fp queue, address queue ++(define_automaton "r10k_int, r10k_fp, r10k_addr") ++ ++;; R10000 has 2 integer ALUs, fp-adder and fp-multiplier, load/store ++(define_cpu_unit "r10k_alu1" "r10k_int") ++(define_cpu_unit "r10k_alu2" "r10k_int") ++(define_cpu_unit "r10k_fpadd" "r10k_fp") ++(define_cpu_unit "r10k_fpmpy" "r10k_fp") ++(define_cpu_unit "r10k_loadstore" "r10k_addr") ++ ++;; R10000 has separate fp-div and fp-sqrt units as well and these can ++;; execute in parallel, however their issue & completion logic is shared ++;; by the fp-multiplier ++(define_cpu_unit "r10k_fpdiv" "r10k_fp") ++(define_cpu_unit "r10k_fpsqrt" "r10k_fp") ++ ++ ++ ++ ++;; loader ++(define_insn_reservation "r10k_load" 2 ++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") ++ (eq_attr "type" "load,prefetch,prefetchx")) ++ "r10k_loadstore") ++ ++(define_insn_reservation "r10k_store" 0 ++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") ++ (eq_attr "type" "store,fpstore,fpidxstore")) ++ "r10k_loadstore") ++ ++(define_insn_reservation "r10k_fpload" 3 ++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") ++ (eq_attr "type" "fpload,fpidxload")) ++ "r10k_loadstore") ++ ++ ++ ++ ++;; Integer add/sub + logic ops, and mf/mt hi/lo can be done by alu1 or alu2 ++;; Miscellaneous arith goes here too (this is a guess) ++(define_insn_reservation "r10k_arith" 1 ++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") ++ (eq_attr "type" "arith,mfhilo,mthilo,slt,clz,const,nop,trap")) ++ "r10k_alu1 | r10k_alu2") ++ ++ ++ ++ ++;; ALU1 handles shifts, branch eval, and condmove ++;; ++;; Brancher is separate, but part of ALU1, but can only ++;; do one branch per cycle (needs implementing??) 
++;; ++;; jump, call - unsure if brancher handles these too (added for now) ++(define_insn_reservation "r10k_shift" 1 ++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") ++ (eq_attr "type" "shift,branch,jump,call")) ++ "r10k_alu1") ++ ++(define_insn_reservation "r10k_int_cmove" 1 ++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") ++ (and (eq_attr "type" "condmove") ++ (eq_attr "mode" "SI,DI"))) ++ "r10k_alu1") ++ ++ ++ ++ ++;; Coprocessor Moves ++;; mtc1/dmtc1 are handled by ALU1 ++;; mfc1/dmfc1 are handled by the fp-multiplier ++(define_insn_reservation "r10k_mt_xfer" 3 ++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") ++ (and (eq_attr "type" "xfer") ++ (not (match_operand 0 "fpr_operand")))) ++ "r10k_alu1") ++ ++(define_insn_reservation "r10k_mf_xfer" 2 ++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") ++ (and (eq_attr "type" "xfer") ++ (match_operand 0 "fpr_operand"))) ++ "r10k_fpmpy") ++ ++ ++ ++ ++;; Only ALU2 does int multiplications and divisions ++;; R10K allows an int insn using register Lo to be issued ++;; one cycle earlier than an insn using register Hi for ++;; the insns below, however, we skip on doing this ++;; for now until correct usage of lo_operand() is figured ++;; out. ++;; ++;; Divides keep ALU2 busy, but this isn't expressed here (I think...?) 
++(define_insn_reservation "r10k_imul_single" 6 ++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") ++ (and (eq_attr "type" "imul,imul3,imadd") ++ (eq_attr "mode" "SI"))) ++ "r10k_alu2 * 6") ++ ++(define_insn_reservation "r10k_imul_double" 10 ++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") ++ (and (eq_attr "type" "imul,imul3,imadd") ++ (eq_attr "mode" "DI"))) ++ "r10k_alu2 * 10") ++ ++(define_insn_reservation "r10k_idiv_single" 35 ++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") ++ (and (eq_attr "type" "idiv") ++ (eq_attr "mode" "SI"))) ++ "r10k_alu2 * 35") ++ ++(define_insn_reservation "r10k_idiv_double" 67 ++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") ++ (and (eq_attr "type" "idiv") ++ (eq_attr "mode" "DI"))) ++ "r10k_alu2 * 67") ++ ++ ++ ++ ++;; FP add/sub, mul, abs value, neg, comp, & moves ++(define_insn_reservation "r10k_fp_miscadd" 2 ++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") ++ (eq_attr "type" "fadd,fabs,fneg,fcmp")) ++ "r10k_fpadd") ++ ++(define_insn_reservation "r10k_fp_miscmul" 2 ++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") ++ (eq_attr "type" "fmul,fmove")) ++ "r10k_fpmpy") ++ ++(define_insn_reservation "r10k_fp_cmove" 2 ++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") ++ (and (eq_attr "type" "condmove") ++ (eq_attr "mode" "SF,DF"))) ++ "r10k_fpmpy") ++ ++ ++ ++ ++;; fcvt.s.[wl] has latency 4, repeat 2 ++;; All other fcvt have latency 2, repeat 1 ++(define_insn_reservation "r10k_fcvt_single" 4 ++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") ++ (and (eq_attr "type" "fcvt") ++ (eq_attr "cnv_mode" "I2S"))) ++ "r10k_fpadd * 2") ++ ++(define_insn_reservation "r10k_fcvt_other" 2 ++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") ++ (and (eq_attr "type" "fcvt") ++ (eq_attr "cnv_mode" "!I2S"))) ++ "r10k_fpadd") ++ ++ ++ ++ ++;; fmadd - Runs through fp-adder first, then fp-multiplier ++;; ++;; The latency for fmadd is 2 cycles if the result is used ++;; by another fmadd instruction ++(define_insn_reservation 
"r10k_fmadd" 4 ++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") ++ (eq_attr "type" "fmadd")) ++ "r10k_fpadd, r10k_fpmpy") ++ ++(define_bypass 2 "r10k_fmadd" "r10k_fmadd") ++ ++ ++ ++ ++;; fp Divisions & square roots ++(define_insn_reservation "r10k_fdiv_single" 12 ++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") ++ (and (eq_attr "type" "fdiv,frdiv") ++ (eq_attr "mode" "SF"))) ++ "r10k_fpdiv * 14") ++ ++(define_insn_reservation "r10k_fdiv_double" 19 ++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") ++ (and (eq_attr "type" "fdiv,frdiv") ++ (eq_attr "mode" "DF"))) ++ "r10k_fpdiv * 21") ++ ++(define_insn_reservation "r10k_fsqrt_single" 18 ++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") ++ (and (eq_attr "type" "fsqrt") ++ (eq_attr "mode" "SF"))) ++ "r10k_fpsqrt * 20") ++ ++(define_insn_reservation "r10k_fsqrt_double" 33 ++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") ++ (and (eq_attr "type" "fsqrt") ++ (eq_attr "mode" "DF"))) ++ "r10k_fpsqrt * 35") ++ ++(define_insn_reservation "r10k_frsqrt_single" 30 ++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") ++ (and (eq_attr "type" "frsqrt") ++ (eq_attr "mode" "SF"))) ++ "r10k_fpsqrt * 20") ++ ++(define_insn_reservation "r10k_frsqrt_double" 52 ++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") ++ (and (eq_attr "type" "frsqrt") ++ (eq_attr "mode" "DF"))) ++ "r10k_fpsqrt * 35") ++ ++ ++ ++ ++;; Unknown/multi (this is a guess) ++(define_insn_reservation "r10k_unknown" 1 ++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") ++ (eq_attr "type" "unknown,multi")) ++ "r10k_alu1 + r10k_alu2") ++ +--- gcc-4.2.0/gcc/config/mips/mips.c ++++ gcc-4.2.0/gcc/config/mips/mips.c +@@ -736,6 +736,10 @@ const struct mips_cpu_info mips_cpu_info + + /* MIPS IV */ + { "r8000", PROCESSOR_R8000, 4 }, ++ { "r10000", PROCESSOR_R10000, 4 }, ++ { "r12000", PROCESSOR_R12000, 4 }, ++ { "r14000", PROCESSOR_R14000, 4 }, ++ { "r16000", PROCESSOR_R16000, 4 }, + { "vr5000", PROCESSOR_R5000, 4 }, + { "vr5400", PROCESSOR_R5400, 4 }, 
+ { "vr5500", PROCESSOR_R5500, 4 }, +@@ -1016,6 +1020,58 @@ static struct mips_rtx_cost_data const m + 1, /* branch_cost */ + 4 /* memory_latency */ + }, ++ { /* R10000 */ ++ COSTS_N_INSNS (2), /* fp_add */ ++ COSTS_N_INSNS (2), /* fp_mult_sf */ ++ COSTS_N_INSNS (2), /* fp_mult_df */ ++ COSTS_N_INSNS (12), /* fp_div_sf */ ++ COSTS_N_INSNS (19), /* fp_div_df */ ++ COSTS_N_INSNS (6), /* int_mult_si */ ++ COSTS_N_INSNS (10), /* int_mult_di */ ++ COSTS_N_INSNS (35), /* int_div_si */ ++ COSTS_N_INSNS (67), /* int_div_di */ ++ 1, /* branch_cost */ ++ 4 /* memory_latency */ ++ }, ++ { /* R12000 */ ++ COSTS_N_INSNS (2), /* fp_add */ ++ COSTS_N_INSNS (2), /* fp_mult_sf */ ++ COSTS_N_INSNS (2), /* fp_mult_df */ ++ COSTS_N_INSNS (12), /* fp_div_sf */ ++ COSTS_N_INSNS (19), /* fp_div_df */ ++ COSTS_N_INSNS (6), /* int_mult_si */ ++ COSTS_N_INSNS (10), /* int_mult_di */ ++ COSTS_N_INSNS (35), /* int_div_si */ ++ COSTS_N_INSNS (67), /* int_div_di */ ++ 1, /* branch_cost */ ++ 4 /* memory_latency */ ++ }, ++ { /* R14000 */ ++ COSTS_N_INSNS (2), /* fp_add */ ++ COSTS_N_INSNS (2), /* fp_mult_sf */ ++ COSTS_N_INSNS (2), /* fp_mult_df */ ++ COSTS_N_INSNS (12), /* fp_div_sf */ ++ COSTS_N_INSNS (19), /* fp_div_df */ ++ COSTS_N_INSNS (6), /* int_mult_si */ ++ COSTS_N_INSNS (10), /* int_mult_di */ ++ COSTS_N_INSNS (35), /* int_div_si */ ++ COSTS_N_INSNS (67), /* int_div_di */ ++ 1, /* branch_cost */ ++ 4 /* memory_latency */ ++ }, ++ { /* R16000 */ ++ COSTS_N_INSNS (2), /* fp_add */ ++ COSTS_N_INSNS (2), /* fp_mult_sf */ ++ COSTS_N_INSNS (2), /* fp_mult_df */ ++ COSTS_N_INSNS (12), /* fp_div_sf */ ++ COSTS_N_INSNS (19), /* fp_div_df */ ++ COSTS_N_INSNS (6), /* int_mult_si */ ++ COSTS_N_INSNS (10), /* int_mult_di */ ++ COSTS_N_INSNS (35), /* int_div_si */ ++ COSTS_N_INSNS (67), /* int_div_di */ ++ 1, /* branch_cost */ ++ 4 /* memory_latency */ ++ }, + { /* SB1 */ + /* These costs are the same as the SB-1A below. 
*/ + COSTS_N_INSNS (4), /* fp_add */ +@@ -9938,6 +9994,12 @@ mips_issue_rate (void) + { + switch (mips_tune) + { ++ case PROCESSOR_R10000: ++ case PROCESSOR_R12000: ++ case PROCESSOR_R14000: ++ case PROCESSOR_R16000: ++ return 4; ++ + case PROCESSOR_R4130: + case PROCESSOR_R5400: + case PROCESSOR_R5500: +--- gcc-4.2.0/gcc/config/mips/mips.h ++++ gcc-4.2.0/gcc/config/mips/mips.h +@@ -57,6 +57,10 @@ enum processor_type { + PROCESSOR_R7000, + PROCESSOR_R8000, + PROCESSOR_R9000, ++ PROCESSOR_R10000, ++ PROCESSOR_R12000, ++ PROCESSOR_R14000, ++ PROCESSOR_R16000, + PROCESSOR_SB1, + PROCESSOR_SB1A, + PROCESSOR_SR71000, +@@ -209,6 +213,10 @@ extern const struct mips_rtx_cost_data * + #define TARGET_MIPS5500 (mips_arch == PROCESSOR_R5500) + #define TARGET_MIPS7000 (mips_arch == PROCESSOR_R7000) + #define TARGET_MIPS9000 (mips_arch == PROCESSOR_R9000) ++#define TARGET_MIPS10000 (mips_arch == PROCESSOR_R10000) ++#define TARGET_MIPS12000 (mips_arch == PROCESSOR_R12000) ++#define TARGET_MIPS14000 (mips_arch == PROCESSOR_R14000) ++#define TARGET_MIPS16000 (mips_arch == PROCESSOR_R16000) + #define TARGET_SB1 (mips_arch == PROCESSOR_SB1 \ + || mips_arch == PROCESSOR_SB1A) + #define TARGET_SR71K (mips_arch == PROCESSOR_SR71000) +@@ -225,6 +233,10 @@ extern const struct mips_rtx_cost_data * + #define TUNE_MIPS6000 (mips_tune == PROCESSOR_R6000) + #define TUNE_MIPS7000 (mips_tune == PROCESSOR_R7000) + #define TUNE_MIPS9000 (mips_tune == PROCESSOR_R9000) ++#define TUNE_MIPS10000 (mips_tune == PROCESSOR_R10000) ++#define TUNE_MIPS12000 (mips_tune == PROCESSOR_R12000) ++#define TUNE_MIPS14000 (mips_tune == PROCESSOR_R14000) ++#define TUNE_MIPS16000 (mips_tune == PROCESSOR_R16000) + #define TUNE_SB1 (mips_tune == PROCESSOR_SB1 \ + || mips_tune == PROCESSOR_SB1A) + +--- gcc-4.2.0/gcc/config/mips/mips.md ++++ gcc-4.2.0/gcc/config/mips/mips.md +@@ -341,7 +341,7 @@ + ;; Attribute describing the processor. This attribute must match exactly + ;; with the processor_type enumeration in mips.h. 
+ (define_attr "cpu" +- "r3000,4kc,4kp,5kc,5kf,20kc,24k,24kx,m4k,r3900,r6000,r4000,r4100,r4111,r4120,r4130,r4300,r4600,r4650,r5000,r5400,r5500,r7000,r8000,r9000,sb1,sb1a,sr71000" ++ "r3000,4kc,4kp,5kc,5kf,20kc,24k,24kx,m4k,r3900,r6000,r4000,r4100,r4111,r4120,r4130,r4300,r4600,r4650,r5000,r5400,r5500,r7000,r8000,r9000,r10000,r12000,r14000,r16000,sb1,sb1a,sr71000" + (const (symbol_ref "mips_tune"))) + + ;; The type of hardware hazard associated with this instruction. +@@ -590,6 +590,7 @@ + (include "6000.md") + (include "7000.md") + (include "9000.md") ++(include "10000.md") + (include "sb1.md") + (include "sr71k.md") + (include "generic.md") diff --git a/4.2.0/gentoo/91_all_mips-ip28_cache_barriers-v4.patch b/4.2.0/gentoo/91_all_mips-ip28_cache_barriers-v4.patch new file mode 100644 index 0000000..2db0c33 --- /dev/null +++ b/4.2.0/gentoo/91_all_mips-ip28_cache_barriers-v4.patch @@ -0,0 +1,340 @@ +--- gcc-4.2.0/gcc/config/mips/mips.c ++++ gcc-4.2.0/gcc/config/mips/mips.c +@@ -256,6 +256,9 @@ static const char *const mips_fp_conditi + MIPS_FP_CONDITIONS (STRINGIFY) + }; + ++/* R10K Cache Barrier Functions */ ++#include "r10k-cacheb.c" ++ + /* A function to save or store a register. The first argument is the + register and the second is the stack slot. 
*/ + typedef void (*mips_save_restore_fn) (rtx, rtx); +@@ -9079,6 +9082,10 @@ mips_reorg (void) + if (TUNE_MIPS4130 && TARGET_VR4130_ALIGN) + vr4130_align_insns (); + } ++ if (TARGET_R10K_SPECEX) ++ { ++ r10k_insert_cache_barriers (); ++ } + } + + /* This function does three things: +--- gcc-4.2.0/gcc/config/mips/mips.opt ++++ gcc-4.2.0/gcc/config/mips/mips.opt +@@ -220,3 +220,13 @@ Perform VR4130-specific alignment optimi + mxgot + Target Report Var(TARGET_XGOT) + Lift restrictions on GOT size ++ ++mr10k-cache-barrier= ++Target Report Joined UInteger Var(TARGET_R10K_SPECEX) ++-mr10k-cache-barrier[=1|2] Generate cache barriers for SGI Indigo2/O2 R10k ++ ++mr10k-cache-barrier ++Target Undocumented Var(TARGET_R10K_SPECEX) VarExists ++ ++mip28-cache-barrier ++Target Undocumented Var(TARGET_R10K_SPECEX) VarExists +--- gcc-4.2.0/gcc/config/mips/r10k-cacheb.c ++++ gcc-4.2.0/gcc/config/mips/r10k-cacheb.c +@@ -0,0 +1,298 @@ ++/* Subroutines used for MIPS code generation: generate cache-barriers ++ for SiliconGraphics IP28 and IP32/R10000 kernel-code. ++ Copyright (C) 2005,2006 peter fuerst, pf@net.alphadv.de. ++ ++This file is intended to become part of GCC. ++ ++This file is free software; you can redistribute it and/or modify it ++under the terms of the GNU General Public License as published ++by the Free Software Foundation; either version 2, or (at your ++option) any later version. ++ ++This file is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING. If not, write to the ++Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston, ++MA 02110-1301 USA. 
*/ ++ ++ ++#define ASM_R10K_CACHE_BARRIER "cache 0x14,0($sp)" ++ ++static int is_stack_pointer (rtx *x, void *data); ++static int check_p_mem_expr (rtx *memx, void *data); ++static int check_p_pattern_for_store (rtx *body, void *data); ++static int strmatch (const char *txt, const char *match); ++static int check_insn_for_store (int state, rtx insn); ++static int bb_insert_store_cache_barrier (rtx head, rtx nxtb); ++static int scan_1_bb_for_store (rtx head, rtx end); ++static int r10k_insert_cache_barriers (void); ++ ++ ++/* Check, whether an instruction is a possibly harmful store instruction, ++ i.e. a store which might cause damage, if speculatively executed. */ ++ ++/* Return truth value whether the expression `*memx' instantiates ++ (mem:M (not (stackpointer_address or constant))). */ ++ ++static int ++is_stack_pointer (rtx *x, void *data) ++{ ++ return (*x == stack_pointer_rtx); ++} ++ ++static int ++check_p_mem_expr (rtx *memx, void *data) ++{ ++ if (!MEM_P (*memx) || for_each_rtx (memx, is_stack_pointer, 0)) ++ return 0; ++ ++ /* Stores/Loads to/from constant addresses can be considered ++ harmless, since: ++ 1) the address is always valid, even when taken speculatively. ++ 2a) the location is (hopefully) never used as a dma-target, thus ++ there is no danger of cache-inconsistency. ++ 2b) uncached loads/stores are guaranteed to be non-speculative. */ ++ if ( CONSTANT_P(XEXP (*memx, 0)) ) ++ return 0; ++ ++ return 1; ++} ++ ++/* Return truth value whether we find (set (mem:M (non_stackpointer_address) ++ ...)) in instruction-pattern `body'. ++ Here we assume, that addressing with the stackpointer accesses neither ++ uncached-aliased nor invalid memory. ++ (Maybe this applies to the global pointer and frame pointer also, ++ but it's safer not to assume it. And probably it's not worthwhile to ++ regard these registers) ++ ++ Speculative loads from invalid addresses also cause bus errors... ++ So check for (set (reg:M ...) 
(mem:M (non_stackpointer_address))) ++ too, unless there is an enhanced bus-error handler. */ ++ ++static int ++check_p_pattern_for_store (rtx *body, void *data) ++{ ++ if (*body && GET_CODE (*body) == SET) ++ { ++ /* Cache-barriers for SET_SRC may be requested as well. */ ++ if (!(TARGET_R10K_SPECEX & 2)) ++ body = &SET_DEST(*body); ++ ++ if (for_each_rtx (body, check_p_mem_expr, 0)) ++ return 1; ++ ++ /* Don't traverse sub-expressions again. */ ++ return -1; ++ } ++ return 0; ++} ++ ++static int ++strmatch (const char *txt, const char *match) ++{ ++ return !strncmp(txt, match, strlen (match)); ++} ++ ++/* Check for (ins (set (mem:M (dangerous_address)) ...)) or end of the ++ current basic block in instruction `insn'. ++ `state': (internal) recursion-counter and delayslot-flag ++ Criteria to recognize end-of/next basic-block are reduplicated here ++ from final_scan_insn. ++ return >0: `insn' is critical. ++ return <0: `insn' is at end of current basic-block. ++ return 0: `insn' can be ignored. */ ++ ++static int ++check_insn_for_store (int state, rtx insn) ++{ ++ rtx body; ++ ++ if (INSN_DELETED_P (insn)) ++ return 0; ++ ++ if (LABEL_P (insn)) ++ return -1; ++ ++ if (CALL_P (insn) || JUMP_P (insn) || NONJUMP_INSN_P (insn)) ++ { ++ body = PATTERN (insn); ++ if (GET_CODE (body) == SEQUENCE) ++ { ++ /* A delayed-branch sequence. */ ++ rtx insq; ++ FOR_EACH_SUBINSN(insq, insn) ++ if (! INSN_DELETED_P (insq)) ++ { ++ /* |1: delay-slot completely contained in sequence. */ ++ if (check_insn_for_store (8+state|1, insq) > 0) ++ return 1; ++ } ++ /* Following a (conditional) branch sequence, we have a new ++ basic block. */ ++ if (JUMP_P (SEQ_BEGIN(insn))) ++ return -1; ++ /* Handle a call sequence like a conditional branch sequence. */ ++ if (CALL_P (SEQ_BEGIN(insn))) ++ return -1; ++ } ++ if (GET_CODE (body) == PARALLEL) ++ if (for_each_rtx (&body, check_p_pattern_for_store, 0)) ++ return 1; ++ ++ /* Now, only a `simple' INSN or JUMP_INSN remains to be checked. 
*/ ++ if (NONJUMP_INSN_P (insn)) ++ { ++ /* Since we don't know what's inside, we must take inline ++ assembly to be dangerous. */ ++ if (GET_CODE (body) == ASM_INPUT) ++ { ++ const char *t = XSTR (body, 0); ++ if (t && !strmatch(t, ASM_R10K_CACHE_BARRIER)) ++ return 1; ++ } ++ ++ if (check_p_pattern_for_store (&body, 0) > 0) ++ return 1; ++ } ++ /* Handle a CALL_INSN instruction like a conditional branch. */ ++ if (JUMP_P (insn) || CALL_P (insn)) ++ { ++ /* Following a (conditional) branch, we have a new basic block. */ ++ /* But check insn(s) in delay-slot first. If we could know in ++ advance that this jump is in `.reorder' mode, where gas will ++ insert a `nop' into the delay-slot, we could skip this test. ++ Since we don't know, always assume `.noreorder', sometimes ++ emitting a cache-barrier, that isn't needed. */ ++ /* But if we are here recursively, already checking a (pseudo-) ++ delay-slot, we are done. */ ++ if ( !(state & 1) ) ++ for (insn = NEXT_INSN (insn); insn; insn = NEXT_INSN (insn)) ++ { ++ if (LABEL_P (insn) || CALL_P (insn) || JUMP_P (insn)) ++ /* Not in delay-slot at all. */ ++ break; ++ ++ if (NONJUMP_INSN_P (insn)) ++ { ++ if (GET_CODE (PATTERN (insn)) == SEQUENCE) ++ /* Not in delay-slot at all. */ ++ break; ++ ++ if (check_insn_for_store (8+state|1, insn) > 0) ++ return 1; ++ /* We're done anyway. */ ++ break; ++ } ++ /* skip NOTE,... */; ++ } ++ return -1; ++ } ++ } ++ return 0; ++} ++ ++ ++/* Scan a basic block, starting with `insn', for a possibly harmful store ++ instruction. If found, output a cache barrier at the start of this ++ block. 
*/ ++ ++static int ++bb_insert_store_cache_barrier (rtx head, rtx nxtb) ++{ ++ rtx insn = head; ++ ++ if (!insn || insn == nxtb) ++ return 0; ++ ++ while ((insn = NEXT_INSN (insn)) && insn != nxtb) ++ { ++ int found; ++ ++ if (NOTE_INSN_BASIC_BLOCK_P(insn)) /* See scan_1_bb_for_store() */ ++ break; ++ ++ found = check_insn_for_store (0, insn); ++ if (found < 0) ++ break; ++ if (found > 0) ++ { ++ /* found critical store instruction */ ++ insn = gen_rtx_ASM_INPUT (VOIDmode, ++ ASM_R10K_CACHE_BARRIER "\t" ++ ASM_COMMENT_START " Cache Barrier"); ++ /* Here we rely on the assumption, that an explicit delay-slot ++ - if any - is already embedded (in a sequence) in 'head'! */ ++ insn = emit_insn_after (insn, head); ++ return 1; ++ } ++ } ++ return 0; ++} ++ ++ ++/* Scan one basic block for a possibly harmful store instruction. ++ If found, insert a cache barrier at the start of this block, ++ return number of inserted cache_barriers. */ ++ ++static int ++scan_1_bb_for_store (rtx head, rtx end) ++{ ++ rtx nxtb; ++ int count; ++ gcc_assert (head); ++ gcc_assert (end); ++ ++ /* Note: 'end' is not necessarily reached from 'head' (hidden in ++ SEQUENCE, PARALLEL), but 'nxtb' is. */ ++ nxtb = NEXT_INSN (end); ++ ++ /* Each basic block starts with zero or more CODE_LABEL(s), followed ++ by one NOTE_INSN_BASIC_BLOCK. ++ Note: bb_head may equal next_insn(bb_end) already ! */ ++ while (head && head != nxtb && LABEL_P (head)) ++ head = NEXT_INSN (head); ++ ++ if (!head || head == nxtb) ++ return 0; ++ ++ /* Handle the basic block itself, at most up to next CALL_INSN. */ ++ count = bb_insert_store_cache_barrier (head, nxtb); ++ ++ /* 1) Handle any CALL_INSN instruction like a conditional branch. ++ 2) There may be "basic blocks" in the list, which are no basic blocks ++ at all. (containing CODE_LABELs in the body or gathering several ++ other basic blocks (e.g. bb5 containing bb6,bb7,bb8)). 
*/ ++ ++ while ((head = NEXT_INSN (head)) && head != nxtb) ++ { ++ if (INSN_DELETED_P (head)) ++ continue; ++ ++ /* Later we'll be called again for this bb on its own. */ ++ if (NOTE_INSN_BASIC_BLOCK_P(head)) ++ break; ++ ++ if (CALL_P (SEQ_BEGIN (head)) || LABEL_P (head)) ++ count += bb_insert_store_cache_barrier (head, nxtb); ++ } ++ return count; ++} ++ ++static int ++r10k_insert_cache_barriers (void) ++{ ++ if (TARGET_R10K_SPECEX) ++ { ++ basic_block bb; ++ ++ FOR_EACH_BB (bb) ++ if (0 <= bb->index) ++ scan_1_bb_for_store (BB_HEAD (bb), BB_END (bb)); ++ } ++ return 0; ++} diff --git a/4.2.0/gentoo/README.history b/4.2.0/gentoo/README.history index dea341c..52205ff 100644 --- a/4.2.0/gentoo/README.history +++ b/4.2.0/gentoo/README.history @@ -1,3 +1,7 @@ +1.1 [pending] + + 90_all_mips-add-march-r10k.patch + + 91_all_mips-ip28_cache_barriers-v4.patch + 1.0 18.05.2007 + 00_all_gcc-4.1-alpha-mieee-default.patch + 00_all_gcc-trampolinewarn.patch |