aboutsummaryrefslogtreecommitdiff
path: root/4.2.0
diff options
context:
space:
mode:
authorMike Frysinger <vapier@gentoo.org>2007-05-24 02:48:30 +0000
committerMike Frysinger <vapier@gentoo.org>2007-05-24 02:48:30 +0000
commitad7c44fc22a259441b1a3a2972b52fd8d723794a (patch)
treee2386bffdbcbe1e09188f76ce2575f4b117146ef /4.2.0
parentfix from upstream to work with newer binutils #175900 (diff)
downloadgcc-patches-ad7c44fc22a259441b1a3a2972b52fd8d723794a.tar.gz
gcc-patches-ad7c44fc22a259441b1a3a2972b52fd8d723794a.tar.bz2
gcc-patches-ad7c44fc22a259441b1a3a2972b52fd8d723794a.zip
add updated mips patches #178957
Diffstat (limited to '4.2.0')
-rw-r--r--4.2.0/gentoo/90_all_mips-add-march-r10k.patch390
-rw-r--r--4.2.0/gentoo/91_all_mips-ip28_cache_barriers-v4.patch340
-rw-r--r--4.2.0/gentoo/README.history4
3 files changed, 734 insertions, 0 deletions
diff --git a/4.2.0/gentoo/90_all_mips-add-march-r10k.patch b/4.2.0/gentoo/90_all_mips-add-march-r10k.patch
new file mode 100644
index 0000000..1c15bc3
--- /dev/null
+++ b/4.2.0/gentoo/90_all_mips-add-march-r10k.patch
@@ -0,0 +1,390 @@
+--- gcc-4.2.0/gcc/config/mips/10000.md
++++ gcc-4.2.0/gcc/config/mips/10000.md
+@@ -0,0 +1,248 @@
++;; VR1x000 pipeline description.
++;; Copyright (C) 2005, 2006 Free Software Foundation, Inc.
++;;
++;; This file is part of GCC.
++
++;; GCC is free software; you can redistribute it and/or modify it
++;; under the terms of the GNU General Public License as published
++;; by the Free Software Foundation; either version 2, or (at your
++;; option) any later version.
++
++;; GCC is distributed in the hope that it will be useful, but WITHOUT
++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
++;; License for more details.
++
++;; You should have received a copy of the GNU General Public License
++;; along with GCC; see the file COPYING. If not, write to the
++;; Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
++;; MA 02110-1301, USA.
++
++
++;; This file overrides parts of generic.md. It is derived from the
++;; old define_function_unit description.
++
++
++
++;; R12K/R14K/R16K are derivatives of R10K, thus copy its description
++;; until specific tuning for each is added
++
++
++;; R10000 has int queue, fp queue, address queue
++(define_automaton "r10k_int, r10k_fp, r10k_addr")
++
++;; R10000 has 2 integer ALUs, fp-adder and fp-multiplier, load/store
++(define_cpu_unit "r10k_alu1" "r10k_int")
++(define_cpu_unit "r10k_alu2" "r10k_int")
++(define_cpu_unit "r10k_fpadd" "r10k_fp")
++(define_cpu_unit "r10k_fpmpy" "r10k_fp")
++(define_cpu_unit "r10k_loadstore" "r10k_addr")
++
++;; R10000 has separate fp-div and fp-sqrt units as well and these can
++;; execute in parallel, however their issue & completion logic is shared
++;; by the fp-multiplier
++(define_cpu_unit "r10k_fpdiv" "r10k_fp")
++(define_cpu_unit "r10k_fpsqrt" "r10k_fp")
++
++
++
++
++;; loader
++(define_insn_reservation "r10k_load" 2
++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
++ (eq_attr "type" "load,prefetch,prefetchx"))
++ "r10k_loadstore")
++
++(define_insn_reservation "r10k_store" 0
++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
++ (eq_attr "type" "store,fpstore,fpidxstore"))
++ "r10k_loadstore")
++
++(define_insn_reservation "r10k_fpload" 3
++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
++ (eq_attr "type" "fpload,fpidxload"))
++ "r10k_loadstore")
++
++
++
++
++;; Integer add/sub + logic ops, and mf/mt hi/lo can be done by alu1 or alu2
++;; Miscellaneous arith goes here too (this is a guess)
++(define_insn_reservation "r10k_arith" 1
++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
++ (eq_attr "type" "arith,mfhilo,mthilo,slt,clz,const,nop,trap"))
++ "r10k_alu1 | r10k_alu2")
++
++
++
++
++;; ALU1 handles shifts, branch eval, and condmove
++;;
++;; The brancher is a separate part of ALU1, and it can only
++;; do one branch per cycle (needs implementing??)
++;;
++;; jump, call - unsure if brancher handles these too (added for now)
++(define_insn_reservation "r10k_shift" 1
++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
++ (eq_attr "type" "shift,branch,jump,call"))
++ "r10k_alu1")
++
++(define_insn_reservation "r10k_int_cmove" 1
++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
++ (and (eq_attr "type" "condmove")
++ (eq_attr "mode" "SI,DI")))
++ "r10k_alu1")
++
++
++
++
++;; Coprocessor Moves
++;; mtc1/dmtc1 are handled by ALU1
++;; mfc1/dmfc1 are handled by the fp-multiplier
++(define_insn_reservation "r10k_mt_xfer" 3
++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
++ (and (eq_attr "type" "xfer")
++ (not (match_operand 0 "fpr_operand"))))
++ "r10k_alu1")
++
++(define_insn_reservation "r10k_mf_xfer" 2
++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
++ (and (eq_attr "type" "xfer")
++ (match_operand 0 "fpr_operand")))
++ "r10k_fpmpy")
++
++
++
++
++;; Only ALU2 does int multiplications and divisions.
++;; R10K allows an int insn using register Lo to be issued
++;; one cycle earlier than an insn using register Hi for
++;; the insns below; however, we do not model this
++;; for now, until correct usage of lo_operand() is figured
++;; out.
++;;
++;; Divides keep ALU2 busy, but this isn't expressed here (I think...?)
++(define_insn_reservation "r10k_imul_single" 6
++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
++ (and (eq_attr "type" "imul,imul3,imadd")
++ (eq_attr "mode" "SI")))
++ "r10k_alu2 * 6")
++
++(define_insn_reservation "r10k_imul_double" 10
++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
++ (and (eq_attr "type" "imul,imul3,imadd")
++ (eq_attr "mode" "DI")))
++ "r10k_alu2 * 10")
++
++(define_insn_reservation "r10k_idiv_single" 35
++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
++ (and (eq_attr "type" "idiv")
++ (eq_attr "mode" "SI")))
++ "r10k_alu2 * 35")
++
++(define_insn_reservation "r10k_idiv_double" 67
++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
++ (and (eq_attr "type" "idiv")
++ (eq_attr "mode" "DI")))
++ "r10k_alu2 * 67")
++
++
++
++
++;; FP add/sub, mul, abs value, neg, comp, & moves
++(define_insn_reservation "r10k_fp_miscadd" 2
++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
++ (eq_attr "type" "fadd,fabs,fneg,fcmp"))
++ "r10k_fpadd")
++
++(define_insn_reservation "r10k_fp_miscmul" 2
++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
++ (eq_attr "type" "fmul,fmove"))
++ "r10k_fpmpy")
++
++(define_insn_reservation "r10k_fp_cmove" 2
++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
++ (and (eq_attr "type" "condmove")
++ (eq_attr "mode" "SF,DF")))
++ "r10k_fpmpy")
++
++
++
++
++;; fcvt.s.[wl] has latency 4, repeat 2
++;; All other fcvt have latency 2, repeat 1
++(define_insn_reservation "r10k_fcvt_single" 4
++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
++ (and (eq_attr "type" "fcvt")
++ (eq_attr "cnv_mode" "I2S")))
++ "r10k_fpadd * 2")
++
++(define_insn_reservation "r10k_fcvt_other" 2
++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
++ (and (eq_attr "type" "fcvt")
++ (eq_attr "cnv_mode" "!I2S")))
++ "r10k_fpadd")
++
++
++
++
++;; fmadd - Runs through fp-adder first, then fp-multiplier
++;;
++;; The latency for fmadd is 2 cycles if the result is used
++;; by another fmadd instruction
++(define_insn_reservation "r10k_fmadd" 4
++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
++ (eq_attr "type" "fmadd"))
++ "r10k_fpadd, r10k_fpmpy")
++
++(define_bypass 2 "r10k_fmadd" "r10k_fmadd")
++
++
++
++
++;; fp Divisions & square roots
++(define_insn_reservation "r10k_fdiv_single" 12
++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
++ (and (eq_attr "type" "fdiv,frdiv")
++ (eq_attr "mode" "SF")))
++ "r10k_fpdiv * 14")
++
++(define_insn_reservation "r10k_fdiv_double" 19
++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
++ (and (eq_attr "type" "fdiv,frdiv")
++ (eq_attr "mode" "DF")))
++ "r10k_fpdiv * 21")
++
++(define_insn_reservation "r10k_fsqrt_single" 18
++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
++ (and (eq_attr "type" "fsqrt")
++ (eq_attr "mode" "SF")))
++ "r10k_fpsqrt * 20")
++
++(define_insn_reservation "r10k_fsqrt_double" 33
++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
++ (and (eq_attr "type" "fsqrt")
++ (eq_attr "mode" "DF")))
++ "r10k_fpsqrt * 35")
++
++(define_insn_reservation "r10k_frsqrt_single" 30
++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
++ (and (eq_attr "type" "frsqrt")
++ (eq_attr "mode" "SF")))
++ "r10k_fpsqrt * 20")
++
++(define_insn_reservation "r10k_frsqrt_double" 52
++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
++ (and (eq_attr "type" "frsqrt")
++ (eq_attr "mode" "DF")))
++ "r10k_fpsqrt * 35")
++
++
++
++
++;; Unknown/multi (this is a guess)
++(define_insn_reservation "r10k_unknown" 1
++ (and (eq_attr "cpu" "r10000,r12000,r14000,r16000")
++ (eq_attr "type" "unknown,multi"))
++ "r10k_alu1 + r10k_alu2")
++
+--- gcc-4.2.0/gcc/config/mips/mips.c
++++ gcc-4.2.0/gcc/config/mips/mips.c
+@@ -736,6 +736,10 @@ const struct mips_cpu_info mips_cpu_info
+
+ /* MIPS IV */
+ { "r8000", PROCESSOR_R8000, 4 },
++ { "r10000", PROCESSOR_R10000, 4 },
++ { "r12000", PROCESSOR_R12000, 4 },
++ { "r14000", PROCESSOR_R14000, 4 },
++ { "r16000", PROCESSOR_R16000, 4 },
+ { "vr5000", PROCESSOR_R5000, 4 },
+ { "vr5400", PROCESSOR_R5400, 4 },
+ { "vr5500", PROCESSOR_R5500, 4 },
+@@ -1016,6 +1020,58 @@ static struct mips_rtx_cost_data const m
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ },
++ { /* R10000 */
++ COSTS_N_INSNS (2), /* fp_add */
++ COSTS_N_INSNS (2), /* fp_mult_sf */
++ COSTS_N_INSNS (2), /* fp_mult_df */
++ COSTS_N_INSNS (12), /* fp_div_sf */
++ COSTS_N_INSNS (19), /* fp_div_df */
++ COSTS_N_INSNS (6), /* int_mult_si */
++ COSTS_N_INSNS (10), /* int_mult_di */
++ COSTS_N_INSNS (35), /* int_div_si */
++ COSTS_N_INSNS (67), /* int_div_di */
++ 1, /* branch_cost */
++ 4 /* memory_latency */
++ },
++ { /* R12000 */
++ COSTS_N_INSNS (2), /* fp_add */
++ COSTS_N_INSNS (2), /* fp_mult_sf */
++ COSTS_N_INSNS (2), /* fp_mult_df */
++ COSTS_N_INSNS (12), /* fp_div_sf */
++ COSTS_N_INSNS (19), /* fp_div_df */
++ COSTS_N_INSNS (6), /* int_mult_si */
++ COSTS_N_INSNS (10), /* int_mult_di */
++ COSTS_N_INSNS (35), /* int_div_si */
++ COSTS_N_INSNS (67), /* int_div_di */
++ 1, /* branch_cost */
++ 4 /* memory_latency */
++ },
++ { /* R14000 */
++ COSTS_N_INSNS (2), /* fp_add */
++ COSTS_N_INSNS (2), /* fp_mult_sf */
++ COSTS_N_INSNS (2), /* fp_mult_df */
++ COSTS_N_INSNS (12), /* fp_div_sf */
++ COSTS_N_INSNS (19), /* fp_div_df */
++ COSTS_N_INSNS (6), /* int_mult_si */
++ COSTS_N_INSNS (10), /* int_mult_di */
++ COSTS_N_INSNS (35), /* int_div_si */
++ COSTS_N_INSNS (67), /* int_div_di */
++ 1, /* branch_cost */
++ 4 /* memory_latency */
++ },
++ { /* R16000 */
++ COSTS_N_INSNS (2), /* fp_add */
++ COSTS_N_INSNS (2), /* fp_mult_sf */
++ COSTS_N_INSNS (2), /* fp_mult_df */
++ COSTS_N_INSNS (12), /* fp_div_sf */
++ COSTS_N_INSNS (19), /* fp_div_df */
++ COSTS_N_INSNS (6), /* int_mult_si */
++ COSTS_N_INSNS (10), /* int_mult_di */
++ COSTS_N_INSNS (35), /* int_div_si */
++ COSTS_N_INSNS (67), /* int_div_di */
++ 1, /* branch_cost */
++ 4 /* memory_latency */
++ },
+ { /* SB1 */
+ /* These costs are the same as the SB-1A below. */
+ COSTS_N_INSNS (4), /* fp_add */
+@@ -9938,6 +9994,12 @@ mips_issue_rate (void)
+ {
+ switch (mips_tune)
+ {
++ case PROCESSOR_R10000:
++ case PROCESSOR_R12000:
++ case PROCESSOR_R14000:
++ case PROCESSOR_R16000:
++ return 4;
++
+ case PROCESSOR_R4130:
+ case PROCESSOR_R5400:
+ case PROCESSOR_R5500:
+--- gcc-4.2.0/gcc/config/mips/mips.h
++++ gcc-4.2.0/gcc/config/mips/mips.h
+@@ -57,6 +57,10 @@ enum processor_type {
+ PROCESSOR_R7000,
+ PROCESSOR_R8000,
+ PROCESSOR_R9000,
++ PROCESSOR_R10000,
++ PROCESSOR_R12000,
++ PROCESSOR_R14000,
++ PROCESSOR_R16000,
+ PROCESSOR_SB1,
+ PROCESSOR_SB1A,
+ PROCESSOR_SR71000,
+@@ -209,6 +213,10 @@ extern const struct mips_rtx_cost_data *
+ #define TARGET_MIPS5500 (mips_arch == PROCESSOR_R5500)
+ #define TARGET_MIPS7000 (mips_arch == PROCESSOR_R7000)
+ #define TARGET_MIPS9000 (mips_arch == PROCESSOR_R9000)
++#define TARGET_MIPS10000 (mips_arch == PROCESSOR_R10000)
++#define TARGET_MIPS12000 (mips_arch == PROCESSOR_R12000)
++#define TARGET_MIPS14000 (mips_arch == PROCESSOR_R14000)
++#define TARGET_MIPS16000 (mips_arch == PROCESSOR_R16000)
+ #define TARGET_SB1 (mips_arch == PROCESSOR_SB1 \
+ || mips_arch == PROCESSOR_SB1A)
+ #define TARGET_SR71K (mips_arch == PROCESSOR_SR71000)
+@@ -225,6 +233,10 @@ extern const struct mips_rtx_cost_data *
+ #define TUNE_MIPS6000 (mips_tune == PROCESSOR_R6000)
+ #define TUNE_MIPS7000 (mips_tune == PROCESSOR_R7000)
+ #define TUNE_MIPS9000 (mips_tune == PROCESSOR_R9000)
++#define TUNE_MIPS10000 (mips_tune == PROCESSOR_R10000)
++#define TUNE_MIPS12000 (mips_tune == PROCESSOR_R12000)
++#define TUNE_MIPS14000 (mips_tune == PROCESSOR_R14000)
++#define TUNE_MIPS16000 (mips_tune == PROCESSOR_R16000)
+ #define TUNE_SB1 (mips_tune == PROCESSOR_SB1 \
+ || mips_tune == PROCESSOR_SB1A)
+
+--- gcc-4.2.0/gcc/config/mips/mips.md
++++ gcc-4.2.0/gcc/config/mips/mips.md
+@@ -341,7 +341,7 @@
+ ;; Attribute describing the processor. This attribute must match exactly
+ ;; with the processor_type enumeration in mips.h.
+ (define_attr "cpu"
+- "r3000,4kc,4kp,5kc,5kf,20kc,24k,24kx,m4k,r3900,r6000,r4000,r4100,r4111,r4120,r4130,r4300,r4600,r4650,r5000,r5400,r5500,r7000,r8000,r9000,sb1,sb1a,sr71000"
++ "r3000,4kc,4kp,5kc,5kf,20kc,24k,24kx,m4k,r3900,r6000,r4000,r4100,r4111,r4120,r4130,r4300,r4600,r4650,r5000,r5400,r5500,r7000,r8000,r9000,r10000,r12000,r14000,r16000,sb1,sb1a,sr71000"
+ (const (symbol_ref "mips_tune")))
+
+ ;; The type of hardware hazard associated with this instruction.
+@@ -590,6 +590,7 @@
+ (include "6000.md")
+ (include "7000.md")
+ (include "9000.md")
++(include "10000.md")
+ (include "sb1.md")
+ (include "sr71k.md")
+ (include "generic.md")
diff --git a/4.2.0/gentoo/91_all_mips-ip28_cache_barriers-v4.patch b/4.2.0/gentoo/91_all_mips-ip28_cache_barriers-v4.patch
new file mode 100644
index 0000000..2db0c33
--- /dev/null
+++ b/4.2.0/gentoo/91_all_mips-ip28_cache_barriers-v4.patch
@@ -0,0 +1,340 @@
+--- gcc-4.2.0/gcc/config/mips/mips.c
++++ gcc-4.2.0/gcc/config/mips/mips.c
+@@ -256,6 +256,9 @@ static const char *const mips_fp_conditi
+ MIPS_FP_CONDITIONS (STRINGIFY)
+ };
+
++/* R10K Cache Barrier Functions */
++#include "r10k-cacheb.c"
++
+ /* A function to save or store a register. The first argument is the
+ register and the second is the stack slot. */
+ typedef void (*mips_save_restore_fn) (rtx, rtx);
+@@ -9079,6 +9082,10 @@ mips_reorg (void)
+ if (TUNE_MIPS4130 && TARGET_VR4130_ALIGN)
+ vr4130_align_insns ();
+ }
++ if (TARGET_R10K_SPECEX)
++ {
++ r10k_insert_cache_barriers ();
++ }
+ }
+
+ /* This function does three things:
+--- gcc-4.2.0/gcc/config/mips/mips.opt
++++ gcc-4.2.0/gcc/config/mips/mips.opt
+@@ -220,3 +220,13 @@ Perform VR4130-specific alignment optimi
+ mxgot
+ Target Report Var(TARGET_XGOT)
+ Lift restrictions on GOT size
++
++mr10k-cache-barrier=
++Target Report Joined UInteger Var(TARGET_R10K_SPECEX)
++-mr10k-cache-barrier[=1|2] Generate cache barriers for SGI Indigo2/O2 R10k
++
++mr10k-cache-barrier
++Target Undocumented Var(TARGET_R10K_SPECEX) VarExists
++
++mip28-cache-barrier
++Target Undocumented Var(TARGET_R10K_SPECEX) VarExists
+--- gcc-4.2.0/gcc/config/mips/r10k-cacheb.c
++++ gcc-4.2.0/gcc/config/mips/r10k-cacheb.c
+@@ -0,0 +1,298 @@
++/* Subroutines used for MIPS code generation: generate cache-barriers
++ for SiliconGraphics IP28 and IP32/R10000 kernel-code.
++ Copyright (C) 2005,2006 peter fuerst, pf@net.alphadv.de.
++
++This file is intended to become part of GCC.
++
++This file is free software; you can redistribute it and/or modify it
++under the terms of the GNU General Public License as published
++by the Free Software Foundation; either version 2, or (at your
++option) any later version.
++
++This file is distributed in the hope that it will be useful,
++but WITHOUT ANY WARRANTY; without even the implied warranty of
++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++GNU General Public License for more details.
++
++You should have received a copy of the GNU General Public License
++along with GCC; see the file COPYING. If not, write to the
++Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
++MA 02110-1301 USA. */
++
++
++#define ASM_R10K_CACHE_BARRIER "cache 0x14,0($sp)"
++
++static int is_stack_pointer (rtx *x, void *data);
++static int check_p_mem_expr (rtx *memx, void *data);
++static int check_p_pattern_for_store (rtx *body, void *data);
++static int strmatch (const char *txt, const char *match);
++static int check_insn_for_store (int state, rtx insn);
++static int bb_insert_store_cache_barrier (rtx head, rtx nxtb);
++static int scan_1_bb_for_store (rtx head, rtx end);
++static int r10k_insert_cache_barriers (void);
++
++
++/* Check whether an instruction is a possibly harmful store instruction,
++ i.e. a store which might cause damage if speculatively executed. */
++
++/* Helper for check_p_mem_expr, which returns truth value whether `*memx'
++ instantiates (mem:M (not (stackpointer_address or constant))). */
++
++static int
++is_stack_pointer (rtx *x, void *data)
++{
++ return (*x == stack_pointer_rtx); /* for_each_rtx callback; `data' is unused */
++}
++
++static int
++check_p_mem_expr (rtx *memx, void *data)
++{
++ if (!MEM_P (*memx) || for_each_rtx (memx, is_stack_pointer, 0))
++ return 0; /* not a MEM, or the stack pointer occurs inside it */
++
++ /* Stores/Loads to/from constant addresses can be considered
++ harmless, since:
++ 1) the address is always valid, even when taken speculatively.
++ 2a) the location is (hopefully) never used as a dma-target, thus
++ there is no danger of cache-inconsistency.
++ 2b) uncached loads/stores are guaranteed to be non-speculative. */
++ if ( CONSTANT_P(XEXP (*memx, 0)) )
++ return 0;
++
++ return 1; /* MEM whose address is neither constant nor stack-pointer based */
++}
++
++/* Return truth value whether we find (set (mem:M (non_stackpointer_address)
++ ...)) in instruction-pattern `body'.
++ Here we assume that addressing with the stackpointer accesses neither
++ uncached-aliased nor invalid memory.
++ (Maybe this applies to the global pointer and frame pointer also,
++ but it's safer not to assume it. And probably it's not worthwhile to
++ regard these registers.)
++
++ Speculative loads from invalid addresses also cause bus errors...
++ So check for (set (reg:M ...) (mem:M (non_stackpointer_address)))
++ too, unless there is an enhanced bus-error handler. */
++
++static int
++check_p_pattern_for_store (rtx *body, void *data)
++{
++ if (*body && GET_CODE (*body) == SET)
++ {
++ /* Check only SET_DEST unless SET_SRC was requested too (flag value 2). */
++ if (!(TARGET_R10K_SPECEX & 2))
++ body = &SET_DEST(*body);
++
++ if (for_each_rtx (body, check_p_mem_expr, 0))
++ return 1;
++
++ /* -1 tells for_each_rtx not to traverse the sub-expressions again. */
++ return -1;
++ }
++ return 0;
++}
++
++static int
++strmatch (const char *txt, const char *match)
++{
++ return !strncmp(txt, match, strlen (match)); /* 1 iff `txt' starts with `match' */
++}
++
++/* Check for (ins (set (mem:M (dangerous_address)) ...)) or end of the
++ current basic block in instruction `insn'.
++ `state': (internal) recursion-counter (+8 per level) and delayslot-flag (bit 0)
++ Criteria to recognize end-of/next basic-block are duplicated here
++ from final_scan_insn.
++ return >0: `insn' is critical.
++ return <0: `insn' is at end of current basic-block.
++ return 0: `insn' can be ignored. */
++
++static int
++check_insn_for_store (int state, rtx insn)
++{
++ rtx body;
++
++ if (INSN_DELETED_P (insn))
++ return 0;
++
++ if (LABEL_P (insn))
++ return -1;
++
++ if (CALL_P (insn) || JUMP_P (insn) || NONJUMP_INSN_P (insn))
++ {
++ body = PATTERN (insn);
++ if (GET_CODE (body) == SEQUENCE)
++ {
++ /* A delayed-branch sequence. */
++ rtx insq;
++ FOR_EACH_SUBINSN(insq, insn)
++ if (! INSN_DELETED_P (insq))
++ {
++ /* |1: delay-slot completely contained in sequence. NB: parsed as (8+state)|1. */
++ if (check_insn_for_store (8+state|1, insq) > 0)
++ return 1;
++ }
++ /* Following a (conditional) branch sequence, we have a new
++ basic block. */
++ if (JUMP_P (SEQ_BEGIN(insn)))
++ return -1;
++ /* Handle a call sequence like a conditional branch sequence. */
++ if (CALL_P (SEQ_BEGIN(insn)))
++ return -1;
++ }
++ if (GET_CODE (body) == PARALLEL)
++ if (for_each_rtx (&body, check_p_pattern_for_store, 0))
++ return 1;
++
++ /* Now, only a `simple' INSN or JUMP_INSN remains to be checked. */
++ if (NONJUMP_INSN_P (insn))
++ {
++ /* Since we don't know what's inside, we must take inline
++ assembly to be dangerous. */
++ if (GET_CODE (body) == ASM_INPUT)
++ {
++ const char *t = XSTR (body, 0);
++ if (t && !strmatch(t, ASM_R10K_CACHE_BARRIER))
++ return 1;
++ }
++
++ if (check_p_pattern_for_store (&body, 0) > 0)
++ return 1;
++ }
++ /* Handle a CALL_INSN instruction like a conditional branch. */
++ if (JUMP_P (insn) || CALL_P (insn))
++ {
++ /* Following a (conditional) branch, we have a new basic block. */
++ /* But check insn(s) in delay-slot first. If we could know in
++ advance that this jump is in `.reorder' mode, where gas will
++ insert a `nop' into the delay-slot, we could skip this test.
++ Since we don't know, always assume `.noreorder', sometimes
++ emitting a cache-barrier, that isn't needed. */
++ /* But if we are here recursively, already checking a (pseudo-)
++ delay-slot, we are done. */
++ if ( !(state & 1) )
++ for (insn = NEXT_INSN (insn); insn; insn = NEXT_INSN (insn))
++ {
++ if (LABEL_P (insn) || CALL_P (insn) || JUMP_P (insn))
++ /* Not in delay-slot at all. */
++ break;
++
++ if (NONJUMP_INSN_P (insn))
++ {
++ if (GET_CODE (PATTERN (insn)) == SEQUENCE)
++ /* Not in delay-slot at all. */
++ break;
++
++ if (check_insn_for_store (8+state|1, insn) > 0)
++ return 1;
++ /* We're done anyway. */
++ break;
++ }
++ /* skip NOTE,... */;
++ }
++ return -1;
++ }
++ }
++ return 0;
++}
++
++
++/* Scan the insns after `head' up to (excluding) `nxtb' for a possibly
++ harmful store instruction. If found, emit a cache barrier right after
++ `head' and return 1; otherwise return 0. */
++
++static int
++bb_insert_store_cache_barrier (rtx head, rtx nxtb)
++{
++ rtx insn = head;
++
++ if (!insn || insn == nxtb)
++ return 0;
++
++ while ((insn = NEXT_INSN (insn)) && insn != nxtb)
++ {
++ int found;
++
++ if (NOTE_INSN_BASIC_BLOCK_P(insn)) /* See scan_1_bb_for_store() */
++ break;
++
++ found = check_insn_for_store (0, insn);
++ if (found < 0)
++ break;
++ if (found > 0)
++ {
++ /* Found a critical store instruction: build the barrier as inline asm. */
++ insn = gen_rtx_ASM_INPUT (VOIDmode,
++ ASM_R10K_CACHE_BARRIER "\t"
++ ASM_COMMENT_START " Cache Barrier");
++ /* Here we rely on the assumption that an explicit delay-slot
++ - if any - is already embedded (in a sequence) in 'head'! */
++ insn = emit_insn_after (insn, head);
++ return 1;
++ }
++ }
++ return 0;
++}
++
++
++/* Scan one basic block (`head' .. `end') for possibly harmful stores.
++ Insert cache barriers at its start and after contained calls/labels
++ where needed; return the number of inserted cache barriers. */
++
++static int
++scan_1_bb_for_store (rtx head, rtx end)
++{
++ rtx nxtb;
++ int count;
++ gcc_assert (head);
++ gcc_assert (end);
++
++ /* Note: 'end' is not necessarily reached from 'head' (hidden in
++ SEQUENCE, PARALLEL), but 'nxtb' is. */
++ nxtb = NEXT_INSN (end);
++
++ /* Each basic block starts with zero or more CODE_LABEL(s), followed
++ by one NOTE_INSN_BASIC_BLOCK.
++ Note: bb_head may equal next_insn(bb_end) already! */
++ while (head && head != nxtb && LABEL_P (head))
++ head = NEXT_INSN (head);
++
++ if (!head || head == nxtb)
++ return 0;
++
++ /* Handle the basic block itself, at most up to next CALL_INSN. */
++ count = bb_insert_store_cache_barrier (head, nxtb);
++
++ /* 1) Handle any CALL_INSN instruction like a conditional branch.
++ 2) There may be "basic blocks" in the list which are no basic blocks
++ at all (containing CODE_LABELs in the body or gathering several
++ other basic blocks (e.g. bb5 containing bb6,bb7,bb8)). */
++
++ while ((head = NEXT_INSN (head)) && head != nxtb)
++ {
++ if (INSN_DELETED_P (head))
++ continue;
++
++ /* Later we'll be called again for this bb on its own. */
++ if (NOTE_INSN_BASIC_BLOCK_P(head))
++ break;
++
++ if (CALL_P (SEQ_BEGIN (head)) || LABEL_P (head))
++ count += bb_insert_store_cache_barrier (head, nxtb);
++ }
++ return count;
++}
++
++static int
++r10k_insert_cache_barriers (void) /* run scan_1_bb_for_store on every basic block */
++{
++ if (TARGET_R10K_SPECEX)
++ {
++ basic_block bb;
++
++ FOR_EACH_BB (bb)
++ if (0 <= bb->index)
++ scan_1_bb_for_store (BB_HEAD (bb), BB_END (bb));
++ }
++ return 0; /* always 0; the per-block barrier counts are discarded */
++}
diff --git a/4.2.0/gentoo/README.history b/4.2.0/gentoo/README.history
index dea341c..52205ff 100644
--- a/4.2.0/gentoo/README.history
+++ b/4.2.0/gentoo/README.history
@@ -1,3 +1,7 @@
+1.1 [pending]
+ + 90_all_mips-add-march-r10k.patch
+ + 91_all_mips-ip28_cache_barriers-v4.patch
+
1.0 18.05.2007
+ 00_all_gcc-4.1-alpha-mieee-default.patch
+ 00_all_gcc-trampolinewarn.patch