toolchain/gcc/patches/linaro/995-fa526.patch

   1 --- a/gcc/config/arm/arm-cores.def
   2 +++ b/gcc/config/arm/arm-cores.def
   3 @@ -74,6 +74,7 @@ ARM_CORE("strongarm",     strongarm,  4,
   4  ARM_CORE("strongarm110",  strongarm110,        4,                   FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
   5  ARM_CORE("strongarm1100", strongarm1100, 4,                 FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
   6  ARM_CORE("strongarm1110", strongarm1110, 4,                 FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
   7 +ARM_CORE("fa526",         fa526,        4,                               FL_LDSCHED, fastmul)
   8
   9  /* V4T Architecture Processors */
  10  ARM_CORE("arm7tdmi",      arm7tdmi,    4T,     FL_CO_PROC          , fastmul)
  11 --- a/gcc/config/arm/arm.md
  12 +++ b/gcc/config/arm/arm.md
  13 @@ -435,7 +435,7 @@
  14
  15  (define_attr "generic_sched" "yes,no"
  16    (const (if_then_else
  17 -          (ior (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexm4")
  18 +          (ior (eq_attr "tune" "fa526,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexm4")
  19                (eq_attr "tune_cortexr4" "yes"))
  20            (const_string "no")
  21            (const_string "yes"))))
  22 @@ -467,6 +467,7 @@
  23  (include "arm1020e.md")
  24  (include "arm1026ejs.md")
  25  (include "arm1136jfs.md")
  26 +(include "fa526.md")
  27  (include "cortex-a5.md")
  28  (include "cortex-a8.md")
  29  (include "cortex-a9.md")
  30 --- a/gcc/config/arm/arm-tune.md
  31 +++ b/gcc/config/arm/arm-tune.md
  32 @@ -1,5 +1,5 @@
  33  ;; -*- buffer-read-only: t -*-
  34  ;; Generated automatically by gentune.sh from arm-cores.def
  35  (define_attr "tune"
  36 -       "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexr4,cortexr4f,cortexm4,cortexm3,cortexm1,cortexm0"
  37 +       "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexr4,cortexr4f,cortexm4,cortexm3,cortexm1,cortexm0"
  38         (const (symbol_ref "((enum attr_tune) arm_tune)")))
  39 --- a/gcc/config/arm/bpabi.h
  40 +++ b/gcc/config/arm/bpabi.h
  41 @@ -52,7 +52,8 @@
  42  /* The BPABI integer comparison routines return { -1, 0, 1 }.  */
  43  #define TARGET_LIB_INT_CMP_BIASED !TARGET_BPABI
  44
  45 -#define TARGET_FIX_V4BX_SPEC " %{mcpu=arm8|mcpu=arm810|mcpu=strongarm*|march=armv4:--fix-v4bx}"
  46 +#define TARGET_FIX_V4BX_SPEC " %{mcpu=arm8|mcpu=arm810|mcpu=strongarm*\
  47 +|march=armv4|mcpu=fa526:--fix-v4bx}"
  48
  49  #define BE8_LINK_SPEC " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5|mcpu=cortex-a8|mcpu=cortex-a9:%{!r:--be8}}}"
  50
  51 --- /dev/null
  52 +++ b/gcc/config/arm/fa526.md
  53 @@ -0,0 +1,161 @@
  54 +;; Faraday FA526 Pipeline Description
  55 +;; Copyright (C) 2010 Free Software Foundation, Inc.
  56 +;; Written by I-Jui Sung, based on ARM926EJ-S Pipeline Description.
  57 +
  58 +;; This file is part of GCC.
  59 +;;
  60 +;; GCC is free software; you can redistribute it and/or modify it under
  61 +;; the terms of the GNU General Public License as published by the Free
  62 +;; Software Foundation; either version 3, or (at your option) any later
  63 +;; version.
  64 +;;
  65 +;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  66 +;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
  67 +;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  68 +;; for more details.
  69 +;;
  70 +;; You should have received a copy of the GNU General Public License
  71 +;; along with GCC; see the file COPYING3.  If not see
  72 +;; <http://www.gnu.org/licenses/>.
  73 +
  74 +;; These descriptions are based on the information contained in the
  75 +;; FA526 Core Design Note, Copyright (c) 2010 Faraday Technology Corp.
  76 +;;
  77 +;; Modeled pipeline characteristics:
  78 +;; LD -> any use: latency = 3 (2 cycle penalty).
  79 +;; ALU -> any use: latency = 2 (1 cycle penalty).
  80 +
  81 +;; This automaton provides a pipeline description for the Faraday
  82 +;; FA526 core.
  83 +;;
  84 +;; The model given here assumes that the condition for all conditional
  85 +;; instructions is "true", i.e., that all of the instructions are
  86 +;; actually executed.
  87 +
  88 +(define_automaton "fa526")
  89 +
  90 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  91 +;; Pipelines
  92 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  93 +
  94 +;; There is a single pipeline
  95 +;;
  96 +;;   The ALU pipeline has fetch, decode, execute, memory, and
  97 +;;   write stages.  We only need to model the execute, memory and write
  98 +;;   stages.
  99 +
 100 +;;      S      E      M      W
 101 +
 102 +(define_cpu_unit "fa526_core" "fa526")
 103 +
 104 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 105 +;; ALU Instructions
 106 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 107 +
 108 +;; ALU instructions require two cycles to execute, and use the ALU
 109 +;; pipeline in each of the three stages.  The results are available
 110 +;; after the execute stage has finished.
 111 +;;
 112 +;; If the destination register is the PC, the pipelines are stalled
 113 +;; for several cycles.  That case is not modeled here.
 114 +
 115 +;; ALU operations
 116 +(define_insn_reservation "526_alu_op" 1
 117 + (and (eq_attr "tune" "fa526")
 118 +      (eq_attr "type" "alu"))
 119 + "fa526_core")
 120 +
 121 +(define_insn_reservation "526_alu_shift_op" 2
 122 + (and (eq_attr "tune" "fa526")
 123 +      (eq_attr "type" "alu_shift,alu_shift_reg"))
 124 + "fa526_core")
 125 +
 126 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 127 +;; Multiplication Instructions
 128 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 129 +
 130 +(define_insn_reservation "526_mult1" 2
 131 + (and (eq_attr "tune" "fa526")
 132 +      (eq_attr "insn" "smlalxy,smulxy,smlaxy")) ; 16-bit multiplies, each listed once
 133 + "fa526_core")
 134 +
 135 +(define_insn_reservation "526_mult2" 5
 136 + (and (eq_attr "tune" "fa526")
 137 +      (eq_attr "insn" "mul,mla,muls,mlas,umull,umlal,smull,smlal,umulls,\
 138 +                       umlals,smulls,smlals,smlawx"))
 139 + "fa526_core*4")
 140 +
 141 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 142 +;; Load/Store Instructions
 143 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 144 +
 145 +;; The models for load/store instructions do not distinguish operations
 146 +;; with a base register writeback (such as "ldm!") from those without
 147 +;; writeback.  These models assume that all memory references hit in
 148 +;; the dcache.
 149 +
 150 +(define_insn_reservation "526_load1_op" 3
 151 + (and (eq_attr "tune" "fa526")
 152 +      (eq_attr "type" "load1,load_byte"))
 153 + "fa526_core")
 154 +
 155 +(define_insn_reservation "526_load2_op" 4
 156 + (and (eq_attr "tune" "fa526")
 157 +      (eq_attr "type" "load2"))
 158 + "fa526_core*2")
 159 +
 160 +(define_insn_reservation "526_load3_op" 5
 161 + (and (eq_attr "tune" "fa526")
 162 +      (eq_attr "type" "load3"))
 163 + "fa526_core*3")
 164 +
 165 +(define_insn_reservation "526_load4_op" 6
 166 + (and (eq_attr "tune" "fa526")
 167 +      (eq_attr "type" "load4"))
 168 + "fa526_core*4")
 169 +
 170 +(define_insn_reservation "526_store1_op" 0
 171 + (and (eq_attr "tune" "fa526")
 172 +      (eq_attr "type" "store1"))
 173 + "fa526_core")
 174 +
 175 +(define_insn_reservation "526_store2_op" 1
 176 + (and (eq_attr "tune" "fa526")
 177 +      (eq_attr "type" "store2"))
 178 + "fa526_core*2")
 179 +
 180 +(define_insn_reservation "526_store3_op" 2
 181 + (and (eq_attr "tune" "fa526")
 182 +      (eq_attr "type" "store3"))
 183 + "fa526_core*3")
 184 +
 185 +(define_insn_reservation "526_store4_op" 3
 186 + (and (eq_attr "tune" "fa526")
 187 +      (eq_attr "type" "store4"))
 188 + "fa526_core*4")
 189 +
 190 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 191 +;; Branch and Call Instructions
 192 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 193 +
 194 +;; Branch instructions are difficult to model accurately.  The FA526
 195 +;; core can predict most branches.  If the branch is predicted
 196 +;; correctly, and predicted early enough, the branch can be completely
 197 +;; eliminated from the instruction stream.  Some branches can
 198 +;; therefore appear to require zero cycles to execute.  We assume that
 199 +;; all branches are predicted correctly, and that the latency is
 200 +;; therefore the minimum value.
 201 +
 202 +(define_insn_reservation "526_branch_op" 0
 203 + (and (eq_attr "tune" "fa526")
 204 +      (eq_attr "type" "branch"))
 205 + "fa526_core")
 206 +
 207 +;; The latency for a call is actually the latency until the result is available,
 208 +;; i.e. until R0 holds the integer return value.  In most cases the return value
 209 +;; is set by a mov instruction, which has a 1-cycle latency.
 210 +(define_insn_reservation "526_call_op" 1
 211 + (and (eq_attr "tune" "fa526")
 212 +      (eq_attr "type" "call"))
 213 + "fa526_core")
 214 +
 215 --- a/gcc/config/arm/t-arm
 216 +++ b/gcc/config/arm/t-arm
 217 @@ -24,6 +24,7 @@ MD_INCLUDES=  $(srcdir)/config/arm/arm-t
 218                 $(srcdir)/config/arm/arm1020e.md \
 219                 $(srcdir)/config/arm/arm1026ejs.md \
 220                 $(srcdir)/config/arm/arm1136jfs.md \
 221 +               $(srcdir)/config/arm/fa526.md \
 222                 $(srcdir)/config/arm/arm926ejs.md \
 223                 $(srcdir)/config/arm/cirrus.md \
 224                 $(srcdir)/config/arm/fpa.md \
 225 --- a/gcc/config/arm/t-arm-elf
 226 +++ b/gcc/config/arm/t-arm-elf
 227 @@ -36,6 +36,10 @@ MULTILIB_DIRNAMES    = arm thumb
 228  MULTILIB_EXCEPTIONS  =
 229  MULTILIB_MATCHES     =
 230
 231 +#MULTILIB_OPTIONS     += mcpu=fa526
 232 +#MULTILIB_DIRNAMES    += fa526
 233 +#MULTILIB_EXCEPTIONS  += *mthumb*/*mcpu=fa526
 234 +
 235  #MULTILIB_OPTIONS      += march=armv7
 236  #MULTILIB_DIRNAMES     += thumb2
 237  #MULTILIB_EXCEPTIONS   += march=armv7* marm/*march=armv7*
 238 @@ -52,6 +56,7 @@ MULTILIB_MATCHES     =
 239  MULTILIB_OPTIONS       += mfloat-abi=hard
 240  MULTILIB_DIRNAMES      += fpu
 241  MULTILIB_EXCEPTIONS    += *mthumb/*mfloat-abi=hard*
 242 +MULTILIB_EXCEPTIONS    += *mcpu=fa526/*mfloat-abi=hard*
 243
 244  # MULTILIB_OPTIONS    += mcpu=ep9312
 245  # MULTILIB_DIRNAMES   += ep9312
 246 --- a/gcc/doc/invoke.texi
 247 +++ b/gcc/doc/invoke.texi
 248 @@ -9900,7 +9900,8 @@ assembly code.  Permissible names are: @
 249  @samp{cortex-r4}, @samp{cortex-r4f}, @samp{cortex-m4}, @samp{cortex-m3},
 250  @samp{cortex-m1},
 251  @samp{cortex-m0},
 252 -@samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}.
 253 +@samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312},
 254 +@samp{fa526}.
 255
 256  @item -mtune=@var{name}
 257  @opindex mtune