/*
 * Copyright (c) 2013-2018, ARM Limited and Contributors. All rights reserved.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <arch.h>
#include <asm_macros.S>
#include <assert_macros.S>
#include <xlat_tables_defs.h>

#if !ERROR_DEPRECATED
	.globl	get_afflvl_shift
	.globl	mpidr_mask_lower_afflvls
	.globl	eret
#endif /* ERROR_DEPRECATED */
	.globl	smc

	.globl	zero_normalmem
	.globl	zeromem
	.globl	zeromem16
	.globl	memcpy16

	.globl	disable_mmu_el1
	.globl	disable_mmu_el3
	.globl	disable_mmu_icache_el1
	.globl	disable_mmu_icache_el3

	.globl	fixup_gdt_reloc

#if SUPPORT_VFP
	.globl	enable_vfp
#endif

#if !ERROR_DEPRECATED
func get_afflvl_shift
	cmp	x0, #3
	cinc	x0, x0, eq
	mov	x1, #MPIDR_AFFLVL_SHIFT
	lsl	x0, x0, x1
	ret
endfunc get_afflvl_shift
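
/*
 * Worked example (illustrative, assuming MPIDR_AFFLVL_SHIFT is 3, i.e. a
 * multiply by 8): affinity levels 0/1/2 map to bit offsets 0/8/16, and
 * level 3 maps to 32, since Aff3 lives in MPIDR bits [39:32].
 */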

func mpidr_mask_lower_afflvls
	cmp	x1, #3
	cinc	x1, x1, eq
	mov	x2, #MPIDR_AFFLVL_SHIFT
	lsl	x2, x1, x2
	lsr	x0, x0, x2
	lsl	x0, x0, x2
	ret
endfunc mpidr_mask_lower_afflvls


func eret
	eret
endfunc eret
#endif /* ERROR_DEPRECATED */

func smc
	smc	#0
endfunc smc

/* -----------------------------------------------------------------------
 * void zero_normalmem(void *mem, unsigned int length);
 *
 * Initialise a region in normal memory to 0. This function complies with the
 * AAPCS and can be called from C code.
 *
 * NOTE: MMU must be enabled when using this function as it can only operate
 * on normal memory. It is intended to be used mainly from C code, where the
 * MMU is usually enabled.
 * -----------------------------------------------------------------------
 */
.equ	zero_normalmem, zeromem_dczva

/* -----------------------------------------------------------------------
 * void zeromem(void *mem, unsigned int length);
 *
 * Initialise a region of device memory to 0. This function complies with the
 * AAPCS and can be called from C code.
 *
 * NOTE: When data caches and MMU are enabled, zero_normalmem can usually be
 * used instead for faster zeroing.
 *
 * -----------------------------------------------------------------------
 */
func zeromem
	/* x2 is the address past the last zeroed address */
	add	x2, x0, x1
	/*
	 * Uses the fallback path that does not use the DC ZVA instruction
	 * and therefore does not require the MMU to be enabled
	 */
	b	.Lzeromem_dczva_fallback_entry
endfunc zeromem

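/*
 * Illustrative usage from C (a minimal sketch; the prototypes match the
 * declarations documented above):
 *
 *   static char scratch[256];
 *
 *   zeromem(scratch, sizeof(scratch));        // safe even with MMU off
 *   zero_normalmem(scratch, sizeof(scratch)); // faster, requires MMU on
 */
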
/* -----------------------------------------------------------------------
 * void zeromem_dczva(void *mem, unsigned int length);
 *
 * Fill a region of normal memory of size "length" in bytes with null bytes.
 * The MMU must be enabled and the memory must be of normal type. This is
 * because this function internally uses the DC ZVA instruction, which
 * generates an Alignment fault if used on any type of Device memory (see
 * section D3.4.9 of the ARMv8 ARM, issue k). When the MMU is disabled, all
 * memory behaves like Device-nGnRnE memory (see section D4.2.8), hence the
 * requirement on the MMU being enabled.
 * NOTE: The code assumes that the block size as defined in the DCZID_EL0
 * register is at least 16 bytes.
 *
 * -----------------------------------------------------------------------
 */
func zeromem_dczva

	/*
	 * The function consists of a series of loops that zero memory one byte
	 * at a time, 16 bytes at a time, or using the DC ZVA instruction to
	 * zero aligned blocks, whose size is assumed to be at least 16 bytes.
	 * In the case where the DC ZVA instruction cannot be used or if the
	 * first 16 bytes loop would overflow, there is a fallback path that
	 * does not use DC ZVA.
	 * Note: The fallback path is also used by the zeromem function, which
	 * branches to it directly.
	 *
	 *              +---------+ zeromem_dczva
	 *              |  entry  |
	 *              +----+----+
	 *                   |
	 *                   v
	 *              +---------+
	 *              | checks  |>o-------+ (If any check fails, fallback)
	 *              +----+----+         |
	 *                   |              |---------------+
	 *                   v              | Fallback path |
	 *            +------+------+       |---------------+
	 *            | 1 byte loop |       |
	 *            +------+------+ .Lzeromem_dczva_initial_1byte_aligned_end
	 *                   |              |
	 *                   v              |
	 *           +-------+-------+      |
	 *           | 16 bytes loop |      |
	 *           +-------+-------+      |
	 *                   |              |
	 *                   v              |
	 *            +------+------+ .Lzeromem_dczva_blocksize_aligned
	 *            | DC ZVA loop |       |
	 *            +------+------+       |
	 *       +--------+  |              |
	 *       |        |  |              |
	 *       |        v  v              |
	 *       |  +-------+-------+ .Lzeromem_dczva_final_16bytes_aligned
	 *       |  | 16 bytes loop |       |
	 *       |  +-------+-------+       |
	 *       |          |               |
	 *       |          v               |
	 *       |   +------+------+ .Lzeromem_dczva_final_1byte_aligned
	 *       |   | 1 byte loop |        |
	 *       |   +-------------+        |
	 *       |          |               |
	 *       |          v               |
	 *       |      +---+--+            |
	 *       |      | exit |            |
	 *       |      +------+            |
	 *       |                          |
	 *       |  +-----------------------+  +------------------+ zeromem
	 *       |  |  +-----------------------| zeromem function |
	 *       |  |  |                       +------------------+
	 *       |  v  v
	 *       |  +-------------+ .Lzeromem_dczva_fallback_entry
	 *       |  | 1 byte loop |
	 *       |  +------+------+
	 *       |         |
	 *       +---------+
	 */

	/*
	 * Readable names for registers
	 *
	 * Registers x0, x1 and x2 are also set by zeromem which
	 * branches into the fallback path directly, so cursor, length and
	 * stop_address should not be retargeted to other registers.
	 */
	cursor       .req x0 /* Start address and then current address */
	length       .req x1 /* Length in bytes of the region to zero out */
	/* Reusing x1 as length is never used after block_mask is set */
	block_mask   .req x1 /* Bitmask of the block size read in DCZID_EL0 */
	stop_address .req x2 /* Address past the last zeroed byte */
	block_size   .req x3 /* Size of a block in bytes as read in DCZID_EL0 */
	tmp1         .req x4
	tmp2         .req x5

#if ENABLE_ASSERTIONS
	/*
	 * Check for M bit (MMU enabled) of the current SCTLR_EL(1|3)
	 * register value and panic if the MMU is disabled.
	 */
#if defined(IMAGE_BL1) || defined(IMAGE_BL31) || (defined(IMAGE_BL2) && BL2_AT_EL3)
	mrs	tmp1, sctlr_el3
#else
	mrs	tmp1, sctlr_el1
#endif

	tst	tmp1, #SCTLR_M_BIT
	ASM_ASSERT(ne)
#endif /* ENABLE_ASSERTIONS */

	/* stop_address is the address past the last to zero */
	add	stop_address, cursor, length

	/*
	 * Get block_size = (log2(<block size>) >> 2) (see encoding of
	 * dczid_el0 reg)
	 */
	mrs	block_size, dczid_el0

	/*
	 * Select the 4 lowest bits and convert the extracted log2(<block size
	 * in words>) to <block size in bytes>
	 */
	ubfx	block_size, block_size, #0, #4
	mov	tmp2, #(1 << 2)
	lsl	block_size, tmp2, block_size
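
	/*
	 * Worked example (illustrative): if DCZID_EL0[3:0] reads 4, a block
	 * holds 2^4 = 16 words of 4 bytes, so block_size = 4 << 4 = 64 bytes.
	 */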

#if ENABLE_ASSERTIONS
	/*
	 * Assumes block size is at least 16 bytes to avoid manual realignment
	 * of the cursor at the end of the DCZVA loop.
	 */
	cmp	block_size, #16
	ASM_ASSERT(hs)
#endif
	/*
	 * Not worth doing all the setup for a region less than a block and
	 * protects against zeroing a whole block when the area to zero is
	 * smaller than that. Also, as it is assumed that the block size is at
	 * least 16 bytes, this also protects the initial aligning loops from
	 * trying to zero 16 bytes when length is less than 16.
	 */
	cmp	length, block_size
	b.lo	.Lzeromem_dczva_fallback_entry

	/*
	 * Calculate the bitmask of the block alignment. It will never
	 * underflow as the block size is between 4 bytes and 2kB.
	 * block_mask = block_size - 1
	 */
	sub	block_mask, block_size, #1

	/*
	 * length alias should not be used after this point unless it is
	 * defined as a register other than block_mask's.
	 */
	.unreq	length

	/*
	 * If the start address is already aligned to zero block size, go
	 * straight to the cache zeroing loop. This is safe because at this
	 * point, the length cannot be smaller than a block size.
	 */
	tst	cursor, block_mask
	b.eq	.Lzeromem_dczva_blocksize_aligned

	/*
	 * Calculate the first block-size-aligned address. It is assumed that
	 * the zero block size is at least 16 bytes. This address is the last
	 * address of this initial loop.
	 */
	orr	tmp1, cursor, block_mask
	add	tmp1, tmp1, #1
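
	/*
	 * Illustrative note: (cursor | block_mask) + 1 rounds up to the next
	 * block boundary; e.g. with a 64-byte block, 0x1004 rounds up to
	 * (0x1004 | 0x3f) + 1 = 0x1040.
	 */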

	/*
	 * If the addition overflows, skip the cache zeroing loops. This is
	 * quite unlikely however.
	 */
	cbz	tmp1, .Lzeromem_dczva_fallback_entry

	/*
	 * If the first block-size-aligned address is past the last address,
	 * fallback to the simpler code.
	 */
	cmp	tmp1, stop_address
	b.hi	.Lzeromem_dczva_fallback_entry

	/*
	 * If the start address is already aligned to 16 bytes, skip this loop.
	 * It is safe to do this because tmp1 (the stop address of the initial
	 * 16 bytes loop) will never be greater than the final stop address.
	 */
	tst	cursor, #0xf
	b.eq	.Lzeromem_dczva_initial_1byte_aligned_end

	/* Calculate the next address aligned to 16 bytes */
	orr	tmp2, cursor, #0xf
	add	tmp2, tmp2, #1
	/* If it overflows, fallback to the simple path (unlikely) */
	cbz	tmp2, .Lzeromem_dczva_fallback_entry
	/*
	 * Next aligned address cannot be after the stop address because the
	 * length cannot be smaller than 16 at this point.
	 */

	/* First loop: zero byte per byte */
1:
	strb	wzr, [cursor], #1
	cmp	cursor, tmp2
	b.ne	1b
.Lzeromem_dczva_initial_1byte_aligned_end:

	/*
	 * Second loop: we need to zero 16 bytes at a time from cursor to tmp1
	 * before being able to use the code that deals with block-size-aligned
	 * addresses.
	 */
	cmp	cursor, tmp1
	b.hs	2f
1:
	stp	xzr, xzr, [cursor], #16
	cmp	cursor, tmp1
	b.lo	1b
2:

	/*
	 * Third loop: zero a block at a time using DC ZVA cache block zeroing
	 * instruction.
	 */
.Lzeromem_dczva_blocksize_aligned:
	/*
	 * Calculate the last block-size-aligned address. If the result equals
	 * the start address, the loop will exit immediately.
	 */
	bic	tmp1, stop_address, block_mask

	cmp	cursor, tmp1
	b.hs	2f
1:
	/* Zero the block containing the cursor */
	dc	zva, cursor
	/* Increment the cursor by the size of a block */
	add	cursor, cursor, block_size
	cmp	cursor, tmp1
	b.lo	1b
2:

	/*
	 * Fourth loop: zero 16 bytes at a time and then byte per byte the
	 * remaining area
	 */
.Lzeromem_dczva_final_16bytes_aligned:
	/*
	 * Calculate the last 16-byte-aligned address. It is assumed that the
	 * block size will never be smaller than 16 bytes, so that the current
	 * cursor is aligned to at least a 16-byte boundary.
	 */
	bic	tmp1, stop_address, #15

	cmp	cursor, tmp1
	b.hs	2f
1:
	stp	xzr, xzr, [cursor], #16
	cmp	cursor, tmp1
	b.lo	1b
2:

	/* Fifth and final loop: zero byte per byte */
.Lzeromem_dczva_final_1byte_aligned:
	cmp	cursor, stop_address
	b.eq	2f
1:
	strb	wzr, [cursor], #1
	cmp	cursor, stop_address
	b.ne	1b
2:
	ret

	/* Fallback for unaligned start addresses */
.Lzeromem_dczva_fallback_entry:
	/*
	 * If the start address is already aligned to 16 bytes, skip this loop.
	 */
	tst	cursor, #0xf
	b.eq	.Lzeromem_dczva_final_16bytes_aligned

	/* Calculate the next address aligned to 16 bytes */
	orr	tmp1, cursor, #15
	add	tmp1, tmp1, #1
	/* If it overflows, fallback to byte per byte zeroing */
	cbz	tmp1, .Lzeromem_dczva_final_1byte_aligned
	/* If the next aligned address is after the stop address, fall back */
	cmp	tmp1, stop_address
	b.hs	.Lzeromem_dczva_final_1byte_aligned

	/* Fallback entry loop: zero byte per byte */
1:
	strb	wzr, [cursor], #1
	cmp	cursor, tmp1
	b.ne	1b

	b	.Lzeromem_dczva_final_16bytes_aligned

	.unreq	cursor
	/*
	 * length is already unreq'ed to reuse the register for another
	 * variable.
	 */
	.unreq	stop_address
	.unreq	block_size
	.unreq	block_mask
	.unreq	tmp1
	.unreq	tmp2
endfunc zeromem_dczva

/* --------------------------------------------------------------------------
 * void memcpy16(void *dest, const void *src, unsigned int length)
 *
 * Copy length bytes from memory area src to memory area dest.
 * The memory areas should not overlap.
 * Destination and source addresses must be 16-byte aligned.
 * --------------------------------------------------------------------------
 */
func memcpy16
#if ENABLE_ASSERTIONS
	orr	x3, x0, x1
	tst	x3, #0xf
	ASM_ASSERT(eq)
#endif
/* copy 16 bytes at a time */
m_loop16:
	cmp	x2, #16
	b.lo	m_loop1
	ldp	x3, x4, [x1], #16
	stp	x3, x4, [x0], #16
	sub	x2, x2, #16
	b	m_loop16
/* copy byte per byte */
m_loop1:
	cbz	x2, m_end
	ldrb	w3, [x1], #1
	strb	w3, [x0], #1
	subs	x2, x2, #1
	b.ne	m_loop1
m_end:
	ret
endfunc memcpy16

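/*
 * Illustrative C model of memcpy16 (a sketch under the same alignment
 * assumptions, not the authoritative implementation):
 *
 *   void memcpy16(void *dest, const void *src, unsigned int length)
 *   {
 *       uint64_t *d = dest;
 *       const uint64_t *s = src;
 *       while (length >= 16) {       // bulk: two 8-byte words per pass
 *           *d++ = *s++;
 *           *d++ = *s++;
 *           length -= 16;
 *       }
 *       char *db = (char *)d;        // byte tail for the remainder
 *       const char *sb = (const char *)s;
 *       while (length--)
 *           *db++ = *sb++;
 *   }
 */
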
/* ---------------------------------------------------------------------------
 * Disable the MMU at EL3
 * ---------------------------------------------------------------------------
 */

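/*
 * Note (descriptive): disable_mmu_icache_el3 below branches into
 * do_disable_mmu_el3; x1 carries the SCTLR_EL3 bits to clear (M and C
 * here, plus I for the icache variant).
 */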
func disable_mmu_el3
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT)
do_disable_mmu_el3:
	mrs	x0, sctlr_el3
	bic	x0, x0, x1
	msr	sctlr_el3, x0
	isb	/* ensure MMU is off */
	dsb	sy
	ret
endfunc disable_mmu_el3


func disable_mmu_icache_el3
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
	b	do_disable_mmu_el3
endfunc disable_mmu_icache_el3

/* ---------------------------------------------------------------------------
 * Disable the MMU at EL1
 * ---------------------------------------------------------------------------
 */

func disable_mmu_el1
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT)
do_disable_mmu_el1:
	mrs	x0, sctlr_el1
	bic	x0, x0, x1
	msr	sctlr_el1, x0
	isb	/* ensure MMU is off */
	dsb	sy
	ret
endfunc disable_mmu_el1


func disable_mmu_icache_el1
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
	b	do_disable_mmu_el1
endfunc disable_mmu_icache_el1

/* ---------------------------------------------------------------------------
 * Enable the use of VFP at EL3
 * ---------------------------------------------------------------------------
 */
#if SUPPORT_VFP
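/*
 * Descriptive note: this sets the CPACR_EL1 FP access bits and clears
 * CPTR_EL3.TFP so that FP/SIMD instructions are not trapped.
 */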
func enable_vfp
	mrs	x0, cpacr_el1
	orr	x0, x0, #CPACR_VFP_BITS
	msr	cpacr_el1, x0
	mrs	x0, cptr_el3
	mov	x1, #AARCH64_CPTR_TFP
	bic	x0, x0, x1
	msr	cptr_el3, x0
	isb
	ret
endfunc enable_vfp
#endif

/* ---------------------------------------------------------------------------
 * Helper to fix up the Global Offset Table (GOT) and dynamic relocations
 * (.rela.dyn) at runtime.
 *
 * This function is meant to be used when the firmware is compiled with -fpie
 * and linked with -pie options. We rely on the linker script exporting
 * appropriate markers for the start and end of the sections. For the GOT, we
 * expect __GOT_START__ and __GOT_END__. Similarly for .rela.dyn, we expect
 * __RELA_START__ and __RELA_END__.
 *
 * The function takes the limits of the memory to apply fixups to as
 * arguments (which are usually the limits of the relocatable BL image).
 * x0 - the start of the fixup region
 * x1 - the limit of the fixup region
 * These addresses have to be page (4KB) aligned.
 * ---------------------------------------------------------------------------
 */
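/*
 * For reference, a representative linker script fragment exporting the
 * expected markers might look like this (a sketch, not the exact script):
 *
 *   .got : {
 *       __GOT_START__ = .;
 *       *(.got)
 *       __GOT_END__ = .;
 *   }
 *
 *   .rela.dyn : {
 *       __RELA_START__ = .;
 *       *(.rela*)
 *       __RELA_END__ = .;
 *   }
 */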
func fixup_gdt_reloc
	mov	x6, x0
	mov	x7, x1

	/* Test if the limits are 4K aligned */
#if ENABLE_ASSERTIONS
	orr	x0, x0, x1
	tst	x0, #(PAGE_SIZE - 1)
	ASM_ASSERT(eq)
#endif
	/*
	 * Calculate the offset based on return address in x30.
	 * Assume that this function is called within a page of the start of
	 * the fixup region.
	 */
	and	x2, x30, #~(PAGE_SIZE - 1)
	sub	x0, x2, x6	/* Diff(S) = Current Address - Compiled Address */

	adrp	x1, __GOT_START__
	add	x1, x1, :lo12:__GOT_START__
	adrp	x2, __GOT_END__
	add	x2, x2, :lo12:__GOT_END__

	/*
	 * GOT is an array of 64-bit addresses which must be fixed up as
	 * new_addr = old_addr + Diff(S).
	 * new_addr is the address the binary is currently executing from
	 * and old_addr is the address at compile time.
	 */
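	/*
	 * Equivalent C sketch of the GOT walk below (illustrative only; the
	 * names are hypothetical):
	 *
	 *   for (uint64_t *entry = got_start; entry < got_end; entry++) {
	 *       if (*entry >= lower_limit && *entry < upper_limit)
	 *           *entry += diff;   // diff = Diff(S) computed above
	 *   }
	 */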
1:
	ldr	x3, [x1]
	/* Skip adding offset if address is < lower limit */
	cmp	x3, x6
	b.lo	2f
	/* Skip adding offset if address is >= upper limit */
	cmp	x3, x7
	b.ge	2f
	add	x3, x3, x0
	str	x3, [x1]
2:
	add	x1, x1, #8
	cmp	x1, x2
	b.lo	1b

	/* Starting dynamic relocations. Use adrp/add to get RELA_START and END */
	adrp	x1, __RELA_START__
	add	x1, x1, :lo12:__RELA_START__
	adrp	x2, __RELA_END__
	add	x2, x2, :lo12:__RELA_END__
	/*
	 * According to the ELF-64 specification, the RELA data structure is
	 * as follows:
	 *	typedef struct
	 *	{
	 *		Elf64_Addr r_offset;
	 *		Elf64_Xword r_info;
	 *		Elf64_Sxword r_addend;
	 *	} Elf64_Rela;
	 *
	 * r_offset is the address of the reference
	 * r_info is the symbol index and type of relocation (in this case
	 * 0x403, which corresponds to R_AARCH64_RELATIVE).
	 * r_addend is the constant part of the expression.
	 *
	 * Size of Elf64_Rela structure is 24 bytes.
	 */
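	/*
	 * Equivalent C sketch of the relocation walk below (illustrative
	 * only; the names are hypothetical):
	 *
	 *   for (Elf64_Rela *r = rela_start; r < rela_end; r++) {
	 *       uint64_t *ref = (uint64_t *)(diff + r->r_offset);
	 *       if (r->r_addend >= lower_limit && r->r_addend < upper_limit)
	 *           *ref = diff + r->r_addend;
	 *   }
	 */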
1:
	/* Assert that the relocation type is R_AARCH64_RELATIVE */
#if ENABLE_ASSERTIONS
	ldr	x3, [x1, #8]
	cmp	x3, #0x403
	ASM_ASSERT(eq)
#endif
	ldr	x3, [x1]	/* r_offset */
	add	x3, x0, x3
	ldr	x4, [x1, #16]	/* r_addend */

	/* Skip adding offset if r_addend is < lower limit */
	cmp	x4, x6
	b.lo	2f
	/* Skip adding offset if r_addend entry is >= upper limit */
	cmp	x4, x7
	b.ge	2f

	add	x4, x0, x4	/* Diff(S) + r_addend */
	str	x4, [x3]

2:	add	x1, x1, #24
	cmp	x1, x2
	b.lo	1b

	ret
endfunc fixup_gdt_reloc