2 * Copyright (c) 2013-2018, ARM Limited and Contributors. All rights reserved.
4 * SPDX-License-Identifier: BSD-3-Clause
8 #include <asm_macros.S>
9 #include <assert_macros.S>
10 #include <xlat_tables_defs.h>
13 .globl get_afflvl_shift
14 .globl mpidr_mask_lower_afflvls
16 #endif /* ERROR_DEPRECATED */
24 .globl disable_mmu_el1
25 .globl disable_mmu_el3
26 .globl disable_mmu_icache_el1
27 .globl disable_mmu_icache_el3
29 .globl fixup_gdt_reloc
39 mov x1, #MPIDR_AFFLVL_SHIFT
42 endfunc get_afflvl_shift
44 func mpidr_mask_lower_afflvls
47 mov x2, #MPIDR_AFFLVL_SHIFT
52 endfunc mpidr_mask_lower_afflvls
58 #endif /* ERROR_DEPRECATED */
64 /* -----------------------------------------------------------------------
65 * void zero_normalmem(void *mem, unsigned int length);
67 * Initialise a region in normal memory to 0. This function complies with the
68 * AAPCS and can be called from C code.
70 * NOTE: MMU must be enabled when using this function as it can only operate on
71 * normal memory. It is intended to be mainly used from C code when MMU
73 * -----------------------------------------------------------------------
75 .equ zero_normalmem, zeromem_dczva
77 /* -----------------------------------------------------------------------
78 * void zeromem(void *mem, unsigned int length);
80 * Initialise a region of device memory to 0. This function complies with the
81 * AAPCS and can be called from C code.
83 * NOTE: When data caches and MMU are enabled, zero_normalmem can usually be
84 * used instead for faster zeroing.
86 * -----------------------------------------------------------------------
89 /* x2 is the address past the last zeroed address */
92 * Uses the fallback path that does not use DC ZVA instruction and
93 * therefore does not need enabled MMU
95 b .Lzeromem_dczva_fallback_entry
98 /* -----------------------------------------------------------------------
99 * void zeromem_dczva(void *mem, unsigned int length);
101 * Fill a region of normal memory of size "length" in bytes with null bytes.
102 * MMU must be enabled and the memory must be of
103 * normal type. This is because this function internally uses the DC ZVA
104 * instruction, which generates an Alignment fault if used on any type of
105 * Device memory (see section D3.4.9 of the ARMv8 ARM, issue k). When the MMU
106 * is disabled, all memory behaves like Device-nGnRnE memory (see section
107 * D4.2.8), hence the requirement on the MMU being enabled.
108 * NOTE: The code assumes that the block size as defined in DCZID_EL0
109 * register is at least 16 bytes.
111 * -----------------------------------------------------------------------
116 * The function consists of a series of loops that zero memory one byte
117 * at a time, 16 bytes at a time or using the DC ZVA instruction to
118 * zero an aligned block of bytes, which is assumed to be at least 16 bytes.
119 * In the case where the DC ZVA instruction cannot be used or if the
120 * first 16 bytes loop would overflow, there is a fallback path that does
122 * Note: The fallback path is also used by the zeromem function that
123 * branches to it directly.
125 * +---------+ zeromem_dczva
131 * | checks |>o-------+ (If any check fails, fallback)
133 * | |---------------+
134 * v | Fallback path |
135 * +------+------+ |---------------+
137 * +------+------+ .Lzeromem_dczva_initial_1byte_aligned_end
140 * +-------+-------+ |
141 * | 16 bytes loop | |
142 * +-------+-------+ |
145 * +------+------+ .Lzeromem_dczva_blocksize_aligned
151 * | +-------+-------+ .Lzeromem_dczva_final_16bytes_aligned
152 * | | 16 bytes loop | |
153 * | +-------+-------+ |
156 * | +------+------+ .Lzeromem_dczva_final_1byte_aligned
157 * | | 1 byte loop | |
158 * | +-------------+ |
165 * | +--------------+ +------------------+ zeromem
166 * | | +----------------| zeromem function |
167 * | | | +------------------+
169 * | +-------------+ .Lzeromem_dczva_fallback_entry
177 * Readable names for registers
179 * Registers x0, x1 and x2 are also set by zeromem which
180 * branches into the fallback path directly, so cursor, length and
181 * stop_address should not be retargeted to other registers.
183 cursor .req x0 /* Start address and then current address */
184 length .req x1 /* Length in bytes of the region to zero out */
185 /* Reusing x1 as length is never used after block_mask is set */
186 block_mask .req x1 /* Bitmask of the block size read in DCZID_EL0 */
187 stop_address .req x2 /* Address past the last zeroed byte */
188 block_size .req x3 /* Size of a block in bytes as read in DCZID_EL0 */
192 #if ENABLE_ASSERTIONS
194 * Check for M bit (MMU enabled) of the current SCTLR_EL(1|3)
195 * register value and panic if the MMU is disabled.
197 #if defined(IMAGE_BL1) || defined(IMAGE_BL31) || (defined(IMAGE_BL2) && BL2_AT_EL3)
203 tst tmp1, #SCTLR_M_BIT
205 #endif /* ENABLE_ASSERTIONS */
207 /* stop_address is the address past the last byte to zero */
208 add stop_address, cursor, length
211 * Get block_size = (log2(<block size>) >> 2) (see encoding of
214 mrs block_size, dczid_el0
217 * Select the 4 lowest bits and convert the extracted log2(<block size
218 * in words>) to <block size in bytes>
220 ubfx block_size, block_size, #0, #4
222 lsl block_size, tmp2, block_size
224 #if ENABLE_ASSERTIONS
226 * Assumes block size is at least 16 bytes to avoid manual realignment
227 * of the cursor at the end of the DCZVA loop.
233 * Not worth doing all the setup for a region less than a block and
234 * protects against zeroing a whole block when the area to zero is
235 * smaller than that. Also, as it is assumed that the block size is at
236 * least 16 bytes, this also protects the initial aligning loops from
237 * trying to zero 16 bytes when length is less than 16.
239 cmp length, block_size
240 b.lo .Lzeromem_dczva_fallback_entry
243 * Calculate the bitmask of the block alignment. It will never
244 * underflow as the block size is between 4 bytes and 2kB.
245 * block_mask = block_size - 1
247 sub block_mask, block_size, #1
250 * length alias should not be used after this point unless it is
251 * defined as a register other than block_mask's.
256 * If the start address is already aligned to zero block size, go
257 * straight to the cache zeroing loop. This is safe because at this
258 * point, the length cannot be smaller than a block size.
260 tst cursor, block_mask
261 b.eq .Lzeromem_dczva_blocksize_aligned
264 * Calculate the first block-size-aligned address. It is assumed that
265 * the zero block size is at least 16 bytes. This address is the last
266 * address of this initial loop.
268 orr tmp1, cursor, block_mask
272 * If the addition overflows, skip the cache zeroing loops. This is
273 * quite unlikely however.
275 cbz tmp1, .Lzeromem_dczva_fallback_entry
278 * If the first block-size-aligned address is past the last address,
279 * fallback to the simpler code.
281 cmp tmp1, stop_address
282 b.hi .Lzeromem_dczva_fallback_entry
285 * If the start address is already aligned to 16 bytes, skip this loop.
286 * It is safe to do this because tmp1 (the stop address of the initial
287 * 16 bytes loop) will never be greater than the final stop address.
290 b.eq .Lzeromem_dczva_initial_1byte_aligned_end
292 /* Calculate the next address aligned to 16 bytes */
293 orr tmp2, cursor, #0xf
295 /* If it overflows, fallback to the simple path (unlikely) */
296 cbz tmp2, .Lzeromem_dczva_fallback_entry
298 * Next aligned address cannot be after the stop address because the
299 * length cannot be smaller than 16 at this point.
302 /* First loop: zero byte per byte */
304 strb wzr, [cursor], #1
307 .Lzeromem_dczva_initial_1byte_aligned_end:
310 * Second loop: we need to zero 16 bytes at a time from cursor to tmp1
311 * before being able to use the code that deals with block-size-aligned
317 stp xzr, xzr, [cursor], #16
323 * Third loop: zero a block at a time using DC ZVA cache block zeroing
326 .Lzeromem_dczva_blocksize_aligned:
328 * Calculate the last block-size-aligned address. If the result equals
329 * the start address, the loop will exit immediately.
331 bic tmp1, stop_address, block_mask
336 /* Zero the block containing the cursor */
338 /* Increment the cursor by the size of a block */
339 add cursor, cursor, block_size
345 * Fourth loop: zero 16 bytes at a time and then byte per byte the
348 .Lzeromem_dczva_final_16bytes_aligned:
350 * Calculate the last 16 bytes aligned address. It is assumed that the
351 * block size will never be smaller than 16 bytes so that the current
352 * cursor is aligned to at least 16 bytes boundary.
354 bic tmp1, stop_address, #15
359 stp xzr, xzr, [cursor], #16
364 /* Fifth and final loop: zero byte per byte */
365 .Lzeromem_dczva_final_1byte_aligned:
366 cmp cursor, stop_address
369 strb wzr, [cursor], #1
370 cmp cursor, stop_address
375 /* Fallback for unaligned start addresses */
376 .Lzeromem_dczva_fallback_entry:
378 * If the start address is already aligned to 16 bytes, skip this loop.
381 b.eq .Lzeromem_dczva_final_16bytes_aligned
383 /* Calculate the next address aligned to 16 bytes */
384 orr tmp1, cursor, #15
386 /* If it overflows, fallback to byte per byte zeroing */
387 cbz tmp1, .Lzeromem_dczva_final_1byte_aligned
388 /* If the next aligned address is after the stop address, fall back */
389 cmp tmp1, stop_address
390 b.hs .Lzeromem_dczva_final_1byte_aligned
392 /* Fallback entry loop: zero byte per byte */
394 strb wzr, [cursor], #1
398 b .Lzeromem_dczva_final_16bytes_aligned
402 * length is already unreq'ed to reuse the register for another
410 endfunc zeromem_dczva
412 /* --------------------------------------------------------------------------
413 * void memcpy16(void *dest, const void *src, unsigned int length)
415 * Copy length bytes from memory area src to memory area dest.
416 * The memory areas should not overlap.
417 * Destination and source addresses must be 16-byte aligned.
418 * --------------------------------------------------------------------------
421 #if ENABLE_ASSERTIONS
426 /* copy 16 bytes at a time */
430 ldp x3, x4, [x1], #16
431 stp x3, x4, [x0], #16
434 /* copy byte per byte */
445 /* ---------------------------------------------------------------------------
446 * Disable the MMU at EL3
447 * ---------------------------------------------------------------------------
451 mov x1, #(SCTLR_M_BIT | SCTLR_C_BIT)
456 isb /* ensure MMU is off */
459 endfunc disable_mmu_el3
462 func disable_mmu_icache_el3
463 mov x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
465 endfunc disable_mmu_icache_el3
467 /* ---------------------------------------------------------------------------
468 * Disable the MMU at EL1
469 * ---------------------------------------------------------------------------
473 mov x1, #(SCTLR_M_BIT | SCTLR_C_BIT)
478 isb /* ensure MMU is off */
481 endfunc disable_mmu_el1
484 func disable_mmu_icache_el1
485 mov x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
487 endfunc disable_mmu_icache_el1
489 /* ---------------------------------------------------------------------------
490 * Enable the use of VFP at EL3
491 * ---------------------------------------------------------------------------
496 orr x0, x0, #CPACR_VFP_BITS
499 mov x1, #AARCH64_CPTR_TFP
507 /* ---------------------------------------------------------------------------
508 * Helper to fixup Global Offset Table (GOT) and dynamic relocations
509 * (.rela.dyn) at runtime.
511 * This function is meant to be used when the firmware is compiled with -fpie
512 * and linked with -pie options. We rely on the linker script exporting
513 * appropriate markers for start and end of the section. For GOT, we
514 * expect __GOT_START__ and __GOT_END__. Similarly for .rela.dyn, we expect
515 * __RELA_START__ and __RELA_END__.
517 * The function takes the limits of the memory to apply fixups to as
518 * arguments (which are usually the limits of the relocatable BL image).
519 * x0 - the start of the fixup region
520 * x1 - the limit of the fixup region
521 * These addresses have to be page (4KB) aligned.
522 * ---------------------------------------------------------------------------
528 /* Test if the limits are 4K aligned */
529 #if ENABLE_ASSERTIONS
531 tst x0, #(PAGE_SIZE - 1)
535 * Calculate the offset based on return address in x30.
536 * Assume that this function is called within a page of the start of
539 and x2, x30, #~(PAGE_SIZE - 1)
540 sub x0, x2, x6 /* Diff(S) = Current Address - Compiled Address */
542 adrp x1, __GOT_START__
543 add x1, x1, :lo12:__GOT_START__
545 add x2, x2, :lo12:__GOT_END__
548 * GOT is an array of 64_bit addresses which must be fixed up as
549 * new_addr = old_addr + Diff(S).
550 * The new_addr is the address the binary is currently executing from
551 * and old_addr is the address at compile time.
555 /* Skip adding offset if address is < lower limit */
558 /* Skip adding offset if address is >= upper limit */
568 /* Starting dynamic relocations. Use adrp/add to get RELA_START and END */
569 adrp x1, __RELA_START__
570 add x1, x1, :lo12:__RELA_START__
571 adrp x2, __RELA_END__
572 add x2, x2, :lo12:__RELA_END__
574 * According to ELF-64 specification, the RELA data structure is as
578 * Elf64_Addr r_offset;
579 * Elf64_Xword r_info;
580 * Elf64_Sxword r_addend;
583 * r_offset is address of reference
584 * r_info is symbol index and type of relocation (in this case
585 * 0x403 which corresponds to R_AARCH64_RELATIVE).
586 * r_addend is constant part of expression.
588 * Size of Elf64_Rela structure is 24 bytes.
591 /* Assert that the relocation type is R_AARCH64_RELATIVE */
592 #if ENABLE_ASSERTIONS
597 ldr x3, [x1] /* r_offset */
599 ldr x4, [x1, #16] /* r_addend */
601 /* Skip adding offset if r_addend is < lower limit */
604 /* Skip adding offset if r_addend entry is >= upper limit */
608 add x4, x0, x4 /* Diff(S) + r_addend */
616 endfunc fixup_gdt_reloc