/*
 * Copyright (c) 2013-2018, ARM Limited and Contributors. All rights reserved.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <arch.h>
#include <asm_macros.S>
#include <assert_macros.S>
#include <xlat_tables_defs.h>

#if !ERROR_DEPRECATED
	.globl	get_afflvl_shift
	.globl	mpidr_mask_lower_afflvls
	.globl	eret
#endif /* ERROR_DEPRECATED */
	.globl	smc

	.globl	zero_normalmem
	.globl	zeromem
	.globl	zeromem16
	.globl	memcpy16

	.globl	disable_mmu_el1
	.globl	disable_mmu_el3
	.globl	disable_mmu_icache_el1
	.globl	disable_mmu_icache_el3

	.globl	fixup_gdt_reloc

#if SUPPORT_VFP
	.globl	enable_vfp
#endif

#if !ERROR_DEPRECATED
func get_afflvl_shift
	cmp	x0, #3
	cinc	x0, x0, eq
	mov	x1, #MPIDR_AFFLVL_SHIFT
	lsl	x0, x0, x1
	ret
endfunc get_afflvl_shift
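
/*
 * Worked example (illustrative, assuming MPIDR_AFFLVL_SHIFT is 3, i.e. a
 * multiply by 8): affinity levels 0/1/2 map to bit offsets 0/8/16, and
 * level 3 maps to 32, since Aff3 lives in MPIDR bits [39:32].
 */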

func mpidr_mask_lower_afflvls
	cmp	x1, #3
	cinc	x1, x1, eq
	mov	x2, #MPIDR_AFFLVL_SHIFT
	lsl	x2, x1, x2
	lsr	x0, x0, x2
	lsl	x0, x0, x2
	ret
endfunc mpidr_mask_lower_afflvls


func eret
	eret
endfunc eret
#endif /* ERROR_DEPRECATED */

func smc
	smc	#0
endfunc smc

/* -----------------------------------------------------------------------
 * void zero_normalmem(void *mem, unsigned int length);
 *
 * Initialise a region in normal memory to 0. This function complies with the
 * AAPCS and can be called from C code.
 *
 * NOTE: MMU must be enabled when using this function as it can only operate
 * on normal memory. It is intended to be used mainly from C code, where the
 * MMU is usually enabled.
 * -----------------------------------------------------------------------
 */
.equ	zero_normalmem, zeromem_dczva

/* -----------------------------------------------------------------------
 * void zeromem(void *mem, unsigned int length);
 *
 * Initialise a region of device memory to 0. This function complies with the
 * AAPCS and can be called from C code.
 *
 * NOTE: When data caches and MMU are enabled, zero_normalmem can usually be
 * used instead for faster zeroing.
 *
 * -----------------------------------------------------------------------
 */
func zeromem
	/* x2 is the address past the last zeroed address */
	add	x2, x0, x1
	/*
	 * Uses the fallback path that does not use the DC ZVA instruction
	 * and therefore does not require the MMU to be enabled
	 */
	b	.Lzeromem_dczva_fallback_entry
endfunc zeromem

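/*
 * Illustrative usage from C (a minimal sketch; the prototypes match the
 * declarations documented above):
 *
 *   static char scratch[256];
 *
 *   zeromem(scratch, sizeof(scratch));        // safe even with MMU off
 *   zero_normalmem(scratch, sizeof(scratch)); // faster, requires MMU on
 */
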
/* -----------------------------------------------------------------------
 * void zeromem_dczva(void *mem, unsigned int length);
 *
 * Fill a region of normal memory of size "length" in bytes with null bytes.
 * The MMU must be enabled and the memory must be of normal type. This is
 * because this function internally uses the DC ZVA instruction, which
 * generates an Alignment fault if used on any type of Device memory (see
 * section D3.4.9 of the ARMv8 ARM, issue k). When the MMU is disabled, all
 * memory behaves like Device-nGnRnE memory (see section D4.2.8), hence the
 * requirement on the MMU being enabled.
 * NOTE: The code assumes that the block size as defined in the DCZID_EL0
 * register is at least 16 bytes.
 *
 * -----------------------------------------------------------------------
 */
func zeromem_dczva

	/*
	 * The function consists of a series of loops that zero memory one byte
	 * at a time, 16 bytes at a time, or using the DC ZVA instruction to
	 * zero aligned blocks, whose size is assumed to be at least 16 bytes.
	 * In the case where the DC ZVA instruction cannot be used or if the
	 * first 16 bytes loop would overflow, there is a fallback path that
	 * does not use DC ZVA.
	 * Note: The fallback path is also used by the zeromem function, which
	 * branches to it directly.
	 *
	 *              +---------+ zeromem_dczva
	 *              |  entry  |
	 *              +----+----+
	 *                   |
	 *                   v
	 *              +---------+
	 *              | checks  |>o-------+ (If any check fails, fallback)
	 *              +----+----+         |
	 *                   |              |---------------+
	 *                   v              | Fallback path |
	 *            +------+------+       |---------------+
	 *            | 1 byte loop |       |
	 *            +------+------+ .Lzeromem_dczva_initial_1byte_aligned_end
	 *                   |              |
	 *                   v              |
	 *           +-------+-------+      |
	 *           | 16 bytes loop |      |
	 *           +-------+-------+      |
	 *                   |              |
	 *                   v              |
	 *            +------+------+ .Lzeromem_dczva_blocksize_aligned
	 *            | DC ZVA loop |       |
	 *            +------+------+       |
	 *       +--------+  |              |
	 *       |        |  |              |
	 *       |        v  v              |
	 *       |  +-------+-------+ .Lzeromem_dczva_final_16bytes_aligned
	 *       |  | 16 bytes loop |       |
	 *       |  +-------+-------+       |
	 *       |          |               |
	 *       |          v               |
	 *       |   +------+------+ .Lzeromem_dczva_final_1byte_aligned
	 *       |   | 1 byte loop |        |
	 *       |   +-------------+        |
	 *       |          |               |
	 *       |          v               |
	 *       |      +---+--+            |
	 *       |      | exit |            |
	 *       |      +------+            |
	 *       |                          |
	 *       |  +-----------------------+  +------------------+ zeromem
	 *       |  |  +-----------------------| zeromem function |
	 *       |  |  |                       +------------------+
	 *       |  v  v
	 *       |  +-------------+ .Lzeromem_dczva_fallback_entry
	 *       |  | 1 byte loop |
	 *       |  +------+------+
	 *       |         |
	 *       +---------+
	 */

	/*
	 * Readable names for registers
	 *
	 * Registers x0, x1 and x2 are also set by zeromem which
	 * branches into the fallback path directly, so cursor, length and
	 * stop_address should not be retargeted to other registers.
	 */
	cursor       .req x0 /* Start address and then current address */
	length       .req x1 /* Length in bytes of the region to zero out */
	/* Reusing x1 as length is never used after block_mask is set */
	block_mask   .req x1 /* Bitmask of the block size read in DCZID_EL0 */
	stop_address .req x2 /* Address past the last zeroed byte */
	block_size   .req x3 /* Size of a block in bytes as read in DCZID_EL0 */
	tmp1         .req x4
	tmp2         .req x5

#if ENABLE_ASSERTIONS
	/*
	 * Check for M bit (MMU enabled) of the current SCTLR_EL(1|3)
	 * register value and panic if the MMU is disabled.
	 */
#if defined(IMAGE_BL1) || defined(IMAGE_BL31) || (defined(IMAGE_BL2) && BL2_AT_EL3)
	mrs	tmp1, sctlr_el3
#else
	mrs	tmp1, sctlr_el1
#endif

	tst	tmp1, #SCTLR_M_BIT
	ASM_ASSERT(ne)
#endif /* ENABLE_ASSERTIONS */

	/* stop_address is the address past the last to zero */
	add	stop_address, cursor, length

	/*
	 * Get block_size = (log2(<block size>) >> 2) (see encoding of
	 * dczid_el0 reg)
	 */
	mrs	block_size, dczid_el0

	/*
	 * Select the 4 lowest bits and convert the extracted log2(<block size
	 * in words>) to <block size in bytes>
	 */
	ubfx	block_size, block_size, #0, #4
	mov	tmp2, #(1 << 2)
	lsl	block_size, tmp2, block_size
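
	/*
	 * Worked example (illustrative): if DCZID_EL0[3:0] reads 4, a block
	 * holds 2^4 = 16 words of 4 bytes, so block_size = 4 << 4 = 64 bytes.
	 */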

#if ENABLE_ASSERTIONS
	/*
	 * Assumes block size is at least 16 bytes to avoid manual realignment
	 * of the cursor at the end of the DCZVA loop.
	 */
	cmp	block_size, #16
	ASM_ASSERT(hs)
#endif
	/*
	 * Not worth doing all the setup for a region less than a block and
	 * protects against zeroing a whole block when the area to zero is
	 * smaller than that. Also, as it is assumed that the block size is at
	 * least 16 bytes, this also protects the initial aligning loops from
	 * trying to zero 16 bytes when length is less than 16.
	 */
	cmp	length, block_size
	b.lo	.Lzeromem_dczva_fallback_entry

	/*
	 * Calculate the bitmask of the block alignment. It will never
	 * underflow as the block size is between 4 bytes and 2kB.
	 * block_mask = block_size - 1
	 */
	sub	block_mask, block_size, #1

	/*
	 * length alias should not be used after this point unless it is
	 * defined as a register other than block_mask's.
	 */
	.unreq	length

	/*
	 * If the start address is already aligned to zero block size, go
	 * straight to the cache zeroing loop. This is safe because at this
	 * point, the length cannot be smaller than a block size.
	 */
	tst	cursor, block_mask
	b.eq	.Lzeromem_dczva_blocksize_aligned

	/*
	 * Calculate the first block-size-aligned address. It is assumed that
	 * the zero block size is at least 16 bytes. This address is the last
	 * address of this initial loop.
	 */
	orr	tmp1, cursor, block_mask
	add	tmp1, tmp1, #1
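
	/*
	 * Illustrative note: (cursor | block_mask) + 1 rounds up to the next
	 * block boundary; e.g. with a 64-byte block, 0x1004 rounds up to
	 * (0x1004 | 0x3f) + 1 = 0x1040.
	 */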

	/*
	 * If the addition overflows, skip the cache zeroing loops. This is
	 * quite unlikely however.
	 */
	cbz	tmp1, .Lzeromem_dczva_fallback_entry

	/*
	 * If the first block-size-aligned address is past the last address,
	 * fallback to the simpler code.
	 */
	cmp	tmp1, stop_address
	b.hi	.Lzeromem_dczva_fallback_entry

	/*
	 * If the start address is already aligned to 16 bytes, skip this loop.
	 * It is safe to do this because tmp1 (the stop address of the initial
	 * 16 bytes loop) will never be greater than the final stop address.
	 */
	tst	cursor, #0xf
	b.eq	.Lzeromem_dczva_initial_1byte_aligned_end

	/* Calculate the next address aligned to 16 bytes */
	orr	tmp2, cursor, #0xf
	add	tmp2, tmp2, #1
	/* If it overflows, fallback to the simple path (unlikely) */
	cbz	tmp2, .Lzeromem_dczva_fallback_entry
	/*
	 * Next aligned address cannot be after the stop address because the
	 * length cannot be smaller than 16 at this point.
	 */

	/* First loop: zero byte per byte */
1:
	strb	wzr, [cursor], #1
	cmp	cursor, tmp2
	b.ne	1b
.Lzeromem_dczva_initial_1byte_aligned_end:

	/*
	 * Second loop: we need to zero 16 bytes at a time from cursor to tmp1
	 * before being able to use the code that deals with block-size-aligned
	 * addresses.
	 */
	cmp	cursor, tmp1
	b.hs	2f
1:
	stp	xzr, xzr, [cursor], #16
	cmp	cursor, tmp1
	b.lo	1b
2:

	/*
	 * Third loop: zero a block at a time using DC ZVA cache block zeroing
	 * instruction.
	 */
.Lzeromem_dczva_blocksize_aligned:
	/*
	 * Calculate the last block-size-aligned address. If the result equals
	 * the start address, the loop will exit immediately.
	 */
	bic	tmp1, stop_address, block_mask

	cmp	cursor, tmp1
	b.hs	2f
1:
	/* Zero the block containing the cursor */
	dc	zva, cursor
	/* Increment the cursor by the size of a block */
	add	cursor, cursor, block_size
	cmp	cursor, tmp1
	b.lo	1b
2:

	/*
	 * Fourth loop: zero 16 bytes at a time and then byte per byte the
	 * remaining area
	 */
.Lzeromem_dczva_final_16bytes_aligned:
	/*
	 * Calculate the last 16-byte-aligned address. It is assumed that the
	 * block size will never be smaller than 16 bytes, so that the current
	 * cursor is aligned to at least a 16-byte boundary.
	 */
	bic	tmp1, stop_address, #15

	cmp	cursor, tmp1
	b.hs	2f
1:
	stp	xzr, xzr, [cursor], #16
	cmp	cursor, tmp1
	b.lo	1b
2:

	/* Fifth and final loop: zero byte per byte */
.Lzeromem_dczva_final_1byte_aligned:
	cmp	cursor, stop_address
	b.eq	2f
1:
	strb	wzr, [cursor], #1
	cmp	cursor, stop_address
	b.ne	1b
2:
	ret

	/* Fallback for unaligned start addresses */
.Lzeromem_dczva_fallback_entry:
	/*
	 * If the start address is already aligned to 16 bytes, skip this loop.
	 */
	tst	cursor, #0xf
	b.eq	.Lzeromem_dczva_final_16bytes_aligned

	/* Calculate the next address aligned to 16 bytes */
	orr	tmp1, cursor, #15
	add	tmp1, tmp1, #1
	/* If it overflows, fallback to byte per byte zeroing */
	cbz	tmp1, .Lzeromem_dczva_final_1byte_aligned
	/* If the next aligned address is after the stop address, fall back */
	cmp	tmp1, stop_address
	b.hs	.Lzeromem_dczva_final_1byte_aligned

	/* Fallback entry loop: zero byte per byte */
1:
	strb	wzr, [cursor], #1
	cmp	cursor, tmp1
	b.ne	1b

	b	.Lzeromem_dczva_final_16bytes_aligned

	.unreq	cursor
	/*
	 * length is already unreq'ed to reuse the register for another
	 * variable.
	 */
	.unreq	stop_address
	.unreq	block_size
	.unreq	block_mask
	.unreq	tmp1
	.unreq	tmp2
endfunc zeromem_dczva

/* --------------------------------------------------------------------------
 * void memcpy16(void *dest, const void *src, unsigned int length)
 *
 * Copy length bytes from memory area src to memory area dest.
 * The memory areas should not overlap.
 * Destination and source addresses must be 16-byte aligned.
 * --------------------------------------------------------------------------
 */
func memcpy16
#if ENABLE_ASSERTIONS
	orr	x3, x0, x1
	tst	x3, #0xf
	ASM_ASSERT(eq)
#endif
/* copy 16 bytes at a time */
m_loop16:
	cmp	x2, #16
	b.lo	m_loop1
	ldp	x3, x4, [x1], #16
	stp	x3, x4, [x0], #16
	sub	x2, x2, #16
	b	m_loop16
/* copy byte per byte */
m_loop1:
	cbz	x2, m_end
	ldrb	w3, [x1], #1
	strb	w3, [x0], #1
	subs	x2, x2, #1
	b.ne	m_loop1
m_end:
	ret
endfunc memcpy16

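/*
 * Illustrative C model of memcpy16 (a sketch under the same alignment
 * assumptions, not the authoritative implementation):
 *
 *   void memcpy16(void *dest, const void *src, unsigned int length)
 *   {
 *       uint64_t *d = dest;
 *       const uint64_t *s = src;
 *       while (length >= 16) {       // bulk: two 8-byte words per pass
 *           *d++ = *s++;
 *           *d++ = *s++;
 *           length -= 16;
 *       }
 *       char *db = (char *)d;        // byte tail for the remainder
 *       const char *sb = (const char *)s;
 *       while (length--)
 *           *db++ = *sb++;
 *   }
 */
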
/* ---------------------------------------------------------------------------
 * Disable the MMU at EL3
 * ---------------------------------------------------------------------------
 */

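/*
 * Note (descriptive): disable_mmu_icache_el3 below branches into
 * do_disable_mmu_el3; x1 carries the SCTLR_EL3 bits to clear (M and C
 * here, plus I for the icache variant).
 */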
func disable_mmu_el3
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT)
do_disable_mmu_el3:
	mrs	x0, sctlr_el3
	bic	x0, x0, x1
	msr	sctlr_el3, x0
	isb	/* ensure MMU is off */
	dsb	sy
	ret
endfunc disable_mmu_el3


func disable_mmu_icache_el3
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
	b	do_disable_mmu_el3
endfunc disable_mmu_icache_el3

/* ---------------------------------------------------------------------------
 * Disable the MMU at EL1
 * ---------------------------------------------------------------------------
 */

func disable_mmu_el1
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT)
do_disable_mmu_el1:
	mrs	x0, sctlr_el1
	bic	x0, x0, x1
	msr	sctlr_el1, x0
	isb	/* ensure MMU is off */
	dsb	sy
	ret
endfunc disable_mmu_el1


func disable_mmu_icache_el1
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
	b	do_disable_mmu_el1
endfunc disable_mmu_icache_el1

/* ---------------------------------------------------------------------------
 * Enable the use of VFP at EL3
 * ---------------------------------------------------------------------------
 */
#if SUPPORT_VFP
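/*
 * Descriptive note: this sets the CPACR_EL1 FP access bits and clears
 * CPTR_EL3.TFP so that FP/SIMD instructions are not trapped.
 */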
func enable_vfp
	mrs	x0, cpacr_el1
	orr	x0, x0, #CPACR_VFP_BITS
	msr	cpacr_el1, x0
	mrs	x0, cptr_el3
	mov	x1, #AARCH64_CPTR_TFP
	bic	x0, x0, x1
	msr	cptr_el3, x0
	isb
	ret
endfunc enable_vfp
#endif

/* ---------------------------------------------------------------------------
 * Helper to fix up the Global Offset Table (GOT) and dynamic relocations
 * (.rela.dyn) at runtime.
 *
 * This function is meant to be used when the firmware is compiled with -fpie
 * and linked with -pie options. We rely on the linker script exporting
 * appropriate markers for the start and end of the sections. For the GOT, we
 * expect __GOT_START__ and __GOT_END__. Similarly for .rela.dyn, we expect
 * __RELA_START__ and __RELA_END__.
 *
 * The function takes the limits of the memory to apply fixups to as
 * arguments (which are usually the limits of the relocatable BL image).
 * x0 - the start of the fixup region
 * x1 - the limit of the fixup region
 * These addresses have to be page (4KB) aligned.
 * ---------------------------------------------------------------------------
 */
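/*
 * For reference, a representative linker script fragment exporting the
 * expected markers might look like this (a sketch, not the exact script):
 *
 *   .got : {
 *       __GOT_START__ = .;
 *       *(.got)
 *       __GOT_END__ = .;
 *   }
 *
 *   .rela.dyn : {
 *       __RELA_START__ = .;
 *       *(.rela*)
 *       __RELA_END__ = .;
 *   }
 */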
func fixup_gdt_reloc
	mov	x6, x0
	mov	x7, x1

	/* Test if the limits are 4K aligned */
#if ENABLE_ASSERTIONS
	orr	x0, x0, x1
	tst	x0, #(PAGE_SIZE - 1)
	ASM_ASSERT(eq)
#endif
	/*
	 * Calculate the offset based on return address in x30.
	 * Assume that this function is called within a page of the start of
	 * the fixup region.
	 */
	and	x2, x30, #~(PAGE_SIZE - 1)
	sub	x0, x2, x6	/* Diff(S) = Current Address - Compiled Address */

	adrp	x1, __GOT_START__
	add	x1, x1, :lo12:__GOT_START__
	adrp	x2, __GOT_END__
	add	x2, x2, :lo12:__GOT_END__

	/*
	 * GOT is an array of 64-bit addresses which must be fixed up as
	 * new_addr = old_addr + Diff(S).
	 * new_addr is the address the binary is currently executing from
	 * and old_addr is the address at compile time.
	 */
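	/*
	 * Equivalent C sketch of the GOT walk below (illustrative only; the
	 * names are hypothetical):
	 *
	 *   for (uint64_t *entry = got_start; entry < got_end; entry++) {
	 *       if (*entry >= lower_limit && *entry < upper_limit)
	 *           *entry += diff;   // diff = Diff(S) computed above
	 *   }
	 */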
1:
	ldr	x3, [x1]
	/* Skip adding offset if address is < lower limit */
	cmp	x3, x6
	b.lo	2f
	/* Skip adding offset if address is >= upper limit */
	cmp	x3, x7
	b.ge	2f
	add	x3, x3, x0
	str	x3, [x1]
2:
	add	x1, x1, #8
	cmp	x1, x2
	b.lo	1b

	/* Starting dynamic relocations. Use adrp/add to get RELA_START and END */
	adrp	x1, __RELA_START__
	add	x1, x1, :lo12:__RELA_START__
	adrp	x2, __RELA_END__
	add	x2, x2, :lo12:__RELA_END__
	/*
	 * According to the ELF-64 specification, the RELA data structure is
	 * as follows:
	 *	typedef struct
	 *	{
	 *		Elf64_Addr r_offset;
	 *		Elf64_Xword r_info;
	 *		Elf64_Sxword r_addend;
	 *	} Elf64_Rela;
	 *
	 * r_offset is the address of the reference
	 * r_info is the symbol index and type of relocation (in this case
	 * 0x403, which corresponds to R_AARCH64_RELATIVE).
	 * r_addend is the constant part of the expression.
	 *
	 * Size of Elf64_Rela structure is 24 bytes.
	 */
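	/*
	 * Equivalent C sketch of the relocation walk below (illustrative
	 * only; the names are hypothetical):
	 *
	 *   for (Elf64_Rela *r = rela_start; r < rela_end; r++) {
	 *       uint64_t *ref = (uint64_t *)(diff + r->r_offset);
	 *       if (r->r_addend >= lower_limit && r->r_addend < upper_limit)
	 *           *ref = diff + r->r_addend;
	 *   }
	 */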
1:
	/* Assert that the relocation type is R_AARCH64_RELATIVE */
#if ENABLE_ASSERTIONS
	ldr	x3, [x1, #8]
	cmp	x3, #0x403
	ASM_ASSERT(eq)
#endif
	ldr	x3, [x1]	/* r_offset */
	add	x3, x0, x3
	ldr	x4, [x1, #16]	/* r_addend */

	/* Skip adding offset if r_addend is < lower limit */
	cmp	x4, x6
	b.lo	2f
	/* Skip adding offset if r_addend entry is >= upper limit */
	cmp	x4, x7
	b.ge	2f

	add	x4, x0, x4	/* Diff(S) + r_addend */
	str	x4, [x3]

2:	add	x1, x1, #24
	cmp	x1, x2
	b.lo	1b

	ret
endfunc fixup_gdt_reloc