reworked memset and memcpy of the CRIS kernel; this improves board speed by a factor of 20
[openwrt/svn-archive/archive.git] / target / linux / etrax-2.6 / patches / cris / 006-gcc-4.patch
1 diff -urN linux-2.6.19.2.orig/arch/cris/arch-v10/lib/memset.c linux-2.6.19.2/arch/cris/arch-v10/lib/memset.c
2 --- linux-2.6.19.2.orig/arch/cris/arch-v10/lib/memset.c 2007-06-03 13:59:39.000000000 +0200
3 +++ linux-2.6.19.2/arch/cris/arch-v10/lib/memset.c 2007-06-03 14:11:43.000000000 +0200
4 @@ -110,45 +110,28 @@
5 If you want to check that the allocation was right; then
6 check the equalities in the first comment. It should say
7 "r13=r13, r12=r12, r11=r11" */
8 - __asm__ volatile ("
9 - ;; Check that the following is true (same register names on
10 - ;; both sides of equal sign, as in r8=r8):
11 - ;; %0=r13, %1=r12, %4=r11
12 - ;;
13 - ;; Save the registers we'll clobber in the movem process
14 - ;; on the stack. Don't mention them to gcc, it will only be
15 - ;; upset.
16 - subq 11*4,$sp
17 - movem $r10,[$sp]
18 -
19 - move.d $r11,$r0
20 - move.d $r11,$r1
21 - move.d $r11,$r2
22 - move.d $r11,$r3
23 - move.d $r11,$r4
24 - move.d $r11,$r5
25 - move.d $r11,$r6
26 - move.d $r11,$r7
27 - move.d $r11,$r8
28 - move.d $r11,$r9
29 - move.d $r11,$r10
30 -
31 - ;; Now we've got this:
32 - ;; r13 - dst
33 - ;; r12 - n
34 + __asm__ volatile (
35 + "subq 11*4,$sp\n\t"
36 + "movem $r10,[$sp]\n\t"
37 + "move.d $r11,$r0\n\t"
38 + "move.d $r11,$r1\n\t"
39 + "move.d $r11,$r2\n\t"
40 + "move.d $r11,$r3\n\t"
41 + "move.d $r11,$r4\n\t"
42 + "move.d $r11,$r5\n\t"
43 + "move.d $r11,$r6\n\t"
44 + "move.d $r11,$r7\n\t"
45 + "move.d $r11,$r8\n\t"
46 + "move.d $r11,$r9\n\t"
47 + "move.d $r11,$r10\n\t"
48 + "subq 12*4,$r12\n\t"
49 +"0:\n\t"
50 + "subq 12*4,$r12\n\t"
51 + "bge 0b\n\t"
52 + "movem $r11,[$r13+]\n\t"
53 + "addq 12*4,$r12\n\t"
54 + "movem [$sp+],$r10"
55
56 - ;; Update n for the first loop
57 - subq 12*4,$r12
58 -0:
59 - subq 12*4,$r12
60 - bge 0b
61 - movem $r11,[$r13+]
62 -
63 - addq 12*4,$r12 ;; compensate for last loop underflowing n
64 -
65 - ;; Restore registers from stack
66 - movem [$sp+],$r10"
67 -
68 /* Outputs */ : "=r" (dst), "=r" (n)
69 /* Inputs */ : "0" (dst), "1" (n), "r" (lc));
70
71 @@ -161,10 +144,14 @@
72
73 while ( n >= 16 )
74 {
75 - *((long*)dst)++ = lc;
76 - *((long*)dst)++ = lc;
77 - *((long*)dst)++ = lc;
78 - *((long*)dst)++ = lc;
79 + *((long*)dst) = lc;
80 + dst+=4;
81 + *((long*)dst) = lc;
82 + dst+=4;
83 + *((long*)dst) = lc;
84 + dst+=4;
85 + *((long*)dst) = lc;
86 + dst+=4;
87 n -= 16;
88 }
89
90 @@ -182,67 +169,95 @@
91 *(short*)dst = (short) lc;
92 break;
93 case 3:
94 - *((short*)dst)++ = (short) lc;
95 + *((short*)dst) = (short) lc;
96 + dst+=2;
97 *(char*)dst = (char) lc;
98 break;
99 case 4:
100 - *((long*)dst)++ = lc;
101 + *((long*)dst) = lc;
102 + dst+=4;
103 break;
104 case 5:
105 - *((long*)dst)++ = lc;
106 + *((long*)dst) = lc;
107 + dst+=4;
108 *(char*)dst = (char) lc;
109 break;
110 case 6:
111 - *((long*)dst)++ = lc;
112 + *((long*)dst) = lc;
113 + dst+=4;
114 *(short*)dst = (short) lc;
115 break;
116 case 7:
117 - *((long*)dst)++ = lc;
118 - *((short*)dst)++ = (short) lc;
119 + *((long*)dst) = lc;
120 + dst+=4;
121 + *((short*)dst) = (short) lc;
122 + dst+=2;
123 *(char*)dst = (char) lc;
124 break;
125 case 8:
126 - *((long*)dst)++ = lc;
127 - *((long*)dst)++ = lc;
128 + *((long*)dst) = lc;
129 + dst+=4;
130 + *((long*)dst) = lc;
131 + dst+=4;
132 break;
133 case 9:
134 - *((long*)dst)++ = lc;
135 - *((long*)dst)++ = lc;
136 + *((long*)dst) = lc;
137 + dst+=4;
138 + *((long*)dst) = lc;
139 + dst+=4;
140 *(char*)dst = (char) lc;
141 break;
142 case 10:
143 - *((long*)dst)++ = lc;
144 - *((long*)dst)++ = lc;
145 + *((long*)dst) = lc;
146 + dst+=4;
147 + *((long*)dst) = lc;
148 + dst+=4;
149 *(short*)dst = (short) lc;
150 break;
151 case 11:
152 - *((long*)dst)++ = lc;
153 - *((long*)dst)++ = lc;
154 - *((short*)dst)++ = (short) lc;
155 + *((long*)dst) = lc;
156 + dst+=4;
157 + *((long*)dst) = lc;
158 + dst+=4;
159 + *((short*)dst) = (short) lc;
160 + dst+=2;
161 *(char*)dst = (char) lc;
162 break;
163 case 12:
164 - *((long*)dst)++ = lc;
165 - *((long*)dst)++ = lc;
166 - *((long*)dst)++ = lc;
167 + *((long*)dst) = lc;
168 + dst+=4;
169 + *((long*)dst) = lc;
170 + dst+=4;
171 + *((long*)dst) = lc;
172 + dst+=4;
173 break;
174 case 13:
175 - *((long*)dst)++ = lc;
176 - *((long*)dst)++ = lc;
177 - *((long*)dst)++ = lc;
178 + *((long*)dst) = lc;
179 + dst+=4;
180 + *((long*)dst) = lc;
181 + dst+=4;
182 + *((long*)dst) = lc;
183 + dst+=4;
184 *(char*)dst = (char) lc;
185 break;
186 case 14:
187 - *((long*)dst)++ = lc;
188 - *((long*)dst)++ = lc;
189 - *((long*)dst)++ = lc;
190 + *((long*)dst) = lc;
191 + dst+=4;
192 + *((long*)dst) = lc;
193 + dst+=4;
194 + *((long*)dst) = lc;
195 + dst+=4;
196 *(short*)dst = (short) lc;
197 break;
198 case 15:
199 - *((long*)dst)++ = lc;
200 - *((long*)dst)++ = lc;
201 - *((long*)dst)++ = lc;
202 - *((short*)dst)++ = (short) lc;
203 + *((long*)dst) = lc;
204 + dst+=4;
205 + *((long*)dst) = lc;
206 + dst+=4;
207 + *((long*)dst) = lc;
208 + dst+=4;
209 + *((short*)dst) = (short) lc;
210 + dst+=2;
211 *(char*)dst = (char) lc;
212 break;
213 }
214 diff -urN linux-2.6.19.2.orig/arch/cris/arch-v10/lib/string.c linux-2.6.19.2/arch/cris/arch-v10/lib/string.c
215 --- linux-2.6.19.2.orig/arch/cris/arch-v10/lib/string.c 2007-06-03 13:59:39.000000000 +0200
216 +++ linux-2.6.19.2/arch/cris/arch-v10/lib/string.c 2007-06-03 14:21:02.000000000 +0200
217 @@ -95,37 +95,19 @@
218 If you want to check that the allocation was right; then
219 check the equalities in the first comment. It should say
220 "r13=r13, r11=r11, r12=r12" */
221 - __asm__ volatile ("
222 - ;; Check that the following is true (same register names on
223 - ;; both sides of equal sign, as in r8=r8):
224 - ;; %0=r13, %1=r11, %2=r12
225 - ;;
226 - ;; Save the registers we'll use in the movem process
227 - ;; on the stack.
228 - subq 11*4,$sp
229 - movem $r10,[$sp]
230 -
231 - ;; Now we've got this:
232 - ;; r11 - src
233 - ;; r13 - dst
234 - ;; r12 - n
235 -
236 - ;; Update n for the first loop
237 - subq 44,$r12
238 -0:
239 - movem [$r11+],$r10
240 - subq 44,$r12
241 - bge 0b
242 - movem $r10,[$r13+]
243 -
244 - addq 44,$r12 ;; compensate for last loop underflowing n
245 -
246 - ;; Restore registers from stack
247 - movem [$sp+],$r10"
248 -
249 + __asm__ volatile (
250 + "subq 11*4,$sp\n\t"
251 + "movem $r10,[$sp]\n\t"
252 + "subq 44,$r12\n\t"
253 +"0:\n\t"
254 + "movem [$r11+],$r10\n\t"
255 + "subq 44,$r12\n\t"
256 + "bge 0b\n\t"
257 + "movem $r10,[$r13+]\n\t"
258 + "addq 44,$r12\n\t"
259 + "movem [$sp+],$r10\n\t"
260 /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n)
261 /* Inputs */ : "0" (dst), "1" (src), "2" (n));
262 -
263 }
264
265 /* Either we directly starts copying, using dword copying
266 @@ -135,10 +117,14 @@
267
268 while ( n >= 16 )
269 {
270 - *((long*)dst)++ = *((long*)src)++;
271 - *((long*)dst)++ = *((long*)src)++;
272 - *((long*)dst)++ = *((long*)src)++;
273 - *((long*)dst)++ = *((long*)src)++;
274 + *((long*)dst) = *((long*)src);
275 + src+=4;dst+=4;
276 + *((long*)dst) = *((long*)src);
277 + src+=4;dst+=4;
278 + *((long*)dst) = *((long*)src);
279 + src+=4;dst+=4;
280 + *((long*)dst) = *((long*)src);
281 + src+=4;dst+=4;
282 n -= 16;
283 }
284
285 @@ -156,67 +142,95 @@
286 *(short*)dst = *(short*)src;
287 break;
288 case 3:
289 - *((short*)dst)++ = *((short*)src)++;
290 + *((short*)dst) = *((short*)src);
291 + src+=2;dst+=2;
292 *(char*)dst = *(char*)src;
293 break;
294 case 4:
295 - *((long*)dst)++ = *((long*)src)++;
296 + *((long*)dst) = *((long*)src);
297 + src+=4;dst+=4;
298 break;
299 case 5:
300 - *((long*)dst)++ = *((long*)src)++;
301 + *((long*)dst) = *((long*)src);
302 + src+=4;dst+=4;
303 *(char*)dst = *(char*)src;
304 break;
305 case 6:
306 - *((long*)dst)++ = *((long*)src)++;
307 + *((long*)dst) = *((long*)src);
308 + src+=4;dst+=4;
309 *(short*)dst = *(short*)src;
310 break;
311 case 7:
312 - *((long*)dst)++ = *((long*)src)++;
313 - *((short*)dst)++ = *((short*)src)++;
314 + *((long*)dst) = *((long*)src);
315 + src+=4;dst+=4;
316 + *((short*)dst) = *((short*)src);
317 + src+=2;dst+=2;
318 *(char*)dst = *(char*)src;
319 break;
320 case 8:
321 - *((long*)dst)++ = *((long*)src)++;
322 - *((long*)dst)++ = *((long*)src)++;
323 + *((long*)dst) = *((long*)src);
324 + src+=4;dst+=4;
325 + *((long*)dst) = *((long*)src);
326 + src+=4;dst+=4;
327 break;
328 case 9:
329 - *((long*)dst)++ = *((long*)src)++;
330 - *((long*)dst)++ = *((long*)src)++;
331 + *((long*)dst) = *((long*)src);
332 + src+=4;dst+=4;
333 + *((long*)dst) = *((long*)src);
334 + src+=4;dst+=4;
335 *(char*)dst = *(char*)src;
336 break;
337 case 10:
338 - *((long*)dst)++ = *((long*)src)++;
339 - *((long*)dst)++ = *((long*)src)++;
340 + *((long*)dst) = *((long*)src);
341 + src+=4;dst+=4;
342 + *((long*)dst) = *((long*)src);
343 + src+=4;dst+=4;
344 *(short*)dst = *(short*)src;
345 break;
346 case 11:
347 - *((long*)dst)++ = *((long*)src)++;
348 - *((long*)dst)++ = *((long*)src)++;
349 - *((short*)dst)++ = *((short*)src)++;
350 + *((long*)dst) = *((long*)src);
351 + src+=4;dst+=4;
352 + *((long*)dst) = *((long*)src);
353 + src+=4;dst+=4;
354 + *((short*)dst) = *((short*)src);
355 + src+=2;dst+=2;
356 *(char*)dst = *(char*)src;
357 break;
358 case 12:
359 - *((long*)dst)++ = *((long*)src)++;
360 - *((long*)dst)++ = *((long*)src)++;
361 - *((long*)dst)++ = *((long*)src)++;
362 + *((long*)dst) = *((long*)src);
363 + src+=4;dst+=4;
364 + *((long*)dst) = *((long*)src);
365 + src+=4;dst+=4;
366 + *((long*)dst) = *((long*)src);
367 + src+=4;dst+=4;
368 break;
369 case 13:
370 - *((long*)dst)++ = *((long*)src)++;
371 - *((long*)dst)++ = *((long*)src)++;
372 - *((long*)dst)++ = *((long*)src)++;
373 + *((long*)dst) = *((long*)src);
374 + src+=4;dst+=4;
375 + *((long*)dst) = *((long*)src);
376 + src+=4;dst+=4;
377 + *((long*)dst) = *((long*)src);
378 + src+=4;dst+=4;
379 *(char*)dst = *(char*)src;
380 break;
381 case 14:
382 - *((long*)dst)++ = *((long*)src)++;
383 - *((long*)dst)++ = *((long*)src)++;
384 - *((long*)dst)++ = *((long*)src)++;
385 + *((long*)dst) = *((long*)src);
386 + src+=4;dst+=4;
387 + *((long*)dst) = *((long*)src);
388 + src+=4;dst+=4;
389 + *((long*)dst) = *((long*)src);
390 + src+=4;dst+=4;
391 *(short*)dst = *(short*)src;
392 break;
393 case 15:
394 - *((long*)dst)++ = *((long*)src)++;
395 - *((long*)dst)++ = *((long*)src)++;
396 - *((long*)dst)++ = *((long*)src)++;
397 - *((short*)dst)++ = *((short*)src)++;
398 + *((long*)dst) = *((long*)src);
399 + src+=4;dst+=4;
400 + *((long*)dst) = *((long*)src);
401 + src+=4;dst+=4;
402 + *((long*)dst) = *((long*)src);
403 + src+=4;dst+=4;
404 + *((short*)dst) = *((short*)src);
405 + src+=2;dst+=2;
406 *(char*)dst = *(char*)src;
407 break;
408 }
409 diff -urN linux-2.6.19.2.orig/arch/cris/arch-v10/lib/usercopy.c linux-2.6.19.2/arch/cris/arch-v10/lib/usercopy.c
410 --- linux-2.6.19.2.orig/arch/cris/arch-v10/lib/usercopy.c 2007-06-03 13:59:39.000000000 +0200
411 +++ linux-2.6.19.2/arch/cris/arch-v10/lib/usercopy.c 2007-06-03 14:25:55.000000000 +0200
412 @@ -88,63 +88,38 @@
413 If you want to check that the allocation was right; then
414 check the equalities in the first comment. It should say
415 "r13=r13, r11=r11, r12=r12". */
416 - __asm__ volatile ("\
417 - .ifnc %0%1%2%3,$r13$r11$r12$r10 \n\
418 - .err \n\
419 - .endif \n\
420 -
421 - ;; Save the registers we'll use in the movem process
422 - ;; on the stack.
423 - subq 11*4,$sp
424 - movem $r10,[$sp]
425 -
426 - ;; Now we've got this:
427 - ;; r11 - src
428 - ;; r13 - dst
429 - ;; r12 - n
430 -
431 - ;; Update n for the first loop
432 - subq 44,$r12
433 -
434 -; Since the noted PC of a faulting instruction in a delay-slot of a taken
435 -; branch, is that of the branch target, we actually point at the from-movem
436 -; for this case. There is no ambiguity here; if there was a fault in that
437 -; instruction (meaning a kernel oops), the faulted PC would be the address
438 -; after *that* movem.
439 -
440 -0:
441 - movem [$r11+],$r10
442 - subq 44,$r12
443 - bge 0b
444 - movem $r10,[$r13+]
445 -1:
446 - addq 44,$r12 ;; compensate for last loop underflowing n
447 -
448 - ;; Restore registers from stack
449 - movem [$sp+],$r10
450 -2:
451 - .section .fixup,\"ax\"
452 -
453 -; To provide a correct count in r10 of bytes that failed to be copied,
454 -; we jump back into the loop if the loop-branch was taken. There is no
455 -; performance penalty for sany use; the program will segfault soon enough.
456 -
457 -3:
458 - move.d [$sp],$r10
459 - addq 44,$r10
460 - move.d $r10,[$sp]
461 - jump 0b
462 -4:
463 - movem [$sp+],$r10
464 - addq 44,$r10
465 - addq 44,$r12
466 - jump 2b
467 -
468 - .previous
469 - .section __ex_table,\"a\"
470 - .dword 0b,3b
471 - .dword 1b,4b
472 - .previous"
473 + __asm__ volatile (
474 + ".ifnc %0%1%2%3,$r13$r11$r12$r10 \n\t"
475 + ".err \n\t"
476 + ".endif \n\t"
477 + "subq 11*4,$sp\n\t"
478 + "movem $r10,[$sp]\n\t"
479 + "subq 44,$r12\n\t"
480 + "0:\n\t"
481 + "movem [$r11+],$r10\n\t"
482 + "subq 44,$r12\n\t"
483 + "bge 0b\n\t"
484 + "movem $r10,[$r13+]\n\t"
485 + "1:\n\t"
486 + "addq 44,$r12 \n\t"
487 + "movem [$sp+],$r10\n\t"
488 + "2:\n\t"
489 + ".section .fixup,\"ax\"\n\t"
490 + "3:\n\t"
491 + "move.d [$sp],$r10\n\t"
492 + "addq 44,$r10\n\t"
493 + "move.d $r10,[$sp]\n\t"
494 + "jump 0b\n\t"
495 + "4:\n\t"
496 + "movem [$sp+],$r10\n\t"
497 + "addq 44,$r10\n\t"
498 + "addq 44,$r12\n\t"
499 + "jump 2b\n\t"
500 + ".previous\n\t"
501 + ".section __ex_table,\"a\"\n\t"
502 + ".dword 0b,3b\n\t"
503 + ".dword 1b,4b\n\t"
504 + ".previous\n\t"
505
506 /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
507 /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));
508 @@ -253,60 +228,32 @@
509 If you want to check that the allocation was right; then
510 check the equalities in the first comment. It should say
511 "r13=r13, r11=r11, r12=r12" */
512 - __asm__ volatile ("
513 - .ifnc %0%1%2%3,$r13$r11$r12$r10 \n\
514 - .err \n\
515 - .endif \n\
516 -
517 - ;; Save the registers we'll use in the movem process
518 - ;; on the stack.
519 - subq 11*4,$sp
520 - movem $r10,[$sp]
521 -
522 - ;; Now we've got this:
523 - ;; r11 - src
524 - ;; r13 - dst
525 - ;; r12 - n
526 -
527 - ;; Update n for the first loop
528 - subq 44,$r12
529 -0:
530 - movem [$r11+],$r10
531 -1:
532 - subq 44,$r12
533 - bge 0b
534 - movem $r10,[$r13+]
535 -
536 - addq 44,$r12 ;; compensate for last loop underflowing n
537 -
538 - ;; Restore registers from stack
539 - movem [$sp+],$r10
540 -4:
541 - .section .fixup,\"ax\"
542 -
543 -;; Do not jump back into the loop if we fail. For some uses, we get a
544 -;; page fault somewhere on the line. Without checking for page limits,
545 -;; we don't know where, but we need to copy accurately and keep an
546 -;; accurate count; not just clear the whole line. To do that, we fall
547 -;; down in the code below, proceeding with smaller amounts. It should
548 -;; be kept in mind that we have to cater to code like what at one time
549 -;; was in fs/super.c:
550 -;; i = size - copy_from_user((void *)page, data, size);
551 -;; which would cause repeated faults while clearing the remainder of
552 -;; the SIZE bytes at PAGE after the first fault.
553 -;; A caveat here is that we must not fall through from a failing page
554 -;; to a valid page.
555 -
556 -3:
557 - movem [$sp+],$r10
558 - addq 44,$r12 ;; Get back count before faulting point.
559 - subq 44,$r11 ;; Get back pointer to faulting movem-line.
560 - jump 4b ;; Fall through, pretending the fault didn't happen.
561 -
562 - .previous
563 - .section __ex_table,\"a\"
564 - .dword 1b,3b
565 - .previous"
566 + __asm__ volatile (
567 + ".ifnc %0%1%2%3,$r13$r11$r12$r10 \n\t"
568 + ".err \n\t"
569 + ".endif \n\t"
570 + "subq 11*4,$sp\n\t"
571 + "movem $r10,[$sp]\n\t"
572 + "subq 44,$r12\n\t"
573 + "0:\n\t"
574 + "movem [$r11+],$r10\n\t"
575 + "1:\n\t"
576 + "subq 44,$r12\n\t"
577 + "bge 0b\n\t"
578 + "movem $r10,[$r13+]\n\t"
579 + "addq 44,$r12 \n\t"
580 + "movem [$sp+],$r10\n\t"
581 + "4:\n\t"
582 + ".section .fixup,\"ax\"\n\t"
583 + "3:\n\t"
584 + "movem [$sp+],$r10\n\t"
585 + "addq 44,$r12\n\t"
586 + "subq 44,$r11\n\t"
587 + "jump 4b \n\t"
588 + ".previous\n\t"
589 + ".section __ex_table,\"a\"\n\t"
590 + ".dword 1b,3b\n\t"
591 + ".previous\n\t"
592
593 /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
594 /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));
595 @@ -425,66 +372,50 @@
596 If you want to check that the allocation was right; then
597 check the equalities in the first comment. It should say
598 something like "r13=r13, r11=r11, r12=r12". */
599 - __asm__ volatile ("
600 - .ifnc %0%1%2,$r13$r12$r10 \n\
601 - .err \n\
602 - .endif \n\
603 -
604 - ;; Save the registers we'll clobber in the movem process
605 - ;; on the stack. Don't mention them to gcc, it will only be
606 - ;; upset.
607 - subq 11*4,$sp
608 - movem $r10,[$sp]
609 -
610 - clear.d $r0
611 - clear.d $r1
612 - clear.d $r2
613 - clear.d $r3
614 - clear.d $r4
615 - clear.d $r5
616 - clear.d $r6
617 - clear.d $r7
618 - clear.d $r8
619 - clear.d $r9
620 - clear.d $r10
621 - clear.d $r11
622 -
623 - ;; Now we've got this:
624 - ;; r13 - dst
625 - ;; r12 - n
626 -
627 - ;; Update n for the first loop
628 - subq 12*4,$r12
629 -0:
630 - subq 12*4,$r12
631 - bge 0b
632 - movem $r11,[$r13+]
633 -1:
634 - addq 12*4,$r12 ;; compensate for last loop underflowing n
635 -
636 - ;; Restore registers from stack
637 - movem [$sp+],$r10
638 -2:
639 - .section .fixup,\"ax\"
640 -3:
641 - move.d [$sp],$r10
642 - addq 12*4,$r10
643 - move.d $r10,[$sp]
644 - clear.d $r10
645 - jump 0b
646 -
647 -4:
648 - movem [$sp+],$r10
649 - addq 12*4,$r10
650 - addq 12*4,$r12
651 - jump 2b
652 -
653 - .previous
654 - .section __ex_table,\"a\"
655 - .dword 0b,3b
656 - .dword 1b,4b
657 - .previous"
658 -
659 + __asm__ volatile (
660 + ".ifnc %0%1%2,$r13$r12$r10\n\t"
661 + ".err \n\t"
662 + ".endif\n\t"
663 + "subq 11*4,$sp\n\t"
664 + "movem $r10,[$sp]\n\t"
665 + "clear.d $r0\n\t"
666 + "clear.d $r1\n\t"
667 + "clear.d $r2\n\t"
668 + "clear.d $r3\n\t"
669 + "clear.d $r4\n\t"
670 + "clear.d $r5\n\t"
671 + "clear.d $r6\n\t"
672 + "clear.d $r7\n\t"
673 + "clear.d $r8\n\t"
674 + "clear.d $r9\n\t"
675 + "clear.d $r10\n\t"
676 + "clear.d $r11\n\t"
677 + "subq 12*4,$r12\n\t"
678 + "0:\n\t"
679 + "subq 12*4,$r12\n\t"
680 + "bge 0b\n\t"
681 + "movem $r11,[$r13+]\n\t"
682 + "1: \n\t"
683 + "addq 12*4,$r12 \n\t"
684 + "movem [$sp+],$r10\n\t"
685 + "2:\n\t"
686 + ".section .fixup,\"ax\"\n\t"
687 + "3:\n\t"
688 + "move.d [$sp],$r10\n\t"
689 + "addq 12*4,$r10\n\t"
690 + "move.d $r10,[$sp]\n\t"
691 + "clear.d $r10\n\t"
692 + "jump 0b\n\t"
693 + "4:\n\t"
694 + "movem [$sp+],$r10\n\t"
695 + "addq 12*4,$r10\n\t"
696 + "addq 12*4,$r12\n\t"
697 + "jump 2b\n\t"
698 + ".previous\n\t"
699 + ".section __ex_table,\"a\"\n\t"
700 + ".dword 0b,3b\n\t"
701 + ".dword 1b,4b\n\t"
702 + ".previous\n\t"
703 /* Outputs */ : "=r" (dst), "=r" (n), "=r" (retn)
704 /* Inputs */ : "0" (dst), "1" (n), "2" (retn)
705 /* Clobber */ : "r11");