luajit: backport softfloat ppc support
[feed/packages.git] / lang / luajit / patches / 050-ppc-softfloat.patch
1 From 71b7bc88341945f13f3951e2bb5fd247b639ff7a Mon Sep 17 00:00:00 2001
2 From: Mike Pall <mike>
3 Date: Sun, 3 Sep 2017 23:20:53 +0200
4 Subject: [PATCH] PPC: Add soft-float support to JIT compiler backend.
5
6 Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
7 Sponsored by Cisco Systems, Inc.
8 ---
9 src/lj_arch.h | 1 -
10 src/lj_asm_ppc.h | 321 ++++++++++++++++++++++++++++++++++++++++-------
11 2 files changed, 278 insertions(+), 44 deletions(-)
12
13 --- a/src/lj_arch.h
14 +++ b/src/lj_arch.h
15 @@ -273,7 +273,6 @@
16 #endif
17
18 #if LJ_ABI_SOFTFP
19 -#define LJ_ARCH_NOJIT 1 /* NYI */
20 #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
21 #else
22 #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
23 --- a/src/lj_asm_ppc.h
24 +++ b/src/lj_asm_ppc.h
25 @@ -226,6 +226,7 @@ static void asm_fusexrefx(ASMState *as,
26 emit_tab(as, pi, rt, left, right);
27 }
28
29 +#if !LJ_SOFTFP
30 /* Fuse to multiply-add/sub instruction. */
31 static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
32 {
33 @@ -245,6 +246,7 @@ static int asm_fusemadd(ASMState *as, IR
34 }
35 return 0;
36 }
37 +#endif
38
39 /* -- Calls --------------------------------------------------------------- */
40
41 @@ -253,13 +255,17 @@ static void asm_gencall(ASMState *as, co
42 {
43 uint32_t n, nargs = CCI_XNARGS(ci);
44 int32_t ofs = 8;
45 - Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR;
46 + Reg gpr = REGARG_FIRSTGPR;
47 +#if !LJ_SOFTFP
48 + Reg fpr = REGARG_FIRSTFPR;
49 +#endif
50 if ((void *)ci->func)
51 emit_call(as, (void *)ci->func);
52 for (n = 0; n < nargs; n++) { /* Setup args. */
53 IRRef ref = args[n];
54 if (ref) {
55 IRIns *ir = IR(ref);
56 +#if !LJ_SOFTFP
57 if (irt_isfp(ir->t)) {
58 if (fpr <= REGARG_LASTFPR) {
59 lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */
60 @@ -271,7 +277,9 @@ static void asm_gencall(ASMState *as, co
61 emit_spstore(as, ir, r, ofs);
62 ofs += irt_isnum(ir->t) ? 8 : 4;
63 }
64 - } else {
65 + } else
66 +#endif
67 + {
68 if (gpr <= REGARG_LASTGPR) {
69 lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */
70 ra_leftov(as, gpr, ref);
71 @@ -290,8 +298,10 @@ static void asm_gencall(ASMState *as, co
72 }
73 checkmclim(as);
74 }
75 +#if !LJ_SOFTFP
76 if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */
77 emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6);
78 +#endif
79 }
80
81 /* Setup result reg/sp for call. Evict scratch regs. */
82 @@ -299,8 +309,10 @@ static void asm_setupresult(ASMState *as
83 {
84 RegSet drop = RSET_SCRATCH;
85 int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
86 +#if !LJ_SOFTFP
87 if ((ci->flags & CCI_NOFPRCLOBBER))
88 drop &= ~RSET_FPR;
89 +#endif
90 if (ra_hasreg(ir->r))
91 rset_clear(drop, ir->r); /* Dest reg handled below. */
92 if (hiop && ra_hasreg((ir+1)->r))
93 @@ -308,7 +320,7 @@ static void asm_setupresult(ASMState *as
94 ra_evictset(as, drop); /* Evictions must be performed first. */
95 if (ra_used(ir)) {
96 lua_assert(!irt_ispri(ir->t));
97 - if (irt_isfp(ir->t)) {
98 + if (!LJ_SOFTFP && irt_isfp(ir->t)) {
99 if ((ci->flags & CCI_CASTU64)) {
100 /* Use spill slot or temp slots. */
101 int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
102 @@ -377,6 +389,7 @@ static void asm_retf(ASMState *as, IRIns
103
104 /* -- Type conversions ---------------------------------------------------- */
105
106 +#if !LJ_SOFTFP
107 static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
108 {
109 RegSet allow = RSET_FPR;
110 @@ -409,15 +422,23 @@ static void asm_tobit(ASMState *as, IRIn
111 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
112 emit_fab(as, PPCI_FADD, tmp, left, right);
113 }
114 +#endif
115
116 static void asm_conv(ASMState *as, IRIns *ir)
117 {
118 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
119 +#if !LJ_SOFTFP
120 int stfp = (st == IRT_NUM || st == IRT_FLOAT);
121 +#endif
122 IRRef lref = ir->op1;
123 - lua_assert(irt_type(ir->t) != st);
124 lua_assert(!(irt_isint64(ir->t) ||
125 (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */
126 +#if LJ_SOFTFP
127 + /* FP conversions are handled by SPLIT. */
128 + lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT));
129 + /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
130 +#else
131 + lua_assert(irt_type(ir->t) != st);
132 if (irt_isfp(ir->t)) {
133 Reg dest = ra_dest(as, ir, RSET_FPR);
134 if (stfp) { /* FP to FP conversion. */
135 @@ -476,7 +497,9 @@ static void asm_conv(ASMState *as, IRIns
136 emit_fb(as, PPCI_FCTIWZ, tmp, left);
137 }
138 }
139 - } else {
140 + } else
141 +#endif
142 + {
143 Reg dest = ra_dest(as, ir, RSET_GPR);
144 if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
145 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
146 @@ -496,17 +519,41 @@ static void asm_strto(ASMState *as, IRIn
147 {
148 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
149 IRRef args[2];
150 - int32_t ofs;
151 + int32_t ofs = SPOFS_TMP;
152 +#if LJ_SOFTFP
153 + ra_evictset(as, RSET_SCRATCH);
154 + if (ra_used(ir)) {
155 + if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) &&
156 + (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) {
157 + int i;
158 + for (i = 0; i < 2; i++) {
159 + Reg r = (ir+i)->r;
160 + if (ra_hasreg(r)) {
161 + ra_free(as, r);
162 + ra_modified(as, r);
163 + emit_spload(as, ir+i, r, sps_scale((ir+i)->s));
164 + }
165 + }
166 + ofs = sps_scale(ir->s & ~1);
167 + } else {
168 + Reg rhi = ra_dest(as, ir+1, RSET_GPR);
169 + Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi));
170 + emit_tai(as, PPCI_LWZ, rhi, RID_SP, ofs);
171 + emit_tai(as, PPCI_LWZ, rlo, RID_SP, ofs+4);
172 + }
173 + }
174 +#else
175 RegSet drop = RSET_SCRATCH;
176 if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */
177 ra_evictset(as, drop);
178 + if (ir->s) ofs = sps_scale(ir->s);
179 +#endif
180 asm_guardcc(as, CC_EQ);
181 emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */
182 args[0] = ir->op1; /* GCstr *str */
183 args[1] = ASMREF_TMP1; /* TValue *n */
184 asm_gencall(as, ci, args);
185 /* Store the result to the spill slot or temp slots. */
186 - ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
187 emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
188 }
189
190 @@ -530,7 +577,10 @@ static void asm_tvptr(ASMState *as, Reg
191 Reg src = ra_alloc1(as, ref, allow);
192 emit_setgl(as, src, tmptv.gcr);
193 }
194 - type = ra_allock(as, irt_toitype(ir->t), allow);
195 + if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
196 + type = ra_alloc1(as, ref+1, allow);
197 + else
198 + type = ra_allock(as, irt_toitype(ir->t), allow);
199 emit_setgl(as, type, tmptv.it);
200 }
201 }
202 @@ -574,11 +624,27 @@ static void asm_href(ASMState *as, IRIns
203 Reg tisnum = RID_NONE, tmpnum = RID_NONE;
204 IRRef refkey = ir->op2;
205 IRIns *irkey = IR(refkey);
206 + int isk = irref_isk(refkey);
207 IRType1 kt = irkey->t;
208 uint32_t khash;
209 MCLabel l_end, l_loop, l_next;
210
211 rset_clear(allow, tab);
212 +#if LJ_SOFTFP
213 + if (!isk) {
214 + key = ra_alloc1(as, refkey, allow);
215 + rset_clear(allow, key);
216 + if (irkey[1].o == IR_HIOP) {
217 + if (ra_hasreg((irkey+1)->r)) {
218 + tmpnum = (irkey+1)->r;
219 + ra_noweak(as, tmpnum);
220 + } else {
221 + tmpnum = ra_allocref(as, refkey+1, allow);
222 + }
223 + rset_clear(allow, tmpnum);
224 + }
225 + }
226 +#else
227 if (irt_isnum(kt)) {
228 key = ra_alloc1(as, refkey, RSET_FPR);
229 tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
230 @@ -588,6 +654,7 @@ static void asm_href(ASMState *as, IRIns
231 key = ra_alloc1(as, refkey, allow);
232 rset_clear(allow, key);
233 }
234 +#endif
235 tmp2 = ra_scratch(as, allow);
236 rset_clear(allow, tmp2);
237
238 @@ -610,7 +677,7 @@ static void asm_href(ASMState *as, IRIns
239 asm_guardcc(as, CC_EQ);
240 else
241 emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
242 - if (irt_isnum(kt)) {
243 + if (!LJ_SOFTFP && irt_isnum(kt)) {
244 emit_fab(as, PPCI_FCMPU, 0, tmpnum, key);
245 emit_condbranch(as, PPCI_BC, CC_GE, l_next);
246 emit_ab(as, PPCI_CMPLW, tmp1, tisnum);
247 @@ -620,7 +687,10 @@ static void asm_href(ASMState *as, IRIns
248 emit_ab(as, PPCI_CMPW, tmp2, key);
249 emit_condbranch(as, PPCI_BC, CC_NE, l_next);
250 }
251 - emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
252 + if (LJ_SOFTFP && ra_hasreg(tmpnum))
253 + emit_ab(as, PPCI_CMPW, tmp1, tmpnum);
254 + else
255 + emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
256 if (!irt_ispri(kt))
257 emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr));
258 }
259 @@ -629,19 +699,19 @@ static void asm_href(ASMState *as, IRIns
260 (((char *)as->mcp-(char *)l_loop) & 0xffffu);
261
262 /* Load main position relative to tab->node into dest. */
263 - khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
264 + khash = isk ? ir_khash(irkey) : 1;
265 if (khash == 0) {
266 emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
267 } else {
268 Reg tmphash = tmp1;
269 - if (irref_isk(refkey))
270 + if (isk)
271 tmphash = ra_allock(as, khash, allow);
272 emit_tab(as, PPCI_ADD, dest, dest, tmp1);
273 emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node));
274 emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash);
275 emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
276 emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
277 - if (irref_isk(refkey)) {
278 + if (isk) {
279 /* Nothing to do. */
280 } else if (irt_isstr(kt)) {
281 emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash));
282 @@ -651,13 +721,19 @@ static void asm_href(ASMState *as, IRIns
283 emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2);
284 emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31);
285 emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2);
286 - if (irt_isnum(kt)) {
287 + if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) {
288 +#if LJ_SOFTFP
289 + emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
290 + emit_rotlwi(as, dest, tmp1, HASH_ROT1);
291 + emit_tab(as, PPCI_ADD, tmp1, tmpnum, tmpnum);
292 +#else
293 int32_t ofs = ra_spill(as, irkey);
294 emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1);
295 emit_rotlwi(as, dest, tmp1, HASH_ROT1);
296 emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1);
297 emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4);
298 emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs);
299 +#endif
300 } else {
301 emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
302 emit_rotlwi(as, dest, tmp1, HASH_ROT1);
303 @@ -784,8 +860,8 @@ static PPCIns asm_fxloadins(IRIns *ir)
304 case IRT_U8: return PPCI_LBZ;
305 case IRT_I16: return PPCI_LHA;
306 case IRT_U16: return PPCI_LHZ;
307 - case IRT_NUM: return PPCI_LFD;
308 - case IRT_FLOAT: return PPCI_LFS;
309 + case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_LFD;
310 + case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_LFS;
311 default: return PPCI_LWZ;
312 }
313 }
314 @@ -795,8 +871,8 @@ static PPCIns asm_fxstoreins(IRIns *ir)
315 switch (irt_type(ir->t)) {
316 case IRT_I8: case IRT_U8: return PPCI_STB;
317 case IRT_I16: case IRT_U16: return PPCI_STH;
318 - case IRT_NUM: return PPCI_STFD;
319 - case IRT_FLOAT: return PPCI_STFS;
320 + case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_STFD;
321 + case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_STFS;
322 default: return PPCI_STW;
323 }
324 }
325 @@ -839,7 +915,8 @@ static void asm_fstore(ASMState *as, IRI
326
327 static void asm_xload(ASMState *as, IRIns *ir)
328 {
329 - Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
330 + Reg dest = ra_dest(as, ir,
331 + (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
332 lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
333 if (irt_isi8(ir->t))
334 emit_as(as, PPCI_EXTSB, dest, dest);
335 @@ -857,7 +934,8 @@ static void asm_xstore_(ASMState *as, IR
336 Reg src = ra_alloc1(as, irb->op1, RSET_GPR);
337 asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src));
338 } else {
339 - Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
340 + Reg src = ra_alloc1(as, ir->op2,
341 + (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
342 asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
343 rset_exclude(RSET_GPR, src), ofs);
344 }
345 @@ -871,10 +949,19 @@ static void asm_ahuvload(ASMState *as, I
346 Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx;
347 RegSet allow = RSET_GPR;
348 int32_t ofs = AHUREF_LSX;
349 + if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) {
350 + t.irt = IRT_NUM;
351 + if (ra_used(ir+1)) {
352 + type = ra_dest(as, ir+1, allow);
353 + rset_clear(allow, type);
354 + }
355 + ofs = 0;
356 + }
357 if (ra_used(ir)) {
358 - lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
359 - if (!irt_isnum(t)) ofs = 0;
360 - dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
361 + lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
362 + irt_isint(ir->t) || irt_isaddr(ir->t));
363 + if (LJ_SOFTFP || !irt_isnum(t)) ofs = 0;
364 + dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
365 rset_clear(allow, dest);
366 }
367 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
368 @@ -883,12 +970,13 @@ static void asm_ahuvload(ASMState *as, I
369 asm_guardcc(as, CC_GE);
370 emit_ab(as, PPCI_CMPLW, type, tisnum);
371 if (ra_hasreg(dest)) {
372 - if (ofs == AHUREF_LSX) {
373 + if (!LJ_SOFTFP && ofs == AHUREF_LSX) {
374 tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR,
375 (idx&255)), (idx>>8)));
376 emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp);
377 } else {
378 - emit_fai(as, PPCI_LFD, dest, idx, ofs);
379 + emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, idx,
380 + ofs+4*LJ_SOFTFP);
381 }
382 }
383 } else {
384 @@ -911,7 +999,7 @@ static void asm_ahustore(ASMState *as, I
385 int32_t ofs = AHUREF_LSX;
386 if (ir->r == RID_SINK)
387 return;
388 - if (irt_isnum(ir->t)) {
389 + if (!LJ_SOFTFP && irt_isnum(ir->t)) {
390 src = ra_alloc1(as, ir->op2, RSET_FPR);
391 } else {
392 if (!irt_ispri(ir->t)) {
393 @@ -919,11 +1007,14 @@ static void asm_ahustore(ASMState *as, I
394 rset_clear(allow, src);
395 ofs = 0;
396 }
397 - type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
398 + if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
399 + type = ra_alloc1(as, (ir+1)->op2, allow);
400 + else
401 + type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
402 rset_clear(allow, type);
403 }
404 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
405 - if (irt_isnum(ir->t)) {
406 + if (!LJ_SOFTFP && irt_isnum(ir->t)) {
407 if (ofs == AHUREF_LSX) {
408 emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP);
409 emit_slwi(as, RID_TMP, (idx>>8), 3);
410 @@ -948,21 +1039,33 @@ static void asm_sload(ASMState *as, IRIn
411 IRType1 t = ir->t;
412 Reg dest = RID_NONE, type = RID_NONE, base;
413 RegSet allow = RSET_GPR;
414 + int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
415 + if (hiop)
416 + t.irt = IRT_NUM;
417 lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
418 - lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
419 + lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK));
420 lua_assert(LJ_DUALNUM ||
421 !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)));
422 +#if LJ_SOFTFP
423 + lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */
424 + if (hiop && ra_used(ir+1)) {
425 + type = ra_dest(as, ir+1, allow);
426 + rset_clear(allow, type);
427 + }
428 +#else
429 if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
430 dest = ra_scratch(as, RSET_FPR);
431 asm_tointg(as, ir, dest);
432 t.irt = IRT_NUM; /* Continue with a regular number type check. */
433 - } else if (ra_used(ir)) {
434 + } else
435 +#endif
436 + if (ra_used(ir)) {
437 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
438 - dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
439 + dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
440 rset_clear(allow, dest);
441 base = ra_alloc1(as, REF_BASE, allow);
442 rset_clear(allow, base);
443 - if ((ir->op2 & IRSLOAD_CONVERT)) {
444 + if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) {
445 if (irt_isint(t)) {
446 emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
447 dest = ra_scratch(as, RSET_FPR);
448 @@ -994,10 +1097,13 @@ dotypecheck:
449 if ((ir->op2 & IRSLOAD_TYPECHECK)) {
450 Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow);
451 asm_guardcc(as, CC_GE);
452 - emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum);
453 +#if !LJ_SOFTFP
454 type = RID_TMP;
455 +#endif
456 + emit_ab(as, PPCI_CMPLW, type, tisnum);
457 }
458 - if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4);
459 + if (ra_hasreg(dest)) emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest,
460 + base, ofs-(LJ_SOFTFP?0:4));
461 } else {
462 if ((ir->op2 & IRSLOAD_TYPECHECK)) {
463 asm_guardcc(as, CC_NE);
464 @@ -1119,6 +1225,7 @@ static void asm_obar(ASMState *as, IRIns
465
466 /* -- Arithmetic and logic operations ------------------------------------- */
467
468 +#if !LJ_SOFTFP
469 static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi)
470 {
471 Reg dest = ra_dest(as, ir, RSET_FPR);
472 @@ -1146,13 +1253,17 @@ static void asm_fpmath(ASMState *as, IRI
473 else
474 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
475 }
476 +#endif
477
478 static void asm_add(ASMState *as, IRIns *ir)
479 {
480 +#if !LJ_SOFTFP
481 if (irt_isnum(ir->t)) {
482 if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD))
483 asm_fparith(as, ir, PPCI_FADD);
484 - } else {
485 + } else
486 +#endif
487 + {
488 Reg dest = ra_dest(as, ir, RSET_GPR);
489 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
490 PPCIns pi;
491 @@ -1191,10 +1302,13 @@ static void asm_add(ASMState *as, IRIns
492
493 static void asm_sub(ASMState *as, IRIns *ir)
494 {
495 +#if !LJ_SOFTFP
496 if (irt_isnum(ir->t)) {
497 if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB))
498 asm_fparith(as, ir, PPCI_FSUB);
499 - } else {
500 + } else
501 +#endif
502 + {
503 PPCIns pi = PPCI_SUBF;
504 Reg dest = ra_dest(as, ir, RSET_GPR);
505 Reg left, right;
506 @@ -1220,9 +1334,12 @@ static void asm_sub(ASMState *as, IRIns
507
508 static void asm_mul(ASMState *as, IRIns *ir)
509 {
510 +#if !LJ_SOFTFP
511 if (irt_isnum(ir->t)) {
512 asm_fparith(as, ir, PPCI_FMUL);
513 - } else {
514 + } else
515 +#endif
516 + {
517 PPCIns pi = PPCI_MULLW;
518 Reg dest = ra_dest(as, ir, RSET_GPR);
519 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
520 @@ -1250,9 +1367,12 @@ static void asm_mul(ASMState *as, IRIns
521
522 static void asm_neg(ASMState *as, IRIns *ir)
523 {
524 +#if !LJ_SOFTFP
525 if (irt_isnum(ir->t)) {
526 asm_fpunary(as, ir, PPCI_FNEG);
527 - } else {
528 + } else
529 +#endif
530 + {
531 Reg dest, left;
532 PPCIns pi = PPCI_NEG;
533 if (as->flagmcp == as->mcp) {
534 @@ -1563,9 +1683,40 @@ static void asm_bitshift(ASMState *as, I
535 PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31))
536 #define asm_bror(as, ir) lua_assert(0)
537
538 +#if LJ_SOFTFP
539 +static void asm_sfpmin_max(ASMState *as, IRIns *ir)
540 +{ /* Soft-float MIN/MAX over the op pair ir and ir+1: sets up a softfp_cmp call and moves either the left or the right operand pair into the dest pair. */
541 + CCallInfo ci = lj_ir_callinfo[IRCALL_softfp_cmp];
542 + IRRef args[4];
543 + MCLabel l_right, l_end;
544 + Reg desthi = ra_dest(as, ir, RSET_GPR), destlo = ra_dest(as, ir+1, RSET_GPR);
545 + Reg righthi, lefthi = ra_alloc2(as, ir, RSET_GPR);
546 + Reg rightlo, leftlo = ra_alloc2(as, ir+1, RSET_GPR);
547 + PPCCC cond = (IROp)ir->o == IR_MIN ? CC_EQ : CC_NE; /* Condition for the branch on the RID_RET-vs-1 compare below. */
548 + righthi = (lefthi >> 8); lefthi &= 255; /* Unpack ra_alloc2 result: low 8 bits = left reg, upper bits = right reg. */
549 + rightlo = (leftlo >> 8); leftlo &= 255;
550 + args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1; /* ^LJ_BE swaps the two words of each operand pair when LJ_BE is 1. */
551 + args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;
552 + l_end = emit_label(as);
553 + if (desthi != righthi) emit_mr(as, desthi, righthi); /* Moves are skipped when dest already is the source reg. */
554 + if (destlo != rightlo) emit_mr(as, destlo, rightlo);
555 + l_right = emit_label(as);
556 + if (l_end != l_right) emit_jmp(as, l_end); /* Jump only needed if any right-operand move was emitted. */
557 + if (desthi != lefthi) emit_mr(as, desthi, lefthi);
558 + if (destlo != leftlo) emit_mr(as, destlo, leftlo);
559 + if (l_right == as->mcp+1) { /* NOTE(review): true when exactly one instruction was emitted since l_right; presumably that is the jump to l_end -- confirm it cannot be a lone move. */
560 + cond ^= 4; l_right = l_end; ++as->mcp; /* NOTE(review): presumably inverts cond and drops that one instruction, branching straight to l_end -- confirm. */
561 + }
562 + emit_condbranch(as, PPCI_BC, cond, l_right);
563 + ra_evictset(as, RSET_SCRATCH);
564 + emit_cmpi(as, RID_RET, 1); /* Compare the call's return register against 1. */
565 + asm_gencall(as, &ci, args);
566 +}
567 +#endif
568 +
569 static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
570 {
571 - if (irt_isnum(ir->t)) {
572 + if (!LJ_SOFTFP && irt_isnum(ir->t)) {
573 Reg dest = ra_dest(as, ir, RSET_FPR);
574 Reg tmp = dest;
575 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
576 @@ -1653,7 +1804,7 @@ static void asm_intcomp_(ASMState *as, I
577 static void asm_comp(ASMState *as, IRIns *ir)
578 {
579 PPCCC cc = asm_compmap[ir->o];
580 - if (irt_isnum(ir->t)) {
581 + if (!LJ_SOFTFP && irt_isnum(ir->t)) {
582 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
583 right = (left >> 8); left &= 255;
584 asm_guardcc(as, (cc >> 4));
585 @@ -1674,6 +1825,44 @@ static void asm_comp(ASMState *as, IRIns
586
587 #define asm_equal(as, ir) asm_comp(as, ir)
588
589 +#if LJ_SOFTFP
590 +/* SFP comparisons: set up a softfp_cmp call on the operand pairs of ir and ir+1 and guard on its result in RID_RET. */
591 +static void asm_sfpcomp(ASMState *as, IRIns *ir)
592 +{
593 + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
594 + RegSet drop = RSET_SCRATCH;
595 + Reg r;
596 + IRRef args[4];
597 + args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1; /* ^LJ_BE swaps the two words of each operand pair when LJ_BE is 1. */
598 + args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;
599 +
600 + for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) { /* The four argument GPRs. */
601 + if (!rset_test(as->freeset, r) &&
602 + regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR])
603 + rset_clear(drop, r); /* In-use reg already holds the matching argument ref: keep it out of the evict set. */
604 + }
605 + ra_evictset(as, drop);
606 + asm_setupresult(as, ir, ci);
607 + switch ((IROp)ir->o) {
608 + case IR_ULT:
609 + asm_guardcc(as, CC_EQ);
610 + emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* fallthrough: ULT also gets the result == 1 guard. */
611 + case IR_ULE:
612 + asm_guardcc(as, CC_EQ);
613 + emit_ai(as, PPCI_CMPWI, RID_RET, 1);
614 + break;
615 + case IR_GE: case IR_GT:
616 + asm_guardcc(as, CC_EQ);
617 + emit_ai(as, PPCI_CMPWI, RID_RET, 2); /* fallthrough: GE/GT also get the default guard. */
618 + default:
619 + asm_guardcc(as, (asm_compmap[ir->o] & 0xf)); /* Guard condition from the low 4 bits of the asm_compmap entry. */
620 + emit_ai(as, PPCI_CMPWI, RID_RET, 0);
621 + break;
622 + }
623 + asm_gencall(as, ci, args);
624 +}
625 +#endif
626 +
627 #if LJ_HASFFI
628 /* 64 bit integer comparisons. */
629 static void asm_comp64(ASMState *as, IRIns *ir)
630 @@ -1703,19 +1892,36 @@ static void asm_comp64(ASMState *as, IRI
631 /* Hiword op of a split 64 bit op. Previous op must be the loword op. */
632 static void asm_hiop(ASMState *as, IRIns *ir)
633 {
634 -#if LJ_HASFFI
635 +#if LJ_HASFFI || LJ_SOFTFP
636 /* HIOP is marked as a store because it needs its own DCE logic. */
637 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
638 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
639 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */
640 as->curins--; /* Always skip the CONV. */
641 +#if LJ_HASFFI && !LJ_SOFTFP
642 if (usehi || uselo)
643 asm_conv64(as, ir);
644 return;
645 +#endif
646 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */
647 as->curins--; /* Always skip the loword comparison. */
648 +#if LJ_SOFTFP
649 + if (!irt_isint(ir->t)) {
650 + asm_sfpcomp(as, ir-1);
651 + return;
652 + }
653 +#endif
654 +#if LJ_HASFFI
655 asm_comp64(as, ir);
656 +#endif
657 + return;
658 +#if LJ_SOFTFP
659 + } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
660 + as->curins--; /* Always skip the loword min/max. */
661 + if (uselo || usehi)
662 + asm_sfpmin_max(as, ir-1);
663 return;
664 +#endif
665 } else if ((ir-1)->o == IR_XSTORE) {
666 as->curins--; /* Handle both stores here. */
667 if ((ir-1)->r != RID_SINK) {
668 @@ -1726,14 +1932,27 @@ static void asm_hiop(ASMState *as, IRIns
669 }
670 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
671 switch ((ir-1)->o) {
672 +#if LJ_HASFFI
673 case IR_ADD: as->curins--; asm_add64(as, ir); break;
674 case IR_SUB: as->curins--; asm_sub64(as, ir); break;
675 case IR_NEG: as->curins--; asm_neg64(as, ir); break;
676 +#endif
677 +#if LJ_SOFTFP
678 + case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
679 + case IR_STRTO:
680 + if (!uselo)
681 + ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */
682 + break;
683 +#endif
684 case IR_CALLN:
685 + case IR_CALLS:
686 case IR_CALLXS:
687 if (!uselo)
688 ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
689 break;
690 +#if LJ_SOFTFP
691 + case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
692 +#endif
693 case IR_CNEWI:
694 /* Nothing to do here. Handled by lo op itself. */
695 break;
696 @@ -1797,8 +2016,19 @@ static void asm_stack_restore(ASMState *
697 if ((sn & SNAP_NORESTORE))
698 continue;
699 if (irt_isnum(ir->t)) {
700 +#if LJ_SOFTFP
701 + Reg tmp;
702 + RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
703 + lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */
704 + tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow);
705 + emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?4:0));
706 + if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1);
707 + tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow);
708 + emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?0:4));
709 +#else
710 Reg src = ra_alloc1(as, ref, RSET_FPR);
711 emit_fai(as, PPCI_STFD, src, RID_BASE, ofs);
712 +#endif
713 } else {
714 Reg type;
715 RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
716 @@ -1811,6 +2041,10 @@ static void asm_stack_restore(ASMState *
717 if ((sn & (SNAP_CONT|SNAP_FRAME))) {
718 if (s == 0) continue; /* Do not overwrite link to previous frame. */
719 type = ra_allock(as, (int32_t)(*flinks--), allow);
720 +#if LJ_SOFTFP
721 + } else if ((sn & SNAP_SOFTFPNUM)) {
722 + type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE));
723 +#endif
724 } else {
725 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
726 }
727 @@ -1947,14 +2181,15 @@ static Reg asm_setup_call_slots(ASMState
728 int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
729 asm_collectargs(as, ir, ci, args);
730 for (i = 0; i < nargs; i++)
731 - if (args[i] && irt_isfp(IR(args[i])->t)) {
732 + if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) {
733 if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1;
734 } else {
735 if (ngpr > 0) ngpr--; else nslots++;
736 }
737 if (nslots > as->evenspill) /* Leave room for args in stack slots. */
738 as->evenspill = nslots;
739 - return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET);
740 + return (!LJ_SOFTFP && irt_isfp(ir->t)) ? REGSP_HINT(RID_FPRET) :
741 + REGSP_HINT(RID_RET);
742 }
743
744 static void asm_setup_target(ASMState *as)