diff --git a/mach/powerpc/libem/build.lua b/mach/powerpc/libem/build.lua index ac84e3b0f..7a0726b80 100644 --- a/mach/powerpc/libem/build.lua +++ b/mach/powerpc/libem/build.lua @@ -6,7 +6,7 @@ for _, plat in ipairs(vars.plats) do acklibrary { name = "lib_"..plat, srcs = { - "./*.s", -- trp.s + "./*.s", -- cfu8.s }, vars = { plat = plat }, deps = { diff --git a/mach/powerpc/libem/cfu8.s b/mach/powerpc/libem/cfu8.s index 915f84dd2..fd69ff521 100644 --- a/mach/powerpc/libem/cfu8.s +++ b/mach/powerpc/libem/cfu8.s @@ -1,3 +1,5 @@ +.sect .text; .sect .rom; .sect .data; .sect .bss + .sect .text ! Converts a 64-bit double into a 32-bit unsigned integer. @@ -6,32 +8,40 @@ .define .cfu8 .cfu8: - lis r3, ha16[.fd_00000000] - lfd f0, lo16[.fd_00000000](r3) ! f0 = 0.0 - - lfd f1, 0(sp) ! value to be converted - - lis r3, ha16[.fd_FFFFFFFF] - lfd f3, lo16[.fd_FFFFFFFF](r3) ! f3 = 0xFFFFFFFF - - lis r3, ha16[.fd_80000000] - lfd f4, lo16[.fd_80000000](r3) ! f4 = 0x80000000 - - fsel f2, f1, f1, f0 - fsub f5, f3, f1 - fsel f2, f5, f2, f3 - fsub f5, f2, f4 - fcmpu cr0, f2, f4 - fsel f2, f5, f5, f2 - fctiwz f2, f2 - - stfd f2, 0(sp) - addi sp, sp, 4 - - bltlr - - lwz r3, 0(sp) - xoris r3, r3, 0x8000 - stw r3, 0(sp) - + lfd f1, 0(sp) ! f1 = value to convert + lis r3, ha16[.fs_80000000] + lfs f2, lo16[.fs_80000000](r3) ! f2 = 2**31 + fsub f1, f1, f2 + fctiwz f1, f1 ! convert value - 2**31 + stfd f1, 0(sp) + lwz r3, 4(sp) + xoris r3, r3, 0x8000 ! add 2**31 + stw r3, 4(sp) + addi sp, sp, 4 blr + +.sect .rom +.fs_80000000: + !float 2.147483648e+9 sz 4 + .data1 0117,00,00,00 + +! Freescale and IBM provide an example using fsel to select value or +! value - 2**31 for fctiwz. The following code adapts Freescale's +! _Programming Environments Manual for 32-Bit Implementations of the +! PowerPC Architecture_, section C.3.2, pdf page 557. +! +! Given f2 = value clamped from 0 to 2**32 - 1, f4 = 2**31, then +! fsub f5, f2, f4 +! fcmpu cr2, f2, f4 +! fsel f2, f5, f5, f2 +! fctiwz f2, f2 +! 
stfdu f2, 0(sp) +! lwz r3, 4(sp) +! blt cr2, 1f +! xoris r3, r3, 0x8000 +! 1: yields r3 = the converted value. +! +! Debian's clang 3.5.0-10 and gcc 4.9.2-10 don't clamp the value +! before conversion. They avoid fsel and put the conditional branch +! before fctiwz. PowerPC 601 lacks fsel (but kernel might trap and +! emulate fsel). PowerPC 603, 604, G3, G4, G5 have fsel. diff --git a/mach/powerpc/libem/fd_80000000.s b/mach/powerpc/libem/fd_80000000.s deleted file mode 100644 index 5c153bba8..000000000 --- a/mach/powerpc/libem/fd_80000000.s +++ /dev/null @@ -1,10 +0,0 @@ -.sect .text; .sect .rom; .sect .data; .sect .bss - -.sect .rom - -! Contains a handy double-precision 0x80000000. - -.define .fd_80000000 -.fd_80000000: - !float 2.147483648e+9 sz 8 - .data1 0101,0340,00,00,00,00,00,00 diff --git a/mach/powerpc/libem/fd_FFFFFFFF.s b/mach/powerpc/libem/fd_FFFFFFFF.s deleted file mode 100644 index 88cf04bd9..000000000 --- a/mach/powerpc/libem/fd_FFFFFFFF.s +++ /dev/null @@ -1,10 +0,0 @@ -.sect .text; .sect .rom; .sect .data; .sect .bss - -.sect .rom - -! Contains a handy double-precision 0xFFFFFFFF. - -.define .fd_FFFFFFFF -.fd_FFFFFFFF: - !float 4.294967295e+9 sz 8 - .data1 0101,0357,0377,0377,0377,0340,00,00