From c578c495bb12b3b558e89b6b304a1b92756c544e Mon Sep 17 00:00:00 2001 From: George Koehler Date: Sat, 11 Feb 2017 18:00:56 -0500 Subject: [PATCH] Edit PowerPC assembly for .and, .cms, .ior, .xor, .zer Remove one addi instruction from some loops. These loops had increased 2 pointers, they now increase 1 index. I must initialize the index, so I add "li r6, 0" before each loop. Change .zer to use subf instead of neg, add. Change .xor to take the size on the real stack, as .and and .or have done since 81c677d. --- mach/powerpc/libem/and.s | 24 +++++++++++------------- mach/powerpc/libem/cms.s | 21 ++++++++++----------- mach/powerpc/libem/ior.s | 24 +++++++++++------------- mach/powerpc/libem/xor.s | 24 ++++++++++++------------ mach/powerpc/libem/zer.s | 18 ++++++++---------- mach/powerpc/ncg/table | 12 +++++++----- 6 files changed, 59 insertions(+), 64 deletions(-) diff --git a/mach/powerpc/libem/and.s b/mach/powerpc/libem/and.s index cb4e1e54a..cf5feee4d 100644 --- a/mach/powerpc/libem/and.s +++ b/mach/powerpc/libem/and.s @@ -5,20 +5,18 @@ .define .and .and: - lwz r3, 0 (sp) ! r3 = size - addi sp, sp, 4 - - mr r4, sp ! r4 = ptr to set a - add r5, sp, r3 ! r5 = ptr to set b - srwi r6, r3, 2 - mtspr ctr, r6 ! ctr = r3 / 4 + lwz r3, 0(sp) ! r3 = size + srwi r7, r3, 2 + mtspr ctr, r7 ! ctr = size / 4 + addi r4, sp, 4 ! r4 = ptr to set a + add r5, r4, r3 ! r5 = ptr to set b + li r6, 0 ! r6 = index 1: - lwz r7, 0(r4) - lwz r8, 0(r5) + lwzx r7, r4, r6 + lwzx r8, r5, r6 and r8, r7, r8 ! intersection of words - stw r8, 0(r5) - addi r4, r4, 4 - addi r5, r5, 4 + stwx r8, r5, r6 + addi r6, r6, 4 bdnz 1b ! loop ctr times - add sp, sp, r3 + mr sp, r5 blr diff --git a/mach/powerpc/libem/cms.s b/mach/powerpc/libem/cms.s index 30aaccd20..5bcd3106d 100644 --- a/mach/powerpc/libem/cms.s +++ b/mach/powerpc/libem/cms.s @@ -7,24 +7,23 @@ .define .cms .cms: + srwi r7, r3, 2 + mtspr ctr, r7 ! ctr = size / 4 mr r4, sp ! r4 = ptr to set a add r5, sp, r3 ! r5 = ptr to set b - mr r6, r3 ! 
r6 = size - srwi r3, r3, 2 - mtspr ctr, r3 ! ctr = size / 4 + li r6, 0 ! r6 = index + add r9, r5, r3 ! r9 = future sp 1: - lwz r7, 0(r4) - lwz r8, 0(r5) + lwzx r7, r4, r6 + lwzx r8, r5, r6 cmpw cr0, r7, r8 ! compare words in sets - addi r4, r4, 4 - addi r5, r5, 4 + addi r6, r6, 4 bne cr0, 2f ! branch if not equal bdnz 1b ! loop ctr times - addi r3, r0, 0 ! equal: return 0 + li r3, 0 ! equal: return 0 b 3f 2: - addi r3, r0, 1 ! not equal: return 1 + li r3, 1 ! not equal: return 1 3: - slwi r6, r6, 1 ! r6 = size * 2 - add sp, sp, r6 ! remove sets from stack + mr sp, r9 ! remove sets from stack blr diff --git a/mach/powerpc/libem/ior.s b/mach/powerpc/libem/ior.s index e6cd1844e..b4b0b3fae 100644 --- a/mach/powerpc/libem/ior.s +++ b/mach/powerpc/libem/ior.s @@ -5,20 +5,18 @@ .define .ior .ior: - lwz r3, 0 (sp) - addi sp, sp, 4 - - mr r4, sp ! r4 = ptr to set a - add r5, sp, r3 ! r5 = ptr to set b - srwi r6, r3, 2 - mtspr ctr, r6 ! ctr = r3 / 4 + lwz r3, 0(sp) ! r3 = size + srwi r7, r3, 2 + mtspr ctr, r7 ! ctr = size / 4 + addi r4, sp, 4 ! r4 = ptr to set a + add r5, r4, r3 ! r5 = ptr to set b + li r6, 0 ! r6 = index 1: - lwz r7, 0(r4) - lwz r8, 0(r5) + lwzx r7, r4, r6 + lwzx r8, r5, r6 or r8, r7, r8 ! union of words - stw r8, 0(r5) - addi r4, r4, 4 - addi r5, r5, 4 + stwx r8, r5, r6 + addi r6, r6, 4 bdnz 1b ! loop ctr times - add sp, sp, r3 + mr sp, r5 blr diff --git a/mach/powerpc/libem/xor.s b/mach/powerpc/libem/xor.s index acb02a032..6dc4e7afc 100644 --- a/mach/powerpc/libem/xor.s +++ b/mach/powerpc/libem/xor.s @@ -1,22 +1,22 @@ .sect .text ! Set symmetric difference. -! Stack: ( b a -- a/b ) -! With r3 = size of set +! Stack: ( b a size -- a/b ) .define .xor .xor: - mr r4, sp ! r4 = ptr to set a - add r5, sp, r3 ! r5 = ptr to set b - srwi r6, r3, 2 - mtspr ctr, r6 ! ctr = r3 / 4 + lwz r3, 0(sp) ! r3 = size + srwi r7, r3, 2 + mtspr ctr, r7 ! ctr = size / 4 + addi r4, sp, 4 ! r4 = ptr to set a + add r5, r4, r3 ! r5 = ptr to set b + li r6, 0 ! 
r6 = index 1: - lwz r7, 0(r4) - lwz r8, 0(r5) + lwzx r7, r4, r6 + lwzx r8, r5, r6 xor r8, r7, r8 ! symmetric difference of words - stw r8, 0(r5) - addi r4, r4, 4 - addi r5, r5, 4 + stwx r8, r5, r6 + addi r6, r6, 4 bdnz 1b ! loop ctr times - add sp, sp, r3 + mr sp, r5 blr diff --git a/mach/powerpc/libem/zer.s b/mach/powerpc/libem/zer.s index a47a150cc..d35744bba 100644 --- a/mach/powerpc/libem/zer.s +++ b/mach/powerpc/libem/zer.s @@ -5,17 +5,15 @@ .define .zer .zer: - lwz r3, 0(sp) - addi sp, sp, 4 - - srwi r7, r3, 2 - li r4, 0 ! r4 = zero - neg r5, r3 - add sp, sp, r5 ! allocate set - mr r6, sp ! r6 = ptr to set - mtspr ctr, r7 ! ctr = r3 / 4 + lwz r3, 0(sp) ! r3 = size + srwi r5, r3, 2 + mtspr ctr, r5 ! ctr = size / 4 + li r4, 0 ! r4 = 0 + addi sp, sp, 4 + subf sp, r3, sp ! sp = ptr to new set + li r6, 0 ! r6 = index 1: - stw r4, 0(r6) ! store zero in set + stwx r4, sp, r6 ! store zero in set addi r6, r6, 4 bdnz 1b ! loop ctr times blr diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 24822482c..1ba20a85e 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -1374,11 +1374,13 @@ PATTERNS yields {XOR_RC, %2, lo(%1.val)} pat xor defined($1) /* XOR set */ - with STACK - kills ALL - gen - move {CONST, $1}, R3 - bl {LABEL, ".xor"} + leaving + loc $1 + cal ".xor" + + pat xor !defined($1) + leaving + cal ".xor" pat com $1==INT32 /* NOT word */ with AND_RR