[BACK]Return to blockio.S CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / arch / arm / arm

Annotation of src/sys/arch/arm/arm/blockio.S, Revision 1.5

1.5     ! briggs      1: /*     $NetBSD: blockio.S,v 1.4 2001/06/02 11:15:56 bjh21 Exp $        */
1.1       bjh21       2:
                      3: /*
1.3       bjh21       4:  * Copyright (c) 2001 Ben Harris.
1.1       bjh21       5:  * Copyright (c) 1994 Mark Brinicombe.
                      6:  * Copyright (c) 1994 Brini.
                      7:  * All rights reserved.
                      8:  *
                      9:  * This code is derived from software written for Brini by Mark Brinicombe
                     10:  *
                     11:  * Redistribution and use in source and binary forms, with or without
                     12:  * modification, are permitted provided that the following conditions
                     13:  * are met:
                     14:  * 1. Redistributions of source code must retain the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer.
                     16:  * 2. Redistributions in binary form must reproduce the above copyright
                     17:  *    notice, this list of conditions and the following disclaimer in the
                     18:  *    documentation and/or other materials provided with the distribution.
                     19:  * 3. All advertising materials mentioning features or use of this software
                     20:  *    must display the following acknowledgement:
                     21:  *     This product includes software developed by Brini.
                     22:  * 4. The name of the company nor the name of the author may be used to
                     23:  *    endorse or promote products derived from this software without specific
                     24:  *    prior written permission.
                     25:  *
                     26:  * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
                     27:  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
                     28:  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
                     29:  * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
                     30:  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
                     31:  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
                     32:  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     33:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     34:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     35:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     36:  * SUCH DAMAGE.
                     37:  *
                     38:  * RiscBSD kernel project
                     39:  *
                     40:  * blockio.S
                     41:  *
                     42:  * optimised block read/write from/to IO routines.
                     43:  *
                     44:  * Created      : 08/10/94
                     45:  * Modified    : 22/01/99  -- R.Earnshaw
                     46:  *                            Faster, and small tweaks for StrongARM
                     47:  */
                     48:
                     49: #include <machine/asm.h>
1.2       bjh21      50:
1.5     ! briggs     51: RCSID("$NetBSD: blockio.S,v 1.4 2001/06/02 11:15:56 bjh21 Exp $")
1.2       bjh21      52:
                     53: /*
                     54:  * Read bytes from an I/O address into a block of memory
                     55:  *
                     56:  * r0 = address to read from (IO)
                     57:  * r1 = address to write to (memory)
                     58:  * r2 = length
                          *
                          * Every byte is loaded from the same address: r0 is never advanced,
                          * while r1 is post-incremented as the data is stored.
                          * The copy is done in three phases: 1-3 single bytes until r1 is
                          * word aligned, then four byte reads merged into one word store per
                          * iteration, then a 0-3 byte tail.
                          * Scratch registers: r3 and r12; r2 is consumed as the counter.
                          * A {fp, ip, lr, pc} frame is pushed on entry and the routine
                          * returns by reloading fp, sp and pc from that frame.
                     59:  */
                     60:
                     61: /* This code will look very familiar if you've read _memcpy(). */
                     62: ENTRY(read_multi_1)
1.4       bjh21      63:        mov     ip, sp                  /* ip = sp on entry */
                     64:        stmfd   sp!, {fp, ip, lr, pc}   /* push the frame */
                     65:        sub     fp, ip, #4              /* fp -> top word of the frame */
1.2       bjh21      66:        subs    r2, r2, #4              /* r2 = length - 4 */
1.5     ! briggs     67:        blt     .Lrm1_l4                        /* less than 4 bytes */
1.2       bjh21      68:        ands    r12, r1, #3             /* r12 = r1 modulo 4 */
1.5     ! briggs     69:        beq     .Lrm1_main              /* aligned destination */
1.2       bjh21      70:        rsb     r12, r12, #4            /* r12 = bytes (1-3) needed to align r1 */
                     71:        cmp     r12, #2                 /* flags pick 1, 2 or 3 byte copies */
                     72:        ldrb    r3, [r0]                /* first byte is always copied */
                     73:        strb    r3, [r1], #1
                     74:        ldrgeb  r3, [r0]                /* second byte if r12 >= 2 */
                     75:        strgeb  r3, [r1], #1
                     76:        ldrgtb  r3, [r0]                /* third byte if r12 == 3 */
                     77:        strgtb  r3, [r1], #1
                     78:        subs    r2, r2, r12             /* account for the alignment bytes */
1.5     ! briggs     79:        blt     .Lrm1_l4                /* fewer than 4 bytes left */
        !            80: .Lrm1_main:
        !            81: .Lrm1loop:
1.2       bjh21      82:        ldrb    r3, [r0]                /* 1st byte -> bits 0-7 */
                     83:        ldrb    r12, [r0]
                     84:        orr     r3, r3, r12, lsl #8     /* 2nd byte -> bits 8-15 */
                     85:        ldrb    r12, [r0]
                     86:        orr     r3, r3, r12, lsl #16    /* 3rd byte -> bits 16-23 */
                     87:        ldrb    r12, [r0]
                     88:        orr     r3, r3, r12, lsl #24    /* 4th byte -> bits 24-31 */
                     89:        str     r3, [r1], #4            /* store the four bytes as one word */
                     90:        subs    r2, r2, #4
1.5     ! briggs     91:        bge     .Lrm1loop               /* at least 4 more bytes to do */
        !            92: .Lrm1_l4:
1.2       bjh21      93:        adds    r2, r2, #4                      /* r2 = length again */
1.4       bjh21      94:        ldmeqdb fp, {fp, sp, pc}        /* nothing left: unwind and return */
1.2       bjh21      96:        cmp     r2, #2                  /* 1-3 bytes left; same pattern as above */
                     97:        ldrb    r3, [r0]
                     98:        strb    r3, [r1], #1
                     99:        ldrgeb  r3, [r0]                /* second byte if r2 >= 2 */
                    100:        strgeb  r3, [r1], #1
                    101:        ldrgtb  r3, [r0]                /* third byte if r2 == 3 */
                    102:        strgtb  r3, [r1], #1
1.4       bjh21     103:        ldmdb   fp, {fp, sp, pc}        /* restore fp and sp, return via saved lr */
1.3       bjh21     104:
                    105: /*
                    106:  * Write bytes to an I/O address from a block of memory
                    107:  *
                    108:  * r0 = address to write to (IO)
                    109:  * r1 = address to read from (memory)
                    110:  * r2 = length
                          *
                          * Every byte is stored to the same address: r0 is never advanced,
                          * while r1 is post-incremented as the data is fetched.
                          * The copy is done in three phases: 1-3 single bytes until r1 is
                          * word aligned, then one word load split into four byte stores per
                          * iteration (bits 0-7 first), then a 0-3 byte tail.
                          * Scratch registers: r3 and r12; r2 is consumed as the counter.
                          * A {fp, ip, lr, pc} frame is pushed on entry and the routine
                          * returns by reloading fp, sp and pc from that frame.
                    111:  */
                    112:
                    113: /* This code will look very familiar if you've read _memcpy(). */
                    114: ENTRY(write_multi_1)
1.4       bjh21     115:        mov     ip, sp                  /* ip = sp on entry */
                    116:        stmfd   sp!, {fp, ip, lr, pc}   /* push the frame */
                    117:        sub     fp, ip, #4              /* fp -> top word of the frame */
1.3       bjh21     118:        subs    r2, r2, #4              /* r2 = length - 4 */
1.5     ! briggs    119:        blt     .Lwm1_l4                /* less than 4 bytes */
1.3       bjh21     120:        ands    r12, r1, #3             /* r12 = r1 modulo 4 */
1.5     ! briggs    121:        beq     .Lwm1_main              /* aligned source */
1.3       bjh21     122:        rsb     r12, r12, #4            /* r12 = bytes (1-3) needed to align r1 */
                    123:        cmp     r12, #2                 /* flags pick 1, 2 or 3 byte copies */
                    124:        ldrb    r3, [r1], #1            /* first byte is always copied */
                    125:        strb    r3, [r0]
                    126:        ldrgeb  r3, [r1], #1            /* second byte if r12 >= 2 */
                    127:        strgeb  r3, [r0]
                    128:        ldrgtb  r3, [r1], #1            /* third byte if r12 == 3 */
                    129:        strgtb  r3, [r0]
                    130:        subs    r2, r2, r12             /* account for the alignment bytes */
1.5     ! briggs    131:        blt     .Lwm1_l4                /* fewer than 4 bytes left */
        !           132: .Lwm1_main:
        !           133: .Lwm1loop:
1.3       bjh21     134:        ldr     r3, [r1], #4            /* fetch four bytes as one word */
                    135:        strb    r3, [r0]                /* bits 0-7 */
                    136:        mov     r3, r3, lsr #8
                    137:        strb    r3, [r0]                /* bits 8-15 */
                    138:        mov     r3, r3, lsr #8
                    139:        strb    r3, [r0]                /* bits 16-23 */
                    140:        mov     r3, r3, lsr #8
                    141:        strb    r3, [r0]                /* bits 24-31 */
                    142:        subs    r2, r2, #4
1.5     ! briggs    143:        bge     .Lwm1loop               /* at least 4 more bytes to do */
        !           144: .Lwm1_l4:
1.3       bjh21     145:        adds    r2, r2, #4                      /* r2 = length again */
1.4       bjh21     146:        ldmeqdb fp, {fp, sp, pc}        /* nothing left: unwind and return */
1.3       bjh21     147:        cmp     r2, #2                  /* 1-3 bytes left; same pattern as above */
                    148:        ldrb    r3, [r1], #1
                    149:        strb    r3, [r0]
                    150:        ldrgeb  r3, [r1], #1            /* second byte if r2 >= 2 */
                    151:        strgeb  r3, [r0]
                    152:        ldrgtb  r3, [r1], #1            /* third byte if r2 == 3 */
                    153:        strgtb  r3, [r0]
1.4       bjh21     154:        ldmdb   fp, {fp, sp, pc}        /* restore fp and sp, return via saved lr */
1.1       bjh21     155:
                    156: /*
                    157:  * Reads short ints (16 bits) from an I/O address into a block of memory
                    158:  *
                    159:  * r0 = address to read from (IO)
                    160:  * r1 = address to write to (memory)
                    161:  * r2 = length
                          *
                          * r2 counts 16-bit items, not bytes: it drops by one for every two
                          * bytes stored.  r0 is never advanced; r1 is post-incremented.
                          * Returns at once when r2 <= 0.  Scratch registers: r3 and ip.
                          * No stack is used; the return is a plain mov pc, lr.
                    162:  */
                    163:
                    164: ENTRY(insw)
                    165: /* Make sure that we have a positive length */
                    166:        cmp     r2, #0x00000000
                    167:        movle   pc, lr
                    168:
                    169: /* If the destination address is word aligned and the count is even, do it fast */
                    170:
                    171:        tst     r2, #0x00000001         /* Z set when the count is even */
                    172:        tsteq   r1, #0x00000003         /* ... and then Z set when r1 is word aligned */
1.5     ! briggs    173:        beq     .Lfastinsw
1.1       bjh21     174:
                    175: /* Non aligned insw */
                    176:
1.5     ! briggs    177: .Linswloop:
1.1       bjh21     178:        ldr     r3, [r0]                /* only bits 0-15 of the word are kept */
                    179:        subs    r2, r2, #0x00000001     /* Loop test in load delay slot */
                    180:        strb    r3, [r1], #0x0001       /* bits 0-7 go to the lower address */
                    181:        mov     r3, r3, lsr #8
                    182:        strb    r3, [r1], #0x0001       /* bits 8-15 */
1.5     ! briggs    183:        bgt     .Linswloop              /* flags still come from the subs above */
1.1       bjh21     184:
                    185:        mov     pc, lr
                    186:
                    187: /* Word aligned insw */
                    188:
1.5     ! briggs    189: .Lfastinsw:
1.1       bjh21     190:
1.5     ! briggs    191: .Lfastinswloop:
                                /*
                                 * Two reads per stored word.  Bits 16-31 of the first read
                                 * become the low half, bits 0-15 of the second read become the
                                 * high half.  NOTE(review): this relies on the load from r0 + 2
                                 * delivering the 16-bit datum in the upper half of the register;
                                 * that is bus and CPU dependent - confirm for the target.
                                 */
1.1       bjh21     192:        ldr     r3, [r0, #0x0002]       /* take advantage of nonaligned
                    193:                                         * word accesses */
                    194:        ldr     ip, [r0]
                    195:        mov     r3, r3, lsr #16         /* Put the two shorts together */
                    196:        orr     r3, r3, ip, lsl #16
                    197:        str     r3, [r1], #0x0004       /* Store */
                    198:        subs    r2, r2, #0x00000002     /* Next */
1.5     ! briggs    199:        bgt     .Lfastinswloop
1.1       bjh21     200:
                    201:        mov     pc, lr
                    202:
                    203:
                    204: /*
                    205:  * Writes short ints (16 bits) from a block of memory to an I/O address
                    206:  *
                    207:  * r0 = address to write to (IO)
                    208:  * r1 = address to read from (memory)
                    209:  * r2 = length
                          *
                          * r2 counts 16-bit items, not bytes.  r0 is never advanced; r1 is
                          * post-incremented.  Each item is written as a full word with the
                          * 16-bit value duplicated in both halves.
                          * Returns at once when r2 <= 0.  Scratch registers: r3 and ip.
                          * No stack is used; the return is a plain mov pc, lr.
                    210:  */
                    211:
                    212: ENTRY(outsw)
                    213: /* Make sure that we have a positive length */
                    214:        cmp     r2, #0x00000000
                    215:        movle   pc, lr
                    216:
                    217: /* If the source address is word aligned and the count is even, do it fast */
                    218:
                    219:        tst     r2, #0x00000001         /* Z set when the count is even */
                    220:        tsteq   r1, #0x00000003         /* ... and then Z set when r1 is word aligned */
1.5     ! briggs    221:        beq     .Lfastoutsw
1.1       bjh21     222:
                    223: /* Non aligned outsw */
                    224:
1.5     ! briggs    225: .Loutswloop:
1.1       bjh21     226:        ldrb    r3, [r1], #0x0001       /* byte at the lower address -> bits 0-7 */
                    227:        ldrb    ip, [r1], #0x0001       /* next byte -> bits 8-15 (below) */
                    228:        subs    r2, r2, #0x00000001     /* Loop test in load delay slot */
                    229:        orr     r3, r3, ip, lsl #8
                    230:        orr     r3, r3, r3, lsl #16     /* duplicate the short into both halves */
                    231:        str     r3, [r0]
1.5     ! briggs    232:        bgt     .Loutswloop             /* flags still come from the subs above */
1.1       bjh21     233:
                    234:        mov     pc, lr
                    235:
                    236: /* Word aligned outsw */
                    237:
1.5     ! briggs    238: .Lfastoutsw:
1.1       bjh21     239:
1.5     ! briggs    240: .Lfastoutswloop:
1.1       bjh21     241:        ldr     r3, [r1], #0x0004       /* r3 = (H)(L) */
                    242:        subs    r2, r2, #0x00000002     /* Loop test in load delay slot */
                    243:
                    244:        eor     ip, r3, r3, lsr #16     /* ip = (H)(H^L) */
                    245:        eor     r3, r3, ip, lsl #16     /* r3 = (H^H^L)(L) = (L)(L) */
                    246:        eor     ip, ip, r3, lsr #16     /* ip = (H)(H^L^L) = (H)(H) */
                    247:
                    248:        str     r3, [r0]                /* low short goes out first */
                    249:        str     ip, [r0]                /* then the high short */
                    250:
                    251: /*     mov     ip, r3, lsl #16
                    252:  *     orr     ip, ip, ip, lsr #16
                    253:  *     str     ip, [r0]
                    254:  *
                    255:  *     mov     ip, r3, lsr #16
                    256:  *     orr     ip, ip, ip, lsl #16
                    257:  *     str     ip, [r0]
                    258:  */
                    259:
1.5     ! briggs    260:        bgt     .Lfastoutswloop         /* eor and str leave the subs flags intact */
1.1       bjh21     261:
                    262:        mov     pc, lr
                    263:
                    264: /*
                    265:  * reads short ints (16 bits) from an I/O address into a block of memory
                    266:  * with a length guaranteed to be a multiple of 16 bytes
                    267:  * with a word aligned destination address
                    268:  *
                    269:  * r0 = address to read from (IO)
                    270:  * r1 = address to write to (memory)
                    271:  * r2 = length
                          *
                          * r2 counts 16-bit items; the fast loop handles 8 of them (16 bytes)
                          * per iteration.  If r2 is not a multiple of 8 or r1 is not word
                          * aligned, control is passed to insw before anything is pushed, so
                          * insw returns directly to the caller.
                          * Returns at once when r2 <= 0.  r4, r5 and lr are saved on the stack
                          * and used as scratch together with r3 and ip.
                    272:  */
                    273:
                    274: ENTRY(insw16)
                    275: /* Make sure that we have a positive length */
                    276:        cmp     r2, #0x00000000
                    277:        movle   pc, lr
                    278:
                    279: /* If the destination address is word aligned and the size suitably
                    280:    aligned, do it fast */
                    281:
                    282:        tst     r2, #0x00000007         /* Z set when the count is a multiple of 8 */
                    283:        tsteq   r1, #0x00000003         /* ... and then Z set when r1 is word aligned */
                    284:
                    285:        bne     _C_LABEL(insw)          /* otherwise let insw do the work */
                    286:
                    287: /* Word aligned insw */
                    288:
                    289:        stmfd   sp!, {r4,r5,lr}         /* lr doubles as a scratch register below */
                    290:
1.5     ! briggs    291: .Linsw16loop:
                                /*
                                 * Four identical groups follow, building r3, r4, r5 and ip.
                                 * In each, bits 16-31 of the read at r0 + 2 become the low half
                                 * and bits 0-15 of the read at r0 become the high half.
                                 * NOTE(review): relies on the load from r0 + 2 delivering the
                                 * datum in the upper half of the register - confirm for target.
                                 */
1.1       bjh21     292:        ldr     r3, [r0, #0x0002]       /* take advantage of nonaligned
                    293:                                         * word accesses */
                    294:        ldr     lr, [r0]
                    295:        mov     r3, r3, lsr #16         /* Put the two shorts together */
                    296:        orr     r3, r3, lr, lsl #16
                    297:
                    298:        ldr     r4, [r0, #0x0002]       /* take advantage of nonaligned
                    299:                                         * word accesses */
                    300:        ldr     lr, [r0]
                    301:        mov     r4, r4, lsr #16         /* Put the two shorts together */
                    302:        orr     r4, r4, lr, lsl #16
                    303:
                    304:        ldr     r5, [r0, #0x0002]       /* take advantage of nonaligned
                    305:                                         * word accesses */
                    306:        ldr     lr, [r0]
                    307:        mov     r5, r5, lsr #16         /* Put the two shorts together */
                    308:        orr     r5, r5, lr, lsl #16
                    309:
                    310:        ldr     ip, [r0, #0x0002]       /* take advantage of nonaligned
                    311:                                         * word accesses */
                    312:        ldr     lr, [r0]
                    313:        mov     ip, ip, lsr #16         /* Put the two shorts together */
                    314:        orr     ip, ip, lr, lsl #16
                    315:
                    316:        stmia   r1!, {r3-r5,ip}         /* store 4 words = 8 shorts */
                    317:        subs    r2, r2, #0x00000008     /* Next */
1.5     ! briggs    318:        bgt     .Linsw16loop
1.1       bjh21     319:
                    320:        ldmfd   sp!, {r4,r5,pc}         /* Restore regs and go home */
                    321:
                    322:
                    323: /*
                    324:  * Writes short ints (16 bits) from a block of memory to an I/O address
                    325:  *
                    326:  * r0 = address to write to (IO)
                    327:  * r1 = address to read from (memory)
                    328:  * r2 = length
                          *
                          * r2 counts 16-bit items; the fast loop handles 8 of them (16 bytes)
                          * per iteration.  If r2 is not a multiple of 8 or r1 is not word
                          * aligned, control is passed to outsw before anything is pushed, so
                          * outsw returns directly to the caller.
                          * Each short is written as a full word with the 16-bit value
                          * duplicated in both halves; the low short of each source word goes
                          * out first.
                          * Returns at once when r2 <= 0.  r4, r5 and lr are saved on the stack
                          * and used as scratch together with r3 and ip.
                    329:  */
                    330:
                    331: ENTRY(outsw16)
                    332: /* Make sure that we have a positive length */
                    333:        cmp     r2, #0x00000000
                    334:        movle   pc, lr
                    335:
                    336: /* If the source address is word aligned and the size suitably
                    337:    aligned, do it fast */
                    338:
                    339:        tst     r2, #0x00000007         /* Z set when the count is a multiple of 8 */
                    340:        tsteq   r1, #0x00000003         /* ... and then Z set when r1 is word aligned */
                    341:
                    342:        bne     _C_LABEL(outsw)         /* otherwise let outsw do the work */
                    343:
                    344: /* Word aligned outsw */
                    345:
                    346:        stmfd   sp!, {r4,r5,lr}         /* lr doubles as a data register below */
                    347:
1.5     ! briggs    348: .Loutsw16loop:
1.1       bjh21     349:        ldmia   r1!, {r4,r5,ip,lr}      /* 4 words = 8 shorts, each word is (A)(B) */
                    350:
                    351:        eor     r3, r4, r4, lsl #16     /* r3 = (A^B)(B) */
                    352:        eor     r4, r4, r3, lsr #16     /* r4 = (A)(B^A^B) = (A)(A) */
                    353:        eor     r3, r3, r4, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
                    354:        str     r3, [r0]
                    355:        str     r4, [r0]
                    356:
                    357: /*     mov     r3, r4, lsl #16
                    358:  *     orr     r3, r3, r3, lsr #16
                    359:  *     str     r3, [r0]
                    360:  *
                    361:  *     mov     r3, r4, lsr #16
                    362:  *     orr     r3, r3, r3, lsl #16
                    363:  *     str     r3, [r0]
                    364:  */
                    365:
                    366:        eor     r3, r5, r5, lsl #16     /* r3 = (A^B)(B) */
                    367:        eor     r5, r5, r3, lsr #16     /* r5 = (A)(B^A^B) = (A)(A) */
                    368:        eor     r3, r3, r5, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
                    369:        str     r3, [r0]
                    370:        str     r5, [r0]
                    371:
                    372:        eor     r3, ip, ip, lsl #16     /* r3 = (A^B)(B) */
                    373:        eor     ip, ip, r3, lsr #16     /* ip = (A)(B^A^B) = (A)(A) */
                    374:        eor     r3, r3, ip, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
                    375:        str     r3, [r0]
                    376:        str     ip, [r0]
                    377:
                    378:        eor     r3, lr, lr, lsl #16     /* r3 = (A^B)(B) */
                    379:        eor     lr, lr, r3, lsr #16     /* lr = (A)(B^A^B) = (A)(A) */
                    380:        eor     r3, r3, lr, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
                    381:        str     r3, [r0]
                    382:        str     lr, [r0]
                    383:
                    384:        subs    r2, r2, #0x00000008
1.5     ! briggs    385:        bgt     .Loutsw16loop
1.1       bjh21     386:
                    387:        ldmfd   sp!, {r4,r5,pc}         /* and go home */
                    388:
                    389: /*
                    390:  * reads short ints (16 bits) from an I/O address into a block of memory
                    391:  * The I/O address is assumed to be mapped multiple times in a block of
                    392:  * 8 words.
                    393:  * The destination address should be word aligned.
                    394:  *
                    395:  * r0 = address to read from (IO)
                    396:  * r1 = address to write to (memory)
                    397:  * r2 = length
                          *
                          * r2 counts 16-bit items.  Items are fetched with load-multiple from
                          * consecutive words starting at r0 (up to r0 + 28); bits 0-15 of
                          * each word are kept and packed two per stored word.
                          * The count is worked off in chunks of 8, then 4, 2 and 1 items.
                          * If r1 is not word aligned, control is passed to insw before
                          * anything is pushed.  Returns at once when r2 <= 0.
                          * r4-r9 and lr are saved on the stack; lr holds the mask 0xffff0000.
                    398:  */
                    399:
                    400: ENTRY(inswm8)
                    401: /* Make sure that we have a positive length */
                    402:        cmp     r2, #0x00000000
                    403:        movle   pc, lr
                    404:
                    405: /* If the destination address is not word aligned, hand over to insw.
                    406:    Only the address is tested here; any positive length is accepted. */
                    407:
                    408:        tst     r1, #0x00000003
                    409:
                    410:        bne     _C_LABEL(insw)
                    411:
                    412: /* Word aligned insw */
                    413:
                    414:        stmfd   sp!, {r4-r9,lr}
                    415:
                    416:        mov     lr, #0xff000000
                    417:        orr     lr, lr, #0x00ff0000     /* lr = 0xffff0000, mask for the upper half */
                    418:
1.5     ! briggs    419: .Linswm8_loop8:
1.1       bjh21     420:        cmp     r2, #8
1.5     ! briggs    421:        bcc     .Linswm8_l8             /* fewer than 8 items left */
1.1       bjh21     422:
                    423:        ldmia   r0, {r3-r9,ip}          /* 8 words, one item in bits 0-15 of each */
                    424:
                    425:        bic     r3, r3, lr              /* keep bits 0-15 of item 0 */
                    426:        orr     r3, r3, r4, lsl #16     /* item 1 into the upper half */
                    427:        bic     r5, r5, lr
                    428:        orr     r4, r5, r6, lsl #16     /* r4 = items 2 and 3 */
                    429:        bic     r7, r7, lr
                    430:        orr     r5, r7, r8, lsl #16     /* r5 = items 4 and 5 */
                    431:        bic     r9, r9, lr
                    432:        orr     r6, r9, ip, lsl #16     /* r6 = items 6 and 7 */
                    433:
                    434:        stmia   r1!, {r3-r6}            /* store 4 words = 8 items */
                    435:
                    436:        subs    r2, r2, #0x00000008     /* Next */
1.5     ! briggs    437:        bne     .Linswm8_loop8
        !           438:        beq     .Linswm8_l1             /* always taken when reached: all done */
1.1       bjh21     439:
1.5     ! briggs    440: .Linswm8_l8:
1.1       bjh21     441:        cmp     r2, #4
1.5     ! briggs    442:        bcc     .Linswm8_l4             /* fewer than 4 items left */
1.1       bjh21     443:
                    444:        ldmia   r0, {r3-r6}             /* 4 words, one item each */
                    445:
                    446:        bic     r3, r3, lr
                    447:        orr     r3, r3, r4, lsl #16     /* r3 = items 0 and 1 */
                    448:        bic     r5, r5, lr
                    449:        orr     r4, r5, r6, lsl #16     /* r4 = items 2 and 3 */
                    450:
                    451:        stmia   r1!, {r3-r4}
                    452:
                    453:        subs    r2, r2, #0x00000004
1.5     ! briggs    454:        beq     .Linswm8_l1
1.1       bjh21     455:
1.5     ! briggs    456: .Linswm8_l4:
1.1       bjh21     457:        cmp     r2, #2
1.5     ! briggs    458:        bcc     .Linswm8_l2             /* fewer than 2 items left */
1.1       bjh21     459:
                    460:        ldmia   r0, {r3-r4}             /* 2 words, one item each */
                    461:
                    462:        bic     r3, r3, lr
                    463:        orr     r3, r3, r4, lsl #16     /* r3 = items 0 and 1 */
                    464:        str     r3, [r1], #0x0004
                    465:
                    466:        subs    r2, r2, #0x00000002
1.5     ! briggs    467:        beq     .Linswm8_l1
1.1       bjh21     468:
1.5     ! briggs    469: .Linswm8_l2:
1.1       bjh21     470:        cmp     r2, #1
1.5     ! briggs    471:        bcc     .Linswm8_l1             /* nothing left */
1.1       bjh21     472:
                    473:        ldr     r3, [r0]                /* last single item */
                    474:        subs    r2, r2, #0x00000001     /* Test in load delay slot */
                    475:                                        /* XXX, why don't we use result?  */
                                                        /* Nothing below reads r2 or the flags. */
                    476:
                    477:        strb    r3, [r1], #0x0001       /* bits 0-7 go to the lower address */
                    478:        mov     r3, r3, lsr #8
                    479:        strb    r3, [r1], #0x0001       /* bits 8-15 */
                    480:
                    481:
1.5     ! briggs    482: .Linswm8_l1:
1.1       bjh21     483:        ldmfd   sp!, {r4-r9,pc}         /* And go home */
                    484:
                    485: /*
                    486:  * write short ints (16 bits) to an I/O address from a block of memory
                    487:  * The I/O address is assumed to be mapped multiple times in a block of
                    488:  * 8 words.
                    489:  * The source address should be word aligned.
                    490:  *
                    491:  * r0 = address to write to (IO)
                    492:  * r1 = address to read from (memory)
                    493:  * r2 = length
                          *
                          * r2 counts 16-bit items.  Items are written with store-multiple to
                          * consecutive words starting at r0 (up to r0 + 28); every word
                          * carries one item duplicated in both halves, and the low short of
                          * each source word is placed first.
                          * The count is worked off in chunks of 8, then 4, 2 and 1 items.
                          * If r1 is not word aligned, control is passed to outsw before
                          * anything is pushed.  Returns at once when r2 <= 0.
                          * r4-r8 and lr are saved on the stack and used as data registers.
                    494:  */
                    495:
                    496: ENTRY(outswm8)
                    497: /* Make sure that we have a positive length */
                    498:        cmp     r2, #0x00000000
                    499:        movle   pc, lr
                    500:
                    501: /* If the source address is not word aligned, hand over to outsw.
                    502:    Only the address is tested here; any positive length is accepted. */
                    503:
                    504:        tst     r1, #0x00000003
                    505:
                    506:        bne     _C_LABEL(outsw)
                    507:
                    508: /* Word aligned outsw */
                    509:
                    510:        stmfd   sp!, {r4-r8,lr}
                    511:
1.5     ! briggs    512: .Loutswm8_loop8:
1.1       bjh21     513:        cmp     r2, #8
1.5     ! briggs    514:        bcc     .Loutswm8_l8            /* fewer than 8 items left */
1.1       bjh21     515:
                    516:        ldmia   r1!, {r3,r5,r7,ip}      /* 4 words = 8 items, each word is (A)(B) */
                    517:
                    518:        eor     r4, r3, r3, lsr #16     /* r4 = (A)(A^B) */
                    519:        eor     r3, r3, r4, lsl #16     /* r3 = (A^A^B)(B) = (B)(B) */
                    520:        eor     r4, r4, r3, lsr #16     /* r4 = (A)(B^A^B) = (A)(A) */
                    521:
                    522:        eor     r6, r5, r5, lsr #16     /* r6 = (A)(A^B) */
                    523:        eor     r5, r5, r6, lsl #16     /* r5 = (A^A^B)(B) = (B)(B) */
                    524:        eor     r6, r6, r5, lsr #16     /* r6 = (A)(B^A^B) = (A)(A) */
                    525:
                    526:        eor     r8, r7, r7, lsr #16     /* r8 = (A)(A^B) */
                    527:        eor     r7, r7, r8, lsl #16     /* r7 = (A^A^B)(B) = (B)(B) */
                    528:        eor     r8, r8, r7, lsr #16     /* r8 = (A)(B^A^B) = (A)(A) */
                    529:
                    530:        eor     lr, ip, ip, lsr #16     /* lr = (A)(A^B) */
                    531:        eor     ip, ip, lr, lsl #16     /* ip = (A^A^B)(B) = (B)(B) */
                    532:        eor     lr, lr, ip, lsr #16     /* lr = (A)(B^A^B) = (A)(A) */
                    533:
                    534:        stmia   r0, {r3-r8,ip,lr}       /* 8 words out, one item per word */
                    535:
                    536:        subs    r2, r2, #0x00000008     /* Next */
1.5     ! briggs    537:        bne     .Loutswm8_loop8
        !           538:        beq     .Loutswm8_l1            /* always taken when reached: all done */
1.1       bjh21     539:
1.5     ! briggs    540: .Loutswm8_l8:
1.1       bjh21     541:        cmp     r2, #4
1.5     ! briggs    542:        bcc     .Loutswm8_l4            /* fewer than 4 items left */
1.1       bjh21     543:
                    544:        ldmia   r1!, {r3-r4}            /* 2 words = 4 items */
                    545:
                    546:        eor     r6, r3, r3, lsr #16     /* r6 = (A)(A^B) */
                    547:        eor     r5, r3, r6, lsl #16     /* r5 = (A^A^B)(B) = (B)(B) */
                    548:        eor     r6, r6, r5, lsr #16     /* r6 = (A)(B^A^B) = (A)(A) */
                    549:
                    550:        eor     r8, r4, r4, lsr #16     /* r8 = (A)(A^B) */
                    551:        eor     r7, r4, r8, lsl #16     /* r7 = (A^A^B)(B) = (B)(B) */
                    552:        eor     r8, r8, r7, lsr #16     /* r8 = (A)(B^A^B) = (A)(A) */
                    553:
                    554:        stmia   r0, {r5-r8}             /* 4 words out, one item per word */
                    555:
                    556:        subs    r2, r2, #0x00000004
1.5     ! briggs    557:        beq     .Loutswm8_l1
1.1       bjh21     558:
1.5     ! briggs    559: .Loutswm8_l4:
1.1       bjh21     560:        cmp     r2, #2
1.5     ! briggs    561:        bcc     .Loutswm8_l2            /* fewer than 2 items left */
1.1       bjh21     562:
                    563:        ldr     r3, [r1], #0x0004       /* r3 = (A)(B) */
                    564:        subs    r2, r2, #0x00000002     /* Done test in Load delay slot */
                    565:
                    566:        eor     r5, r3, r3, lsr #16     /* r5 = (A)(A^B)*/
                    567:        eor     r4, r3, r5, lsl #16     /* r4 = (A^A^B)(B) = (B)(B) */
                    568:        eor     r5, r5, r4, lsr #16     /* r5 = (A)(B^A^B) = (A)(A) */
                    569:
                    570:        stmia   r0, {r4, r5}            /* 2 words out, one item per word */
                    571:
1.5     ! briggs    572:        beq     .Loutswm8_l1            /* eor and stmia leave the subs flags intact */
1.1       bjh21     573:
1.5     ! briggs    574: .Loutswm8_l2:
1.1       bjh21     575:        cmp     r2, #1
1.5     ! briggs    576:        bcc     .Loutswm8_l1            /* nothing left */
1.1       bjh21     577:
                    578:        ldrb    r3, [r1], #0x0001       /* byte at the lower address -> bits 0-7 */
                    579:        ldrb    r4, [r1], #0x0001       /* next byte -> bits 8-15 (below) */
                    580:        subs    r2, r2, #0x00000001     /* Done test in load delay slot */
                    581:                                        /* XXX This test isn't used?  */
                                                        /* Nothing below reads r2 or the flags. */
                    582:        orr     r3, r3, r4, lsl #8
                    583:        orr     r3, r3, r3, lsl #16     /* duplicate the short into both halves */
                    584:        str     r3, [r0]
                    585:
1.5     ! briggs    586: .Loutswm8_l1:
1.1       bjh21     587:        ldmfd   sp!, {r4-r8,pc}         /* And go home */

CVSweb <webmaster@jp.NetBSD.org>