[BACK]Return to blockio.S CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / arch / arm / arm

Annotation of src/sys/arch/arm/arm/blockio.S, Revision 1.5.160.1

1.5.160.1! tls         1: /*     $NetBSD$        */
1.1       bjh21       2:
                      3: /*
1.3       bjh21       4:  * Copyright (c) 2001 Ben Harris.
1.1       bjh21       5:  * Copyright (c) 1994 Mark Brinicombe.
                      6:  * Copyright (c) 1994 Brini.
                      7:  * All rights reserved.
                      8:  *
                      9:  * This code is derived from software written for Brini by Mark Brinicombe
                     10:  *
                     11:  * Redistribution and use in source and binary forms, with or without
                     12:  * modification, are permitted provided that the following conditions
                     13:  * are met:
                     14:  * 1. Redistributions of source code must retain the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer.
                     16:  * 2. Redistributions in binary form must reproduce the above copyright
                     17:  *    notice, this list of conditions and the following disclaimer in the
                     18:  *    documentation and/or other materials provided with the distribution.
                     19:  * 3. All advertising materials mentioning features or use of this software
                     20:  *    must display the following acknowledgement:
                     21:  *     This product includes software developed by Brini.
                     22:  * 4. The name of the company nor the name of the author may be used to
                     23:  *    endorse or promote products derived from this software without specific
                     24:  *    prior written permission.
                     25:  *
                     26:  * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
                     27:  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
                     28:  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
                     29:  * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
                     30:  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
                     31:  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
                     32:  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     33:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     34:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     35:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     36:  * SUCH DAMAGE.
                     37:  *
                     38:  * RiscBSD kernel project
                     39:  *
                     40:  * blockio.S
                     41:  *
                     42:  * optimised block read/write from/to IO routines.
                     43:  *
                     44:  * Created      : 08/10/94
                     45:  * Modified    : 22/01/99  -- R.Earnshaw
                     46:  *                            Faster, and small tweaks for StrongARM
                     47:  */
                     48:
                     49: #include <machine/asm.h>
1.2       bjh21      50:
1.5.160.1! tls        51: RCSID("$NetBSD$")
1.2       bjh21      52:
                     53: /*
                     54:  * Read bytes from an I/O address into a block of memory
                     55:  *
                     56:  * r0 = address to read from (IO)
                     57:  * r1 = address to write to (memory)
                     58:  * r2 = length in bytes
                          *
                          * Every byte is loaded from the same address [r0]; r0 is never
                          * advanced, while r1 is post-incremented past each byte stored.
                          * Up to three single bytes are copied first to word-align r1, then
                          * four byte reads at a time are packed into one word (first byte
                          * read in bits 7:0) and written with one word store; any final 1-3
                          * bytes are copied individually.  r3 and ip are used as scratch.
                          *
                          * NOTE(review): the packing order matches the order of the bytes in
                          * memory only for little-endian word stores -- confirm that
                          * big-endian configurations do not use this routine.
                     59:  */
                     60:
                     61: /* This code will look very familiar if you've read _memcpy(). */
                     62: ENTRY(read_multi_1)
1.4       bjh21      63:        mov     ip, sp                  /* ip = sp on entry */
1.5.160.1! tls        64:        push    {fp, ip, lr, pc}        /* save fp, entry sp, lr, pc */
1.4       bjh21      65:        sub     fp, ip, #4              /* fp = entry sp - 4 */
1.2       bjh21      66:        subs    r2, r2, #4              /* r2 = length - 4 */
1.5       briggs     67:        blt     .Lrm1_l4                        /* less than 4 bytes */
1.5.160.1! tls        68:        ands    ip, r1, #3              /* ip = r1 & 3 */
1.5       briggs     69:        beq     .Lrm1_main              /* aligned destination */
1.5.160.1! tls        70:        rsb     ip, ip, #4              /* ip = bytes to next word boundary */
        !            71:        cmp     ip, #2                  /* ge: ip >= 2, gt: ip == 3 */
1.2       bjh21      72:        ldrb    r3, [r0]                /* 1st alignment byte, always */
                     73:        strb    r3, [r1], #1
1.5.160.1! tls        74:        ldrbge  r3, [r0]                /* 2nd byte if ip >= 2 */
        !            75:        strbge  r3, [r1], #1
        !            76:        ldrbgt  r3, [r0]                /* 3rd byte if ip > 2 */
        !            77:        strbgt  r3, [r1], #1
        !            78:        subs    r2, r2, ip              /* r2 = bytes left - 4 */
1.5       briggs     79:        blt     .Lrm1_l4                /* fewer than 4 bytes left */
                     80: .Lrm1_main:
                     81: .Lrm1loop:
1.2       bjh21      82:        ldrb    r3, [r0]                /* byte 0 -> bits 7:0 */
1.5.160.1! tls        83:        ldrb    ip, [r0]
        !            84:        orr     r3, r3, ip, lsl #8      /* byte 1 -> bits 15:8 */
        !            85:        ldrb    ip, [r0]
        !            86:        orr     r3, r3, ip, lsl #16     /* byte 2 -> bits 23:16 */
        !            87:        ldrb    ip, [r0]
        !            88:        orr     r3, r3, ip, lsl #24     /* byte 3 -> bits 31:24 */
1.2       bjh21      89:        str     r3, [r1], #4            /* store all four bytes at once */
                     90:        subs    r2, r2, #4
1.5       briggs     91:        bge     .Lrm1loop               /* at least 4 more bytes to do */
                     92: .Lrm1_l4:
1.2       bjh21      93:        adds    r2, r2, #4                      /* r2 = bytes still to copy */
1.5.160.1! tls        94:        ldmdbeq fp, {fp, sp, pc}        /* nothing left: unwind and return */
        !            95:        RETc(eq)                                /* ??? not needed: the ldmdbeq above has already loaded pc */
1.2       bjh21      96:        cmp     r2, #2                  /* ge: r2 >= 2, gt: r2 == 3 */
                     97:        ldrb    r3, [r0]                /* 1st tail byte, always */
                     98:        strb    r3, [r1], #1
1.5.160.1! tls        99:        ldrbge  r3, [r0]                /* 2nd tail byte if r2 >= 2 */
        !           100:        strbge  r3, [r1], #1
        !           101:        ldrbgt  r3, [r0]                /* 3rd tail byte if r2 > 2 */
        !           102:        strbgt  r3, [r1], #1
1.4       bjh21     103:        ldmdb   fp, {fp, sp, pc}        /* restore fp and sp, return via saved lr */
1.5.160.1! tls       104: END(read_multi_1)
1.3       bjh21     105:
                    106: /*
                    107:  * Write bytes to an I/O address from a block of memory
                    108:  *
                    109:  * r0 = address to write to (IO)
                    110:  * r1 = address to read from (memory)
                    111:  * r2 = length in bytes
                          *
                          * Every byte is stored to the same address [r0]; r0 is never
                          * advanced, while r1 is post-incremented past each byte loaded.
                          * Up to three single bytes are copied first to word-align r1, then
                          * one word at a time is loaded from memory and written out as four
                          * byte stores, bits 7:0 first; any final 1-3 bytes are copied
                          * individually.  r3 and ip are used as scratch.
                          *
                          * NOTE(review): writing bits 7:0 first matches the order of the
                          * bytes in memory only for little-endian word loads -- confirm that
                          * big-endian configurations do not use this routine.
                    112:  */
                    113:
                    114: /* This code will look very familiar if you've read _memcpy(). */
                    115: ENTRY(write_multi_1)
1.4       bjh21     116:        mov     ip, sp                  /* ip = sp on entry */
1.5.160.1! tls       117:        push    {fp, ip, lr, pc}        /* save fp, entry sp, lr, pc */
1.4       bjh21     118:        sub     fp, ip, #4              /* fp = entry sp - 4 */
1.3       bjh21     119:        subs    r2, r2, #4              /* r2 = length - 4 */
1.5       briggs    120:        blt     .Lwm1_l4                /* less than 4 bytes */
1.5.160.1! tls       121:        ands    ip, r1, #3              /* ip = r1 & 3 */
1.5       briggs    122:        beq     .Lwm1_main              /* aligned source */
1.5.160.1! tls       123:        rsb     ip, ip, #4              /* ip = bytes to next word boundary */
        !           124:        cmp     ip, #2                  /* ge: ip >= 2, gt: ip == 3 */
1.3       bjh21     125:        ldrb    r3, [r1], #1            /* 1st alignment byte, always */
                    126:        strb    r3, [r0]
1.5.160.1! tls       127:        ldrbge  r3, [r1], #1            /* 2nd byte if ip >= 2 */
        !           128:        strbge  r3, [r0]
        !           129:        ldrbgt  r3, [r1], #1            /* 3rd byte if ip > 2 */
        !           130:        strbgt  r3, [r0]
        !           131:        subs    r2, r2, ip              /* r2 = bytes left - 4 */
1.5       briggs    132:        blt     .Lwm1_l4                /* fewer than 4 bytes left */
                    133: .Lwm1_main:
                    134: .Lwm1loop:
1.3       bjh21     135:        ldr     r3, [r1], #4            /* fetch four bytes at once */
                    136:        strb    r3, [r0]                /* bits 7:0 */
                    137:        mov     r3, r3, lsr #8
                    138:        strb    r3, [r0]                /* bits 15:8 */
                    139:        mov     r3, r3, lsr #8
                    140:        strb    r3, [r0]                /* bits 23:16 */
                    141:        mov     r3, r3, lsr #8
                    142:        strb    r3, [r0]                /* bits 31:24 */
                    143:        subs    r2, r2, #4
1.5       briggs    144:        bge     .Lwm1loop               /* at least 4 more bytes to do */
                    145: .Lwm1_l4:
1.3       bjh21     146:        adds    r2, r2, #4                      /* r2 = bytes still to copy */
1.5.160.1! tls       147:        ldmdbeq fp, {fp, sp, pc}        /* nothing left: unwind and return */
1.3       bjh21     148:        cmp     r2, #2                  /* ge: r2 >= 2, gt: r2 == 3 */
                    149:        ldrb    r3, [r1], #1            /* 1st tail byte, always */
                    150:        strb    r3, [r0]
1.5.160.1! tls       151:        ldrbge  r3, [r1], #1            /* 2nd tail byte if r2 >= 2 */
        !           152:        strbge  r3, [r0]
        !           153:        ldrbgt  r3, [r1], #1            /* 3rd tail byte if r2 > 2 */
        !           154:        strbgt  r3, [r0]
1.4       bjh21     155:        ldmdb   fp, {fp, sp, pc}        /* restore fp and sp, return via saved lr */
1.5.160.1! tls       156: END(write_multi_1)
1.1       bjh21     157:
                    158: /*
                    159:  * Reads short ints (16 bits) from an I/O address into a block of memory
                    160:  *
                    161:  * r0 = address to read from (IO)
                    162:  * r1 = address to write to (memory)
                    163:  * r2 = length, counted in 16-bit items
                          *
                          * r0 is never advanced; r1 is post-incremented as data is stored.
                          * When the count is even and r1 is word aligned, each loop pass does
                          * two word loads from the device and one word store to memory.
                          * Otherwise each item is fetched with one word load from [r0] and
                          * its low 16 bits are stored as two bytes, low byte first.
                          * r3 and ip are used as scratch.
                          *
                          * NOTE(review): the fast path uses bits 31:16 of a word load from
                          * r0 + 2, so it presumably depends on how the CPU and bus treat that
                          * non-word-aligned access -- confirm for the target hardware.
                    164:  */
                    165:
                    166: ENTRY(insw)
                    167: /* Make sure that we have a positive length */
                    168:        cmp     r2, #0x00000000
1.5.160.1! tls       169:        RETc(le)                        /* presumably: return when r2 <= 0 */
1.1       bjh21     170:
                    171: /* If the destination address is word aligned and the count is even, do it fast */
                    172:
                    173:        tst     r2, #0x00000001         /* Z set if the count is even */
                    174:        tsteq   r1, #0x00000003         /* ... and still set if r1 is word aligned */
1.5       briggs    175:        beq     .Lfastinsw
1.1       bjh21     176:
                    177: /* Non aligned insw */
                    178:
1.5       briggs    179: .Linswloop:
1.1       bjh21     180:        ldr     r3, [r0]                /* one item; only bits 15:0 are used */
                    181:        subs    r2, r2, #0x00000001     /* Loop test in load delay slot */
                    182:        strb    r3, [r1], #0x0001       /* low byte */
                    183:        mov     r3, r3, lsr #8
                    184:        strb    r3, [r1], #0x0001       /* high byte */
1.5       briggs    185:        bgt     .Linswloop              /* flags still from the subs above */
1.1       bjh21     186:
1.5.160.1! tls       187:        RET
1.1       bjh21     188:
                    189: /* Word aligned insw */
                    190:
1.5       briggs    191: .Lfastinsw:
1.1       bjh21     192:
1.5       briggs    193: .Lfastinswloop:
1.1       bjh21     194:        ldr     r3, [r0, #0x0002]       /* take advantage of nonaligned
                    195:                                         * word accesses */
                    196:        ldr     ip, [r0]                /* second item, in bits 15:0 */
                    197:        mov     r3, r3, lsr #16         /* Put the two shorts together */
                    198:        orr     r3, r3, ip, lsl #16     /* first item low, second item high */
                    199:        str     r3, [r1], #0x0004       /* Store */
                    200:        subs    r2, r2, #0x00000002     /* Next */
1.5       briggs    201:        bgt     .Lfastinswloop
1.1       bjh21     202:
1.5.160.1! tls       203:        RET
        !           204: END(insw)
1.1       bjh21     205:
                    206:
                    207: /*
                    208:  * Writes short ints (16 bits) from a block of memory to an I/O address
                    209:  *
                    210:  * r0 = address to write to (IO)
                    211:  * r1 = address to read from (memory)
                    212:  * r2 = length, counted in 16-bit items
                          *
                          * r0 is never advanced; r1 is post-incremented as data is loaded.
                          * Every item is written to [r0] as a full word that carries the
                          * same 16-bit value in both halves.  When the count is even and r1
                          * is word aligned, two items are taken from each memory word (low
                          * half first); otherwise each item is assembled from two byte loads,
                          * low byte first.  r3 and ip are used as scratch.
                    213:  */
                    214:
                    215: ENTRY(outsw)
                    216: /* Make sure that we have a positive length */
                    217:        cmp     r2, #0x00000000
1.5.160.1! tls       218:        RETc(le)                        /* presumably: return when r2 <= 0 */
1.1       bjh21     219:
                    220: /* If the source address is word aligned and the count is even, do it fast */
                    221:
                    222:        tst     r2, #0x00000001         /* Z set if the count is even */
                    223:        tsteq   r1, #0x00000003         /* ... and still set if r1 is word aligned */
1.5       briggs    224:        beq     .Lfastoutsw
1.1       bjh21     225:
                    226: /* Non aligned outsw */
                    227:
1.5       briggs    228: .Loutswloop:
1.1       bjh21     229:        ldrb    r3, [r1], #0x0001       /* low byte */
                    230:        ldrb    ip, [r1], #0x0001       /* high byte */
                    231:        subs    r2, r2, #0x00000001     /* Loop test in load delay slot */
                    232:        orr     r3, r3, ip, lsl #8      /* r3 = 16-bit item */
                    233:        orr     r3, r3, r3, lsl #16     /* copy the item into the top half too */
                    234:        str     r3, [r0]
1.5       briggs    235:        bgt     .Loutswloop             /* flags still from the subs above */
1.1       bjh21     236:
1.5.160.1! tls       237:        RET
1.1       bjh21     238:
                    239: /* Word aligned outsw */
                    240:
1.5       briggs    241: .Lfastoutsw:
1.1       bjh21     242:
1.5       briggs    243: .Lfastoutswloop:
1.1       bjh21     244:        ldr     r3, [r1], #0x0004       /* r3 = (H)(L) */
                    245:        subs    r2, r2, #0x00000002     /* Loop test in load delay slot */
                    246:
                    247:        eor     ip, r3, r3, lsr #16     /* ip = (H)(H^L) */
                    248:        eor     r3, r3, ip, lsl #16     /* r3 = (H^H^L)(L) = (L)(L) */
                    249:        eor     ip, ip, r3, lsr #16     /* ip = (H)(H^L^L) = (H)(H) */
                    250:
                    251:        str     r3, [r0]                /* low item first */
                    252:        str     ip, [r0]                /* then the high item */
                    253:
                    254: /*     mov     ip, r3, lsl #16
                    255:  *     orr     ip, ip, ip, lsr #16
                    256:  *     str     ip, [r0]
                    257:  *
                    258:  *     mov     ip, r3, lsr #16
                    259:  *     orr     ip, ip, ip, lsl #16
                    260:  *     str     ip, [r0]
                    261:  */
                    262:
1.5       briggs    263:        bgt     .Lfastoutswloop         /* flags still from the subs above */
1.1       bjh21     264:
1.5.160.1! tls       265:        RET
        !           266: END(outsw)
1.1       bjh21     267:
                    268: /*
                    269:  * reads short ints (16 bits) from an I/O address into a block of memory
                    270:  * with a length guaranteed to be a multiple of 16 bytes
                    271:  * with a word aligned destination address
                    272:  *
                    273:  * r0 = address to read from (IO)
                    274:  * r1 = address to write to (memory)
                    275:  * r2 = length, counted in 16-bit items
                          *
                          * If the count is not a multiple of 8 items or r1 is not word
                          * aligned, control branches (without link) to insw, which then
                          * returns straight to our caller.  Otherwise each loop pass reads 8
                          * items with word loads from r0 and r0 + 2 and stores them as four
                          * words with one stmia.  r4 and r5 are saved and restored; r3, ip
                          * and lr (after being pushed) are used as scratch.
                    276:  */
                    277:
                    278: ENTRY(insw16)
                    279: /* Make sure that we have a positive length */
                    280:        cmp     r2, #0x00000000
1.5.160.1! tls       281:        RETc(le)                        /* presumably: return when r2 <= 0 */
1.1       bjh21     282:
                    283: /* If the destination address is word aligned and the size suitably
                    284:    aligned, do it fast; otherwise let insw do the whole transfer */
                    285:
                    286:        tst     r2, #0x00000007         /* count a multiple of 8 items? */
                    287:        tsteq   r1, #0x00000003         /* ... and r1 word aligned? */
                    288:
                    289:        bne     _C_LABEL(insw)
                    290:
                    291: /* Word aligned insw */
                    292:
1.5.160.1! tls       293:        push    {r4,r5,lr}              /* lr is reused as a scratch register below */
1.1       bjh21     294:
1.5       briggs    295: .Linsw16loop:
1.1       bjh21     296:        ldr     r3, [r0, #0x0002]       /* take advantage of nonaligned
                    297:                                         * word accesses */
                    298:        ldr     lr, [r0]
                    299:        mov     r3, r3, lsr #16         /* Put the two shorts together */
                    300:        orr     r3, r3, lr, lsl #16     /* r3 = items 0 and 1 */
                    301:
                    302:        ldr     r4, [r0, #0x0002]       /* take advantage of nonaligned
                    303:                                         * word accesses */
                    304:        ldr     lr, [r0]
                    305:        mov     r4, r4, lsr #16         /* Put the two shorts together */
                    306:        orr     r4, r4, lr, lsl #16     /* r4 = items 2 and 3 */
                    307:
                    308:        ldr     r5, [r0, #0x0002]       /* take advantage of nonaligned
                    309:                                         * word accesses */
                    310:        ldr     lr, [r0]
                    311:        mov     r5, r5, lsr #16         /* Put the two shorts together */
                    312:        orr     r5, r5, lr, lsl #16     /* r5 = items 4 and 5 */
                    313:
                    314:        ldr     ip, [r0, #0x0002]       /* take advantage of nonaligned
                    315:                                         * word accesses */
                    316:        ldr     lr, [r0]
                    317:        mov     ip, ip, lsr #16         /* Put the two shorts together */
                    318:        orr     ip, ip, lr, lsl #16     /* ip = items 6 and 7 */
                    319:
                    320:        stmia   r1!, {r3-r5,ip}         /* store 16 bytes, advance r1 */
                    321:        subs    r2, r2, #0x00000008     /* Next */
1.5       briggs    322:        bgt     .Linsw16loop
1.1       bjh21     323:
1.5.160.1! tls       324:        pop     {r4,r5,pc}              /* Restore regs and go home */
        !           325: END(insw16)
1.1       bjh21     326:
                    327:
                    328: /*
                    329:  * Writes short ints (16 bits) from a block of memory to an I/O address
                    330:  *
                    331:  * r0 = address to write to (IO)
                    332:  * r1 = address to read from (memory)
                    333:  * r2 = length, counted in 16-bit items
                          *
                          * If the count is not a multiple of 8 items or r1 is not word
                          * aligned, control branches (without link) to outsw, which then
                          * returns straight to our caller.  Otherwise each loop pass loads
                          * four words (8 items) from memory and writes 8 words to [r0], each
                          * carrying one item duplicated in both halves, low item of every
                          * memory word first.  r4 and r5 are saved and restored; r3, ip and
                          * lr (after being pushed) are used as scratch.
                    334:  */
                    335:
                    336: ENTRY(outsw16)
                    337: /* Make sure that we have a positive length */
                    338:        cmp     r2, #0x00000000
1.5.160.1! tls       339:        RETc(le)                        /* presumably: return when r2 <= 0 */
1.1       bjh21     340:
                    341: /* If the source address is word aligned and the size suitably
                    342:    aligned, do it fast; otherwise let outsw do the whole transfer */
                    343:
                    344:        tst     r2, #0x00000007         /* count a multiple of 8 items? */
                    345:        tsteq   r1, #0x00000003         /* ... and r1 word aligned? */
                    346:
                    347:        bne     _C_LABEL(outsw)
                    348:
                    349: /* Word aligned outsw */
                    350:
1.5.160.1! tls       351:        push    {r4,r5,lr}              /* lr is reused as a data register below */
1.1       bjh21     352:
1.5       briggs    353: .Loutsw16loop:
1.1       bjh21     354:        ldmia   r1!, {r4,r5,ip,lr}      /* four words = 8 items, each word (A)(B) */
                    355:
                    356:        eor     r3, r4, r4, lsl #16     /* r3 = (A^B)(B) */
                    357:        eor     r4, r4, r3, lsr #16     /* r4 = (A)(B^A^B) = (A)(A) */
                    358:        eor     r3, r3, r4, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
                    359:        str     r3, [r0]
                    360:        str     r4, [r0]
                    361:
                    362: /*     mov     r3, r4, lsl #16
                    363:  *     orr     r3, r3, r3, lsr #16
                    364:  *     str     r3, [r0]
                    365:  *
                    366:  *     mov     r3, r4, lsr #16
                    367:  *     orr     r3, r3, r3, lsl #16
                    368:  *     str     r3, [r0]
                    369:  */
                    370:
                    371:        eor     r3, r5, r5, lsl #16     /* r3 = (A^B)(B) */
                    372:        eor     r5, r5, r3, lsr #16     /* r5 = (A)(B^A^B) = (A)(A) */
                    373:        eor     r3, r3, r5, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
                    374:        str     r3, [r0]
                    375:        str     r5, [r0]
                    376:
                    377:        eor     r3, ip, ip, lsl #16     /* r3 = (A^B)(B) */
                    378:        eor     ip, ip, r3, lsr #16     /* ip = (A)(B^A^B) = (A)(A) */
                    379:        eor     r3, r3, ip, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
                    380:        str     r3, [r0]
                    381:        str     ip, [r0]
                    382:
                    383:        eor     r3, lr, lr, lsl #16     /* r3 = (A^B)(B) */
                    384:        eor     lr, lr, r3, lsr #16     /* lr = (A)(B^A^B) = (A)(A) */
                    385:        eor     r3, r3, lr, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
                    386:        str     r3, [r0]
                    387:        str     lr, [r0]
                    388:
                    389:        subs    r2, r2, #0x00000008     /* 8 items done */
1.5       briggs    390:        bgt     .Loutsw16loop
1.1       bjh21     391:
1.5.160.1! tls       392:        pop     {r4,r5,pc}              /* and go home */
        !           393: END(outsw16)
1.1       bjh21     394:
                    395: /*
                    396:  * reads short ints (16 bits) from an I/O address into a block of memory
                    397:  * The I/O address is assumed to be mapped multiple times in a block of
                    398:  * 8 words.
                    399:  * The destination address should be word aligned.
                    400:  *
                    401:  * r0 = address to read from (IO)
                    402:  * r1 = address to write to (memory)
                    403:  * r2 = length, counted in 16-bit items
                          *
                          * If r1 is not word aligned, control branches (without link) to
                          * insw, which then returns straight to our caller.  Otherwise items
                          * are fetched with ldmia from consecutive words starting at r0 (r0
                          * itself is not written back); only bits 15:0 of each word read are
                          * kept and pairs of items are merged into one memory word, first
                          * item in the low half.  The transfer is done in groups of 8 items,
                          * then at most one group of 4, one of 2 and a single last item
                          * (stored as two bytes).  r4-r9 are saved and restored; r3, ip and
                          * lr (after being pushed) are used as scratch.
                    404:  */
                    405:
                    406: ENTRY(inswm8)
                    407: /* Make sure that we have a positive length */
                    408:        cmp     r2, #0x00000000
1.5.160.1! tls       409:        RETc(le)                        /* presumably: return when r2 <= 0 */
1.1       bjh21     410:
                    411: /* If the destination address is not word aligned, let insw do the
                    412:    whole transfer (only r1 is tested here, not the size) */
                    413:
                    414:        tst     r1, #0x00000003
                    415:
                    416:        bne     _C_LABEL(insw)
                    417:
                    418: /* Word aligned insw */
                    419:
1.5.160.1! tls       420:        push    {r4-r9,lr}              /* lr is reused to hold a mask below */
1.1       bjh21     421:
                    422:        mov     lr, #0xff000000
                    423:        orr     lr, lr, #0x00ff0000     /* lr = 0xffff0000 */
                    424:
1.5       briggs    425: .Linswm8_loop8:
1.1       bjh21     426:        cmp     r2, #8
1.5       briggs    427:        bcc     .Linswm8_l8             /* fewer than 8 items left */
1.1       bjh21     428:
                    429:        ldmia   r0, {r3-r9,ip}          /* 8 words from r0 .. r0 + 28 */
                    430:
                    431:        bic     r3, r3, lr              /* keep bits 15:0 of item 0 */
                    432:        orr     r3, r3, r4, lsl #16     /* r3 = (item 1)(item 0) */
                    433:        bic     r5, r5, lr
                    434:        orr     r4, r5, r6, lsl #16     /* r4 = (item 3)(item 2) */
                    435:        bic     r7, r7, lr
                    436:        orr     r5, r7, r8, lsl #16     /* r5 = (item 5)(item 4) */
                    437:        bic     r9, r9, lr
                    438:        orr     r6, r9, ip, lsl #16     /* r6 = (item 7)(item 6) */
                    439:
                    440:        stmia   r1!, {r3-r6}            /* store 16 bytes, advance r1 */
                    441:
                    442:        subs    r2, r2, #0x00000008     /* Next */
1.5       briggs    443:        bne     .Linswm8_loop8          /* more to do: re-test the count */
                    444:        beq     .Linswm8_l1             /* exactly done */
1.1       bjh21     445:
1.5       briggs    446: .Linswm8_l8:
1.1       bjh21     447:        cmp     r2, #4
1.5       briggs    448:        bcc     .Linswm8_l4             /* fewer than 4 items left */
1.1       bjh21     449:
                    450:        ldmia   r0, {r3-r6}             /* 4 words from r0 .. r0 + 12 */
                    451:
                    452:        bic     r3, r3, lr
                    453:        orr     r3, r3, r4, lsl #16     /* r3 = (item 1)(item 0) */
                    454:        bic     r5, r5, lr
                    455:        orr     r4, r5, r6, lsl #16     /* r4 = (item 3)(item 2) */
                    456:
                    457:        stmia   r1!, {r3-r4}
                    458:
                    459:        subs    r2, r2, #0x00000004
1.5       briggs    460:        beq     .Linswm8_l1
1.1       bjh21     461:
1.5       briggs    462: .Linswm8_l4:
1.1       bjh21     463:        cmp     r2, #2
1.5       briggs    464:        bcc     .Linswm8_l2             /* fewer than 2 items left */
1.1       bjh21     465:
                    466:        ldmia   r0, {r3-r4}             /* 2 words from r0 and r0 + 4 */
                    467:
                    468:        bic     r3, r3, lr
                    469:        orr     r3, r3, r4, lsl #16     /* r3 = (item 1)(item 0) */
                    470:        str     r3, [r1], #0x0004
                    471:
                    472:        subs    r2, r2, #0x00000002
1.5       briggs    473:        beq     .Linswm8_l1
1.1       bjh21     474:
1.5       briggs    475: .Linswm8_l2:
1.1       bjh21     476:        cmp     r2, #1
1.5       briggs    477:        bcc     .Linswm8_l1             /* nothing left */
1.1       bjh21     478:
                    479:        ldr     r3, [r0]                /* last item; only bits 15:0 are used */
                    480:        subs    r2, r2, #0x00000001     /* Test in load delay slot */
                    481:                                        /* XXX, why don't we use result?  */
                    482:
                    483:        strb    r3, [r1], #0x0001       /* low byte */
                    484:        mov     r3, r3, lsr #8
                    485:        strb    r3, [r1], #0x0001       /* high byte */
                    486:
                    487:
1.5       briggs    488: .Linswm8_l1:
1.5.160.1! tls       489:        pop     {r4-r9,pc}              /* And go home */
        !           490: END(inswm8)
1.1       bjh21     491:
                    492: /*
                    493:  * write short ints (16 bits) to an I/O address from a block of memory
                    494:  * The I/O address is assumed to be mapped multiple times in a block of
                    495:  * 8 words.
                    496:  * The source address should be word aligned.
                    497:  *
                    498:  * r0 = address to write to (IO)
                    499:  * r1 = address to read from (memory)
                    500:  * r2 = length, counted in 16-bit items
                          *
                          * If r1 is not word aligned, control branches (without link) to
                          * outsw, which then returns straight to our caller.  Otherwise each
                          * memory word (A)(B) is split into (B)(B) and (A)(A) and the
                          * results are written with stmia to consecutive words starting at
                          * r0 (r0 itself is not written back), low item first.  The transfer
                          * is done in groups of 8 items, then at most one group of 4, one of
                          * 2 and a single last item (assembled from two byte loads).
                          * r4-r8 are saved and restored; r3, ip and lr (after being pushed)
                          * are used as scratch.
                    501:  */
                    502:
                    503: ENTRY(outswm8)
                    504: /* Make sure that we have a positive length */
                    505:        cmp     r2, #0x00000000
1.5.160.1! tls       506:        RETc(le)                        /* presumably: return when r2 <= 0 */
1.1       bjh21     507:
                    508: /* If the source address is not word aligned, let outsw do the
                    509:    whole transfer (only r1 is tested here, not the size) */
                    510:
                    511:        tst     r1, #0x00000003
                    512:
                    513:        bne     _C_LABEL(outsw)
                    514:
                    515: /* Word aligned outsw */
                    516:
1.5.160.1! tls       517:        push    {r4-r8,lr}              /* lr is reused as a data register below */
1.1       bjh21     518:
1.5       briggs    519: .Loutswm8_loop8:
1.1       bjh21     520:        cmp     r2, #8
1.5       briggs    521:        bcc     .Loutswm8_l8            /* fewer than 8 items left */
1.1       bjh21     522:
                    523:        ldmia   r1!, {r3,r5,r7,ip}      /* four words = 8 items, each word (A)(B) */
                    524:
                    525:        eor     r4, r3, r3, lsr #16     /* r4 = (A)(A^B) */
                    526:        eor     r3, r3, r4, lsl #16     /* r3 = (A^A^B)(B) = (B)(B) */
                    527:        eor     r4, r4, r3, lsr #16     /* r4 = (A)(B^A^B) = (A)(A) */
                    528:
                    529:        eor     r6, r5, r5, lsr #16     /* r6 = (A)(A^B) */
                    530:        eor     r5, r5, r6, lsl #16     /* r5 = (A^A^B)(B) = (B)(B) */
                    531:        eor     r6, r6, r5, lsr #16     /* r6 = (A)(B^A^B) = (A)(A) */
                    532:
                    533:        eor     r8, r7, r7, lsr #16     /* r8 = (A)(A^B) */
                    534:        eor     r7, r7, r8, lsl #16     /* r7 = (A^A^B)(B) = (B)(B) */
                    535:        eor     r8, r8, r7, lsr #16     /* r8 = (A)(B^A^B) = (A)(A) */
                    536:
                    537:        eor     lr, ip, ip, lsr #16     /* lr = (A)(A^B) */
                    538:        eor     ip, ip, lr, lsl #16     /* ip = (A^A^B)(B) = (B)(B) */
                    539:        eor     lr, lr, ip, lsr #16     /* lr = (A)(B^A^B) = (A)(A) */
                    540:
                    541:        stmia   r0, {r3-r8,ip,lr}       /* 8 words to r0 .. r0 + 28 */
                    542:
                    543:        subs    r2, r2, #0x00000008     /* Next */
1.5       briggs    544:        bne     .Loutswm8_loop8         /* more to do: re-test the count */
                    545:        beq     .Loutswm8_l1            /* exactly done */
1.1       bjh21     546:
1.5       briggs    547: .Loutswm8_l8:
1.1       bjh21     548:        cmp     r2, #4
1.5       briggs    549:        bcc     .Loutswm8_l4            /* fewer than 4 items left */
1.1       bjh21     550:
                    551:        ldmia   r1!, {r3-r4}            /* two words = 4 items */
                    552:
                    553:        eor     r6, r3, r3, lsr #16     /* r6 = (A)(A^B) */
                    554:        eor     r5, r3, r6, lsl #16     /* r5 = (A^A^B)(B) = (B)(B) */
                    555:        eor     r6, r6, r5, lsr #16     /* r6 = (A)(B^A^B) = (A)(A) */
                    556:
                    557:        eor     r8, r4, r4, lsr #16     /* r8 = (A)(A^B) */
                    558:        eor     r7, r4, r8, lsl #16     /* r7 = (A^A^B)(B) = (B)(B) */
                    559:        eor     r8, r8, r7, lsr #16     /* r8 = (A)(B^A^B) = (A)(A) */
                    560:
                    561:        stmia   r0, {r5-r8}             /* 4 words to r0 .. r0 + 12 */
                    562:
                    563:        subs    r2, r2, #0x00000004
1.5       briggs    564:        beq     .Loutswm8_l1
1.1       bjh21     565:
1.5       briggs    566: .Loutswm8_l4:
1.1       bjh21     567:        cmp     r2, #2
1.5       briggs    568:        bcc     .Loutswm8_l2            /* fewer than 2 items left */
1.1       bjh21     569:
                    570:        ldr     r3, [r1], #0x0004       /* r3 = (A)(B) */
                    571:        subs    r2, r2, #0x00000002     /* Done test in Load delay slot */
                    572:
                    573:        eor     r5, r3, r3, lsr #16     /* r5 = (A)(A^B)*/
                    574:        eor     r4, r3, r5, lsl #16     /* r4 = (A^A^B)(B) = (B)(B) */
                    575:        eor     r5, r5, r4, lsr #16     /* r5 = (A)(B^A^B) = (A)(A) */
                    576:
                    577:        stmia   r0, {r4, r5}            /* 2 words to r0 and r0 + 4 */
                    578:
1.5       briggs    579:        beq     .Loutswm8_l1            /* flags still from the subs above */
1.1       bjh21     580:
1.5       briggs    581: .Loutswm8_l2:
1.1       bjh21     582:        cmp     r2, #1
1.5       briggs    583:        bcc     .Loutswm8_l1            /* nothing left */
1.1       bjh21     584:
                    585:        ldrb    r3, [r1], #0x0001       /* low byte of the last item */
                    586:        ldrb    r4, [r1], #0x0001       /* high byte of the last item */
                    587:        subs    r2, r2, #0x00000001     /* Done test in load delay slot */
                    588:                                        /* XXX This test isn't used?  */
                    589:        orr     r3, r3, r4, lsl #8      /* r3 = 16-bit item */
                    590:        orr     r3, r3, r3, lsl #16     /* copy the item into the top half too */
                    591:        str     r3, [r0]
                    592:
1.5       briggs    593: .Loutswm8_l1:
1.5.160.1! tls       594:        pop     {r4-r8,pc}              /* And go home */
        !           595: END(outswm8)

CVSweb <webmaster@jp.NetBSD.org>