Annotation of src/sys/arch/arm/arm/blockio.S, Revision 1.5
1.5 ! briggs 1: /* $NetBSD: blockio.S,v 1.4 2001/06/02 11:15:56 bjh21 Exp $ */
1.1 bjh21 2:
3: /*
1.3 bjh21 4: * Copyright (c) 2001 Ben Harris.
1.1 bjh21 5: * Copyright (c) 1994 Mark Brinicombe.
6: * Copyright (c) 1994 Brini.
7: * All rights reserved.
8: *
9: * This code is derived from software written for Brini by Mark Brinicombe
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
19: * 3. All advertising materials mentioning features or use of this software
20: * must display the following acknowledgement:
21: * This product includes software developed by Brini.
22: * 4. The name of the company nor the name of the author may be used to
23: * endorse or promote products derived from this software without specific
24: * prior written permission.
25: *
26: * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
27: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
28: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
29: * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
30: * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
31: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
32: * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36: * SUCH DAMAGE.
37: *
38: * RiscBSD kernel project
39: *
40: * blockio.S
41: *
42: * optimised block read/write from/to IO routines.
43: *
44: * Created : 08/10/94
45: * Modified : 22/01/99 -- R.Earnshaw
46: * Faster, and small tweaks for StrongARM
47: */
48:
49: #include <machine/asm.h>
1.2 bjh21 50:
1.5 ! briggs 51: RCSID("$NetBSD: blockio.S,v 1.4 2001/06/02 11:15:56 bjh21 Exp $")
1.2 bjh21 52:
53: /*
54: * Read bytes from an I/O address into a block of memory
55: *
56: * r0 = address to read from (IO)
57: * r1 = address to write to (memory)
58: * r2 = length
59: */
60:
61: /* This code will look very familiar if you've read _memcpy(). */
62: ENTRY(read_multi_1)
/*
 * Register use (ip is r12, so it becomes scratch once the frame is built):
 *   r0  = I/O address, never advanced -- every byte is read from [r0]
 *   r1  = memory cursor, post-incremented by each store
 *   r2  = bytes remaining, kept biased by -4 while in the word loop
 *   r3  = data byte / word being assembled
 *   r12 = alignment count, then per-byte scratch
 */
/* Build a stack frame: push fp, caller's sp (held in ip), lr and pc, */
/* then point fp at the slot holding the saved pc. */
1.4 bjh21 63: mov ip, sp
64: stmfd sp!, {fp, ip, lr, pc}
65: sub fp, ip, #4
1.2 bjh21 66: subs r2, r2, #4 /* r2 = length - 4 */
1.5 ! briggs 67: blt .Lrm1_l4 /* less than 4 bytes */
1.2 bjh21 68: ands r12, r1, #3
1.5 ! briggs 69: beq .Lrm1_main /* aligned destination */
/* r12 = 4 - (r1 & 3) = 1..3 bytes needed to word-align r1. */
/* The cmp below drives three copies: always one, a second if r12 >= 2, */
/* a third if r12 > 2 (ldrb/strb leave the flags untouched). */
1.2 bjh21 70: rsb r12, r12, #4
71: cmp r12, #2
72: ldrb r3, [r0]
73: strb r3, [r1], #1
74: ldrgeb r3, [r0]
75: strgeb r3, [r1], #1
76: ldrgtb r3, [r0]
77: strgtb r3, [r1], #1
/* Account for the alignment bytes; go to the tail if fewer than 4 remain. */
78: subs r2, r2, r12
1.5 ! briggs 79: blt .Lrm1_l4
! 80: .Lrm1_main:
/* Word loop: four byte reads from [r0] are packed into r3, first byte in */
/* bits 0-7 and fourth in bits 24-31, then stored as one aligned word. */
! 81: .Lrm1loop:
1.2 bjh21 82: ldrb r3, [r0]
83: ldrb r12, [r0]
84: orr r3, r3, r12, lsl #8
85: ldrb r12, [r0]
86: orr r3, r3, r12, lsl #16
87: ldrb r12, [r0]
88: orr r3, r3, r12, lsl #24
89: str r3, [r1], #4
90: subs r2, r2, #4
1.5 ! briggs 91: bge .Lrm1loop
/* Tail: undo the -4 bias; return at once if nothing is left, otherwise */
/* copy the remaining 1..3 bytes with the same cmp/ge/gt pattern as above. */
! 92: .Lrm1_l4:
1.2 bjh21 93: adds r2, r2, #4 /* r2 = length again */
/* Conditional return: reloads fp, sp (from the saved ip) and pc (from the saved lr). */
1.4 bjh21 94: ldmeqdb fp, {fp, sp, pc}
1.2 bjh21 96: cmp r2, #2
97: ldrb r3, [r0]
98: strb r3, [r1], #1
99: ldrgeb r3, [r0]
100: strgeb r3, [r1], #1
101: ldrgtb r3, [r0]
102: strgtb r3, [r1], #1
/* Unwind the frame and return. */
1.4 bjh21 103: ldmdb fp, {fp, sp, pc}
1.3 bjh21 104:
105: /*
106: * Write bytes to an I/O address from a block of memory
107: *
108: * r0 = address to write to (IO)
109: * r1 = address to read from (memory)
110: * r2 = length
111: */
112:
113: /* This code will look very familiar if you've read _memcpy(). */
114: ENTRY(write_multi_1)
/*
 * Register use (ip is r12, so it becomes scratch once the frame is built):
 *   r0  = I/O address, never advanced -- every byte is written to [r0]
 *   r1  = memory cursor, post-incremented by each load
 *   r2  = bytes remaining, kept biased by -4 while in the word loop
 *   r3  = data byte / word being shifted out
 *   r12 = alignment count
 */
/* Build a stack frame: push fp, caller's sp (held in ip), lr and pc, */
/* then point fp at the slot holding the saved pc. */
1.4 bjh21 115: mov ip, sp
116: stmfd sp!, {fp, ip, lr, pc}
117: sub fp, ip, #4
1.3 bjh21 118: subs r2, r2, #4 /* r2 = length - 4 */
1.5 ! briggs 119: blt .Lwm1_l4 /* less than 4 bytes */
1.3 bjh21 120: ands r12, r1, #3
1.5 ! briggs 121: beq .Lwm1_main /* aligned source */
/* r12 = 4 - (r1 & 3) = 1..3 bytes needed to word-align r1. */
/* The cmp below drives three copies: always one, a second if r12 >= 2, */
/* a third if r12 > 2 (ldrb/strb leave the flags untouched). */
1.3 bjh21 122: rsb r12, r12, #4
123: cmp r12, #2
124: ldrb r3, [r1], #1
125: strb r3, [r0]
126: ldrgeb r3, [r1], #1
127: strgeb r3, [r0]
128: ldrgtb r3, [r1], #1
129: strgtb r3, [r0]
/* Account for the alignment bytes; go to the tail if fewer than 4 remain. */
130: subs r2, r2, r12
1.5 ! briggs 131: blt .Lwm1_l4
! 132: .Lwm1_main:
/* Word loop: load one aligned word, then write its four bytes to [r0] */
/* starting with bits 0-7 and shifting right by 8 between stores. */
! 133: .Lwm1loop:
1.3 bjh21 134: ldr r3, [r1], #4
135: strb r3, [r0]
136: mov r3, r3, lsr #8
137: strb r3, [r0]
138: mov r3, r3, lsr #8
139: strb r3, [r0]
140: mov r3, r3, lsr #8
141: strb r3, [r0]
142: subs r2, r2, #4
1.5 ! briggs 143: bge .Lwm1loop
/* Tail: undo the -4 bias; return at once if nothing is left, otherwise */
/* copy the remaining 1..3 bytes with the same cmp/ge/gt pattern as above. */
! 144: .Lwm1_l4:
1.3 bjh21 145: adds r2, r2, #4 /* r2 = length again */
/* Conditional return: reloads fp, sp (from the saved ip) and pc (from the saved lr). */
1.4 bjh21 146: ldmeqdb fp, {fp, sp, pc}
1.3 bjh21 147: cmp r2, #2
148: ldrb r3, [r1], #1
149: strb r3, [r0]
150: ldrgeb r3, [r1], #1
151: strgeb r3, [r0]
152: ldrgtb r3, [r1], #1
153: strgtb r3, [r0]
/* Unwind the frame and return. */
1.4 bjh21 154: ldmdb fp, {fp, sp, pc}
1.1 bjh21 155:
156: /*
157: * Reads short ints (16 bits) from an I/O address into a block of memory
158: *
159: * r0 = address to read from (IO)
160: * r1 = address to write to (memory)
161: * r2 = length
162: */
163:
164: ENTRY(insw)
/*
 * r2 counts 16-bit items, not bytes: the slow loop subtracts 1 per item
 * and the fast loop subtracts 2 per word stored.
 * r0 is never advanced; every item is read from the same I/O location.
 * No stack frame is built; r3 and ip are the only scratch registers.
 */
165: /* Make sure that we have a positive length */
166: cmp r2, #0x00000000
167: movle pc, lr
168:
169: /* If the destination address and the size is word aligned, do it fast */
/* i.e. r2 is even and r1 is a multiple of 4; the tsteq is only executed */
/* when the first test left Z set. */
170:
171: tst r2, #0x00000001
172: tsteq r1, #0x00000003
1.5 ! briggs 173: beq .Lfastinsw
1.1 bjh21 174:
175: /* Non aligned insw */
/* One item per pass: read a word from [r0] and store its low 16 bits to */
/* memory a byte at a time, low byte first. The subs result is consumed */
/* by the bgt at the bottom; strb and the plain mov do not alter flags. */
176:
1.5 ! briggs 177: .Linswloop:
1.1 bjh21 178: ldr r3, [r0]
179: subs r2, r2, #0x00000001 /* Loop test in load delay slot */
180: strb r3, [r1], #0x0001
181: mov r3, r3, lsr #8
182: strb r3, [r1], #0x0001
1.5 ! briggs 183: bgt .Linswloop
1.1 bjh21 184:
185: mov pc, lr
186:
187: /* Word aligned insw */
/* Two device reads per pass. The first (from r0 + 2) ends up in the low */
/* half of the stored word after the lsr #16; the second (from r0) is */
/* shifted into the high half. */
/* NOTE(review): the r0 + 2 load relies on how the CPU treats a */
/* non-word-aligned ldr, as the original comment says -- confirm on target. */
188:
1.5 ! briggs 189: .Lfastinsw:
1.1 bjh21 190:
1.5 ! briggs 191: .Lfastinswloop:
1.1 bjh21 192: ldr r3, [r0, #0x0002] /* take advantage of nonaligned
193: * word accesses */
194: ldr ip, [r0]
195: mov r3, r3, lsr #16 /* Put the two shorts together */
196: orr r3, r3, ip, lsl #16
197: str r3, [r1], #0x0004 /* Store */
198: subs r2, r2, #0x00000002 /* Next */
1.5 ! briggs 199: bgt .Lfastinswloop
1.1 bjh21 200:
201: mov pc, lr
202:
203:
204: /*
205: * Writes short ints (16 bits) from a block of memory to an I/O address
206: *
207: * r0 = address to write to (IO)
208: * r1 = address to read from (memory)
209: * r2 = length
210: */
211:
212: ENTRY(outsw)
/*
 * r2 counts 16-bit items, not bytes: the slow loop subtracts 1 per item
 * and the fast loop subtracts 2 per word loaded.
 * r0 is never advanced; every item is written to the same I/O location.
 * Each item is written as a full word with the 16-bit value duplicated
 * in both halves. No stack frame is built; r3 and ip are scratch.
 */
213: /* Make sure that we have a positive length */
214: cmp r2, #0x00000000
215: movle pc, lr
216:
217: /* If the source address and the size are word aligned, do it fast */
/* i.e. r2 is even and r1 is a multiple of 4; the tsteq is only executed */
/* when the first test left Z set. */
218:
219: tst r2, #0x00000001
220: tsteq r1, #0x00000003
1.5 ! briggs 221: beq .Lfastoutsw
1.1 bjh21 222:
223: /* Non aligned outsw */
/* One item per pass: assemble it from two bytes (first byte in bits 0-7), */
/* copy it into the upper half as well, and write the word to [r0]. */
/* The subs result is consumed by the bgt; orr and str leave flags alone. */
224:
1.5 ! briggs 225: .Loutswloop:
1.1 bjh21 226: ldrb r3, [r1], #0x0001
227: ldrb ip, [r1], #0x0001
228: subs r2, r2, #0x00000001 /* Loop test in load delay slot */
229: orr r3, r3, ip, lsl #8
230: orr r3, r3, r3, lsl #16
231: str r3, [r0]
1.5 ! briggs 232: bgt .Loutswloop
1.1 bjh21 233:
234: mov pc, lr
235:
236: /* Word aligned outsw */
/* Two items per pass: the three eors turn r3 = (H)(L) into r3 = (L)(L) */
/* and ip = (H)(H) without a temporary; the low item is written first. */
237:
1.5 ! briggs 238: .Lfastoutsw:
1.1 bjh21 239:
1.5 ! briggs 240: .Lfastoutswloop:
1.1 bjh21 241: ldr r3, [r1], #0x0004 /* r3 = (H)(L) */
242: subs r2, r2, #0x00000002 /* Loop test in load delay slot */
243:
244: eor ip, r3, r3, lsr #16 /* ip = (H)(H^L) */
245: eor r3, r3, ip, lsl #16 /* r3 = (H^H^L)(L) = (L)(L) */
246: eor ip, ip, r3, lsr #16 /* ip = (H)(H^L^L) = (H)(H) */
247:
248: str r3, [r0]
249: str ip, [r0]
250:
251: /* mov ip, r3, lsl #16
252: * orr ip, ip, ip, lsr #16
253: * str ip, [r0]
254: *
255: * mov ip, r3, lsr #16
256: * orr ip, ip, ip, lsl #16
257: * str ip, [r0]
258: */
259:
1.5 ! briggs 260: bgt .Lfastoutswloop
1.1 bjh21 261:
262: mov pc, lr
263:
264: /*
265: * reads short ints (16 bits) from an I/O address into a block of memory
266: * with a length guaranteed to be a multiple of 16 bytes
267: * with a word aligned destination address
268: *
269: * r0 = address to read from (IO)
270: * r1 = address to write to (memory)
271: * r2 = length
272: */
273:
274: ENTRY(insw16)
/*
 * Unrolled variant of the word-aligned insw loop: eight device reads and
 * four word stores per pass, with r2 (a count of 16-bit items) reduced
 * by 8 each time. r0 is never advanced.
 * r4, r5 and lr are saved on the stack; lr is then used as scratch and
 * the return is done by popping the saved lr straight into pc.
 */
275: /* Make sure that we have a positive length */
276: cmp r2, #0x00000000
277: movle pc, lr
278:
279: /* If the destination address is word aligned and the size suitably
280: aligned, do it fast */
/* "Suitably aligned" here means r2 is a multiple of 8 items. If either */
/* test fails, Z is clear and control is handed to insw with r0-r2 and lr */
/* untouched, so insw returns directly to our caller. */
281:
282: tst r2, #0x00000007
283: tsteq r1, #0x00000003
284:
285: bne _C_LABEL(insw)
286:
287: /* Word aligned insw */
288:
289: stmfd sp!, {r4,r5,lr}
290:
/* Each group of four instructions builds one output word: the read from */
/* r0 + 2 supplies the low half (after lsr #16), the read from r0 the */
/* high half. Results accumulate in r3, r4, r5 and ip. */
/* NOTE(review): the r0 + 2 load relies on how the CPU treats a */
/* non-word-aligned ldr, as the original comments say -- confirm on target. */
1.5 ! briggs 291: .Linsw16loop:
1.1 bjh21 292: ldr r3, [r0, #0x0002] /* take advantage of nonaligned
293: * word accesses */
294: ldr lr, [r0]
295: mov r3, r3, lsr #16 /* Put the two shorts together */
296: orr r3, r3, lr, lsl #16
297:
298: ldr r4, [r0, #0x0002] /* take advantage of nonaligned
299: * word accesses */
300: ldr lr, [r0]
301: mov r4, r4, lsr #16 /* Put the two shorts together */
302: orr r4, r4, lr, lsl #16
303:
304: ldr r5, [r0, #0x0002] /* take advantage of nonaligned
305: * word accesses */
306: ldr lr, [r0]
307: mov r5, r5, lsr #16 /* Put the two shorts together */
308: orr r5, r5, lr, lsl #16
309:
310: ldr ip, [r0, #0x0002] /* take advantage of nonaligned
311: * word accesses */
312: ldr lr, [r0]
313: mov ip, ip, lsr #16 /* Put the two shorts together */
314: orr ip, ip, lr, lsl #16
315:
/* Store the four assembled words in one burst and advance r1 by 16. */
316: stmia r1!, {r3-r5,ip}
317: subs r2, r2, #0x00000008 /* Next */
1.5 ! briggs 318: bgt .Linsw16loop
1.1 bjh21 319:
320: ldmfd sp!, {r4,r5,pc} /* Restore regs and go home */
321:
322:
323: /*
324: * Writes short ints (16 bits) from a block of memory to an I/O address
325: *
326: * r0 = address to write to (IO)
327: * r1 = address to read from (memory)
328: * r2 = length
329: */
330:
331: ENTRY(outsw16)
/*
 * Unrolled variant of the word-aligned outsw loop: four words are loaded
 * from memory and eight words are written to [r0] per pass, with r2
 * (a count of 16-bit items) reduced by 8 each time. r0 is never advanced.
 * r4, r5 and lr are saved on the stack; lr is then used as a data
 * register and the return is done by popping the saved lr into pc.
 */
332: /* Make sure that we have a positive length */
333: cmp r2, #0x00000000
334: movle pc, lr
335:
336: /* If the source address is word aligned and the size suitably
337: aligned, do it fast */
/* "Suitably aligned" here means r2 is a multiple of 8 items. If either */
/* test fails, Z is clear and control is handed to outsw with r0-r2 and */
/* lr untouched, so outsw returns directly to our caller. */
338:
339: tst r2, #0x00000007
340: tsteq r1, #0x00000003
341:
342: bne _C_LABEL(outsw)
343:
344: /* Word aligned outsw */
345:
346: stmfd sp!, {r4,r5,lr}
347:
/* For each loaded word (A)(B), A being the high half, the three eors */
/* leave (B)(B) in r3 and (A)(A) in the source register; the low item */
/* (r3) is written first, then the high item. */
1.5 ! briggs 348: .Loutsw16loop:
1.1 bjh21 349: ldmia r1!, {r4,r5,ip,lr}
350:
351: eor r3, r4, r4, lsl #16 /* r3 = (A^B)(B) */
352: eor r4, r4, r3, lsr #16 /* r4 = (A)(B^A^B) = (A)(A) */
353: eor r3, r3, r4, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
354: str r3, [r0]
355: str r4, [r0]
356:
357: /* mov r3, r4, lsl #16
358: * orr r3, r3, r3, lsr #16
359: * str r3, [r0]
360: *
361: * mov r3, r4, lsr #16
362: * orr r3, r3, r3, lsl #16
363: * str r3, [r0]
364: */
365:
366: eor r3, r5, r5, lsl #16 /* r3 = (A^B)(B) */
367: eor r5, r5, r3, lsr #16 /* r5 = (A)(B^A^B) = (A)(A) */
368: eor r3, r3, r5, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
369: str r3, [r0]
370: str r5, [r0]
371:
372: eor r3, ip, ip, lsl #16 /* r3 = (A^B)(B) */
373: eor ip, ip, r3, lsr #16 /* ip = (A)(B^A^B) = (A)(A) */
374: eor r3, r3, ip, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
375: str r3, [r0]
376: str ip, [r0]
377:
378: eor r3, lr, lr, lsl #16 /* r3 = (A^B)(B) */
379: eor lr, lr, r3, lsr #16 /* lr = (A)(B^A^B) = (A)(A) */
380: eor r3, r3, lr, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
381: str r3, [r0]
382: str lr, [r0]
383:
384: subs r2, r2, #0x00000008
1.5 ! briggs 385: bgt .Loutsw16loop
1.1 bjh21 386:
387: ldmfd sp!, {r4,r5,pc} /* and go home */
388:
389: /*
390: * reads short ints (16 bits) from an I/O address into a block of memory
391: * The I/O address is assumed to be mapped multiple times in a block of
392: * 8 words.
393: * The destination address should be word aligned.
394: *
395: * r0 = address to read from (IO)
396: * r1 = address to write to (memory)
397: * r2 = length
398: */
399:
400: ENTRY(inswm8)
/*
 * r2 counts 16-bit items. Items are fetched with ldmia from consecutive
 * words starting at r0 (r0 itself is never written back); only the low
 * 16 bits of each fetched word are kept, and pairs are packed into one
 * output word with the earlier item in the low half.
 * The count is consumed in chunks of 8, then 4, then 2, then 1 item.
 * r4-r9 and lr are saved; lr holds the 0xffff0000 mask for the duration.
 */
401: /* Make sure that we have a positive length */
402: cmp r2, #0x00000000
403: movle pc, lr
404:
405: /* If the destination address is word aligned, do it fast; any count
406: is handled below. Otherwise hand over to insw. */
407:
408: tst r1, #0x00000003
409:
410: bne _C_LABEL(insw)
411:
412: /* Word aligned insw */
413:
414: stmfd sp!, {r4-r9,lr}
415:
/* lr = 0xffff0000, built in two steps; used with bic to clear the top */
/* half of a fetched word. */
416: mov lr, #0xff000000
417: orr lr, lr, #0x00ff0000
418:
/* 8 items per pass while at least 8 remain (unsigned compare). */
1.5 ! briggs 419: .Linswm8_loop8:
1.1 bjh21 420: cmp r2, #8
1.5 ! briggs 421: bcc .Linswm8_l8
1.1 bjh21 422:
423: ldmia r0, {r3-r9,ip}
424:
/* Pack (r3,r4) (r5,r6) (r7,r8) (r9,ip) into r3..r6. */
425: bic r3, r3, lr
426: orr r3, r3, r4, lsl #16
427: bic r5, r5, lr
428: orr r4, r5, r6, lsl #16
429: bic r7, r7, lr
430: orr r5, r7, r8, lsl #16
431: bic r9, r9, lr
432: orr r6, r9, ip, lsl #16
433:
434: stmia r1!, {r3-r6}
435:
436: subs r2, r2, #0x00000008 /* Next */
/* Non-zero remainder: re-test at the top. Zero: all done. */
1.5 ! briggs 437: bne .Linswm8_loop8
! 438: beq .Linswm8_l1
1.1 bjh21 439:
/* Fewer than 8 left: take a chunk of 4 if possible. */
1.5 ! briggs 440: .Linswm8_l8:
1.1 bjh21 441: cmp r2, #4
1.5 ! briggs 442: bcc .Linswm8_l4
1.1 bjh21 443:
444: ldmia r0, {r3-r6}
445:
446: bic r3, r3, lr
447: orr r3, r3, r4, lsl #16
448: bic r5, r5, lr
449: orr r4, r5, r6, lsl #16
450:
451: stmia r1!, {r3-r4}
452:
453: subs r2, r2, #0x00000004
1.5 ! briggs 454: beq .Linswm8_l1
1.1 bjh21 455:
/* Fewer than 4 left: take a chunk of 2 if possible. */
1.5 ! briggs 456: .Linswm8_l4:
1.1 bjh21 457: cmp r2, #2
1.5 ! briggs 458: bcc .Linswm8_l2
1.1 bjh21 459:
460: ldmia r0, {r3-r4}
461:
462: bic r3, r3, lr
463: orr r3, r3, r4, lsl #16
464: str r3, [r1], #0x0004
465:
466: subs r2, r2, #0x00000002
1.5 ! briggs 467: beq .Linswm8_l1
1.1 bjh21 468:
/* At most one item left: store its low 16 bits bytewise, low byte first. */
1.5 ! briggs 469: .Linswm8_l2:
1.1 bjh21 470: cmp r2, #1
1.5 ! briggs 471: bcc .Linswm8_l1
1.1 bjh21 472:
473: ldr r3, [r0]
474: subs r2, r2, #0x00000001 /* Test in load delay slot */
475: /* XXX, why don't we use result? */
476:
477: strb r3, [r1], #0x0001
478: mov r3, r3, lsr #8
479: strb r3, [r1], #0x0001
480:
481:
/* Common exit: restore r4-r9 and return by popping the saved lr into pc. */
1.5 ! briggs 482: .Linswm8_l1:
1.1 bjh21 483: ldmfd sp!, {r4-r9,pc} /* And go home */
484:
485: /*
486: * write short ints (16 bits) to an I/O address from a block of memory
487: * The I/O address is assumed to be mapped multiple times in a block of
488: * 8 words.
489: * The source address should be word aligned.
490: *
491: * r0 = address to write to (IO)
492: * r1 = address to read from (memory)
493: * r2 = length
494: */
495:
496: ENTRY(outswm8)
/*
 * r2 counts 16-bit items. Each memory word (A)(B), A being the high half,
 * is expanded into two words (B)(B) and (A)(A), which are written with
 * stmia to consecutive words starting at r0 (r0 itself is never written
 * back), low item first.
 * The count is consumed in chunks of 8, then 4, then 2, then 1 item.
 * r4-r8 and lr are saved; lr is used as a data register in the 8-item loop.
 */
497: /* Make sure that we have a positive length */
498: cmp r2, #0x00000000
499: movle pc, lr
500:
501: /* If the source address is word aligned, do it fast; any count
502: is handled below. Otherwise hand over to outsw. */
503:
504: tst r1, #0x00000003
505:
506: bne _C_LABEL(outsw)
507:
508: /* Word aligned outsw */
509:
510: stmfd sp!, {r4-r8,lr}
511:
/* 8 items per pass while at least 8 remain (unsigned compare). */
1.5 ! briggs 512: .Loutswm8_loop8:
1.1 bjh21 513: cmp r2, #8
1.5 ! briggs 514: bcc .Loutswm8_l8
1.1 bjh21 515:
/* Load into every other register so each word can expand in place into */
/* the pair (r3,r4) (r5,r6) (r7,r8) (ip,lr). */
516: ldmia r1!, {r3,r5,r7,ip}
517:
518: eor r4, r3, r3, lsr #16 /* r4 = (A)(A^B) */
519: eor r3, r3, r4, lsl #16 /* r3 = (A^A^B)(B) = (B)(B) */
520: eor r4, r4, r3, lsr #16 /* r4 = (A)(B^A^B) = (A)(A) */
521:
522: eor r6, r5, r5, lsr #16 /* r6 = (A)(A^B) */
523: eor r5, r5, r6, lsl #16 /* r5 = (A^A^B)(B) = (B)(B) */
524: eor r6, r6, r5, lsr #16 /* r6 = (A)(B^A^B) = (A)(A) */
525:
526: eor r8, r7, r7, lsr #16 /* r8 = (A)(A^B) */
527: eor r7, r7, r8, lsl #16 /* r7 = (A^A^B)(B) = (B)(B) */
528: eor r8, r8, r7, lsr #16 /* r8 = (A)(B^A^B) = (A)(A) */
529:
530: eor lr, ip, ip, lsr #16 /* lr = (A)(A^B) */
531: eor ip, ip, lr, lsl #16 /* ip = (A^A^B)(B) = (B)(B) */
532: eor lr, lr, ip, lsr #16 /* lr = (A)(B^A^B) = (A)(A) */
533:
534: stmia r0, {r3-r8,ip,lr}
535:
536: subs r2, r2, #0x00000008 /* Next */
/* Non-zero remainder: re-test at the top. Zero: all done. */
1.5 ! briggs 537: bne .Loutswm8_loop8
! 538: beq .Loutswm8_l1
1.1 bjh21 539:
/* Fewer than 8 left: take a chunk of 4 if possible. */
1.5 ! briggs 540: .Loutswm8_l8:
1.1 bjh21 541: cmp r2, #4
1.5 ! briggs 542: bcc .Loutswm8_l4
1.1 bjh21 543:
544: ldmia r1!, {r3-r4}
545:
546: eor r6, r3, r3, lsr #16 /* r6 = (A)(A^B) */
547: eor r5, r3, r6, lsl #16 /* r5 = (A^A^B)(B) = (B)(B) */
548: eor r6, r6, r5, lsr #16 /* r6 = (A)(B^A^B) = (A)(A) */
549:
550: eor r8, r4, r4, lsr #16 /* r8 = (A)(A^B) */
551: eor r7, r4, r8, lsl #16 /* r7 = (A^A^B)(B) = (B)(B) */
552: eor r8, r8, r7, lsr #16 /* r8 = (A)(B^A^B) = (A)(A) */
553:
554: stmia r0, {r5-r8}
555:
556: subs r2, r2, #0x00000004
1.5 ! briggs 557: beq .Loutswm8_l1
1.1 bjh21 558:
/* Fewer than 4 left: take a chunk of 2 if possible. */
1.5 ! briggs 559: .Loutswm8_l4:
1.1 bjh21 560: cmp r2, #2
1.5 ! briggs 561: bcc .Loutswm8_l2
1.1 bjh21 562:
563: ldr r3, [r1], #0x0004 /* r3 = (A)(B) */
564: subs r2, r2, #0x00000002 /* Done test in Load delay slot */
565:
566: eor r5, r3, r3, lsr #16 /* r5 = (A)(A^B)*/
567: eor r4, r3, r5, lsl #16 /* r4 = (A^A^B)(B) = (B)(B) */
568: eor r5, r5, r4, lsr #16 /* r5 = (A)(B^A^B) = (A)(A) */
569:
570: stmia r0, {r4, r5}
571:
/* Flags are still those of the subs above: eor and stmia do not set them. */
1.5 ! briggs 572: beq .Loutswm8_l1
1.1 bjh21 573:
/* At most one item left: build it from two bytes (first byte in bits 0-7), */
/* duplicate it into the high half and write the word. */
1.5 ! briggs 574: .Loutswm8_l2:
1.1 bjh21 575: cmp r2, #1
1.5 ! briggs 576: bcc .Loutswm8_l1
1.1 bjh21 577:
578: ldrb r3, [r1], #0x0001
579: ldrb r4, [r1], #0x0001
580: subs r2, r2, #0x00000001 /* Done test in load delay slot */
581: /* XXX This test isn't used? */
582: orr r3, r3, r4, lsl #8
583: orr r3, r3, r3, lsl #16
584: str r3, [r0]
585:
/* Common exit: restore r4-r8 and return by popping the saved lr into pc. */
1.5 ! briggs 586: .Loutswm8_l1:
1.1 bjh21 587: ldmfd sp!, {r4-r8,pc} /* And go home */
CVSweb <webmaster@jp.NetBSD.org>