Annotation of src/sys/arch/arm/arm/blockio.S, Revision 1.3
1.3 ! bjh21 1: /* $NetBSD: blockio.S,v 1.2 2001/05/30 00:14:09 bjh21 Exp $ */
1.1 bjh21 2:
3: /*
1.3 ! bjh21 4: * Copyright (c) 2001 Ben Harris.
1.1 bjh21 5: * Copyright (c) 1994 Mark Brinicombe.
6: * Copyright (c) 1994 Brini.
7: * All rights reserved.
8: *
9: * This code is derived from software written for Brini by Mark Brinicombe
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
19: * 3. All advertising materials mentioning features or use of this software
20: * must display the following acknowledgement:
21: * This product includes software developed by Brini.
22: * 4. The name of the company nor the name of the author may be used to
23: * endorse or promote products derived from this software without specific
24: * prior written permission.
25: *
26: * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
27: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
28: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
29: * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
30: * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
31: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
32: * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36: * SUCH DAMAGE.
37: *
38: * RiscBSD kernel project
39: *
40: * blockio.S
41: *
42: * optimised block read/write from/to IO routines.
43: *
44: * Created : 08/10/94
45: * Modified : 22/01/99 -- R.Earnshaw
46: * Faster, and small tweaks for StrongARM
47: */
48:
49: #include <machine/asm.h>
1.2 bjh21 50:
1.3 ! bjh21 51: RCSID("$NetBSD: blockio.S,v 1.2 2001/05/30 00:14:09 bjh21 Exp $")
1.2 bjh21 52:
/*
 * read_multi_1: fill a memory buffer with bytes read repeatedly from a
 * single I/O location.  The I/O pointer is never advanced; only the
 * memory pointer moves.
 *
 * On entry:
 *	r0 = I/O address to read from
 *	r1 = memory address to store to
 *	r2 = number of bytes to transfer
 *
 * Uses r3 and ip (r12) as scratch; r1, r2 and the flags are modified.
 */

/* Laid out like _memcpy(): align the pointer, move words, finish the tail. */
ENTRY(read_multi_1)
	subs	r2, r2, #4		/* bias the count: r2 = bytes - 4 */
	blt	Lrm1_tail		/* fewer than 4 bytes altogether */
	ands	ip, r1, #3		/* ip = destination offset within word */
	beq	Lrm1_words		/* destination already word aligned */

	/* Store the 1..3 bytes that bring r1 up to a word boundary. */
	rsb	ip, ip, #4		/* ip = bytes to the next boundary */
	cmp	ip, #2			/* lt: 1 byte, eq: 2 bytes, gt: 3 bytes */
	ldrb	r3, [r0]
	strb	r3, [r1], #1
	ldrgeb	r3, [r0]
	strgeb	r3, [r1], #1
	ldrgtb	r3, [r0]
	strgtb	r3, [r1], #1
	subs	r2, r2, ip		/* still biased by -4 */
	blt	Lrm1_tail		/* no complete word remains */

	/*
	 * Four byte reads are packed into one word, first byte in bits 7:0,
	 * and written with a single word store.
	 * NOTE(review): the bytes land in memory in read order only if the
	 * word store is little-endian -- confirm for big-endian builds.
	 */
Lrm1_words:
	ldrb	r3, [r0]
	ldrb	ip, [r0]
	orr	r3, r3, ip, lsl #8
	ldrb	ip, [r0]
	orr	r3, r3, ip, lsl #16
	ldrb	ip, [r0]
	orr	r3, r3, ip, lsl #24
	str	r3, [r1], #4
	subs	r2, r2, #4
	bge	Lrm1_words

	/* 0..3 bytes are left once the bias is removed. */
Lrm1_tail:
	adds	r2, r2, #4		/* undo the bias: r2 = bytes left */
	moveq	pc, lr			/* nothing left: return */
	cmp	r2, #2			/* lt: 1 byte, eq: 2 bytes, gt: 3 bytes */
	ldrb	r3, [r0]
	strb	r3, [r1], #1
	ldrgeb	r3, [r0]
	strgeb	r3, [r1], #1
	ldrgtb	r3, [r0]
	strgtb	r3, [r1], #1
	mov	pc, lr
! 100:
/*
 * write_multi_1: write a memory buffer, one byte at a time, to a single
 * I/O location.  The I/O pointer is never advanced; only the memory
 * pointer moves.
 *
 * On entry:
 *	r0 = I/O address to write to
 *	r1 = memory address to fetch from
 *	r2 = number of bytes to transfer
 *
 * Uses r3 and ip (r12) as scratch; r1, r2 and the flags are modified.
 */

/* Laid out like _memcpy(): align the pointer, move words, finish the tail. */
ENTRY(write_multi_1)
	subs	r2, r2, #4		/* bias the count: r2 = bytes - 4 */
	blt	Lwm1_tail		/* fewer than 4 bytes altogether */
	ands	ip, r1, #3		/* ip = source offset within word */
	beq	Lwm1_words		/* source already word aligned */

	/* Send the 1..3 bytes that bring r1 up to a word boundary. */
	rsb	ip, ip, #4		/* ip = bytes to the next boundary */
	cmp	ip, #2			/* lt: 1 byte, eq: 2 bytes, gt: 3 bytes */
	ldrb	r3, [r1], #1
	strb	r3, [r0]
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0]
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0]
	subs	r2, r2, ip		/* still biased by -4 */
	blt	Lwm1_tail		/* no complete word remains */

	/*
	 * Fetch one word and emit it as four byte writes, bits 7:0 first,
	 * shifting the next byte down before each following store.
	 */
Lwm1_words:
	ldr	r3, [r1], #4
	strb	r3, [r0]
	mov	r3, r3, lsr #8
	strb	r3, [r0]
	mov	r3, r3, lsr #8
	strb	r3, [r0]
	mov	r3, r3, lsr #8
	strb	r3, [r0]
	subs	r2, r2, #4
	bge	Lwm1_words

	/* 0..3 bytes are left once the bias is removed. */
Lwm1_tail:
	adds	r2, r2, #4		/* undo the bias: r2 = bytes left */
	moveq	pc, lr			/* nothing left: return */
	cmp	r2, #2			/* lt: 1 byte, eq: 2 bytes, gt: 3 bytes */
	ldrb	r3, [r1], #1
	strb	r3, [r0]
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0]
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0]
	mov	pc, lr
1.1 bjh21 148:
/*
 * insw: read 16-bit quantities from an I/O address into memory.
 *
 * On entry:
 *	r0 = I/O address to read from (never advanced)
 *	r1 = memory address to store to
 *	r2 = number of 16-bit items to transfer
 *
 * Uses r3 and ip (r12) as scratch; r1, r2 and the flags are modified.
 */

ENTRY(insw)
	/* Return immediately unless the count is positive. */
	cmp	r2, #0
	movle	pc, lr

	/*
	 * The word-at-a-time loop needs an even count and a word aligned
	 * destination; otherwise store the data byte by byte.
	 */
	tst	r2, #1
	tsteq	r1, #3
	beq	insw_wordwise

	/* Byte-wise path: one item per pass, low byte stored first. */
insw_bytewise:
	ldr	r3, [r0]
	subs	r2, r2, #1		/* count down while the load completes */
	strb	r3, [r1], #1		/* bits 7:0 */
	mov	r3, r3, lsr #8
	strb	r3, [r1], #1		/* bits 15:8 */
	bgt	insw_bytewise		/* flags still those of the subs */

	mov	pc, lr

	/*
	 * Word-wise path: two items per pass.  The first item is taken
	 * from bits 31:16 of a load at r0 + 2 (the original relies on
	 * non-aligned word access behaviour here), the second from
	 * bits 15:0 of a load at r0.
	 */
insw_wordwise:
	ldr	r3, [r0, #2]
	ldr	ip, [r0]
	mov	r3, r3, lsr #16		/* first item -> low half */
	orr	r3, r3, ip, lsl #16	/* second item -> high half */
	str	r3, [r1], #4
	subs	r2, r2, #2
	bgt	insw_wordwise

	mov	pc, lr
195:
196:
/*
 * outsw: write 16-bit quantities from memory to an I/O address.
 *
 * On entry:
 *	r0 = I/O address to write to (never advanced)
 *	r1 = memory address to fetch from
 *	r2 = number of 16-bit items to transfer
 *
 * Every store to the device is a full word carrying the item in both
 * halves.  Uses r3 and ip (r12) as scratch; r1, r2 and the flags are
 * modified.
 */

ENTRY(outsw)
	/* Return immediately unless the count is positive. */
	cmp	r2, #0
	movle	pc, lr

	/*
	 * The word-at-a-time loop needs an even count and a word aligned
	 * source; otherwise fetch the data byte by byte.
	 */
	tst	r2, #1
	tsteq	r1, #3
	beq	outsw_wordwise

	/* Byte-wise path: build one item from two bytes, low byte first. */
outsw_bytewise:
	ldrb	r3, [r1], #1
	ldrb	ip, [r1], #1
	subs	r2, r2, #1		/* count down while the load completes */
	orr	r3, r3, ip, lsl #8	/* r3 = 16-bit item */
	orr	r3, r3, r3, lsl #16	/* copy it into the high half too */
	str	r3, [r0]
	bgt	outsw_bytewise		/* flags still those of the subs */

	mov	pc, lr

	/*
	 * Word-wise path: one memory word (two items) per pass.  With
	 * r3 = (H)(L), three exclusive-ors give r3 = (L)(L) and
	 * ip = (H)(H); L is written first.
	 */
outsw_wordwise:
	ldr	r3, [r1], #4		/* r3 = (H)(L) */
	subs	r2, r2, #2		/* count down while the load completes */

	eor	ip, r3, r3, lsr #16	/* ip = (H)(H^L) */
	eor	r3, r3, ip, lsl #16	/* r3 = (H^H^L)(L) = (L)(L) */
	eor	ip, ip, r3, lsr #16	/* ip = (H)(H^L^L) = (H)(H) */

	str	r3, [r0]
	str	ip, [r0]

	/*
	 * A longer sequence with the same effect, kept for reference:
	 *
	 *	mov	ip, r3, lsl #16
	 *	orr	ip, ip, ip, lsr #16
	 *	str	ip, [r0]
	 *
	 *	mov	ip, r3, lsr #16
	 *	orr	ip, ip, ip, lsl #16
	 *	str	ip, [r0]
	 */

	bgt	outsw_wordwise		/* flags still those of the subs */

	mov	pc, lr
256:
/*
 * insw16: read 16-bit quantities from an I/O address into memory,
 * eight items (16 bytes) per pass.
 *
 * The unrolled loop is used only when the count is a multiple of 8
 * and the destination is word aligned; any other request is handed
 * to insw().
 *
 * On entry:
 *	r0 = I/O address to read from (never advanced)
 *	r1 = memory address to store to
 *	r2 = number of 16-bit items to transfer
 *
 * r4, r5 and lr are saved on the stack and restored on return.
 */

ENTRY(insw16)
	/* Return immediately unless the count is positive. */
	cmp	r2, #0
	movle	pc, lr

	/* Count not a multiple of 8, or r1 not word aligned: use insw(). */
	tst	r2, #7
	tsteq	r1, #3

	bne	_C_LABEL(insw)

	stmfd	sp!, {r4, r5, lr}

	/*
	 * Each group below builds one word from two items: the first item
	 * comes from bits 31:16 of a load at r0 + 2 (the original relies
	 * on non-aligned word access behaviour here), the second from
	 * bits 15:0 of a load at r0.  lr is the temporary.
	 */
insw16_next:
	ldr	r3, [r0, #2]
	ldr	lr, [r0]
	mov	r3, r3, lsr #16		/* first item -> low half */
	orr	r3, r3, lr, lsl #16	/* second item -> high half */

	ldr	r4, [r0, #2]
	ldr	lr, [r0]
	mov	r4, r4, lsr #16
	orr	r4, r4, lr, lsl #16

	ldr	r5, [r0, #2]
	ldr	lr, [r0]
	mov	r5, r5, lsr #16
	orr	r5, r5, lr, lsl #16

	ldr	ip, [r0, #2]
	ldr	lr, [r0]
	mov	ip, ip, lsr #16
	orr	ip, ip, lr, lsl #16

	stmia	r1!, {r3-r5, ip}	/* store all four words at once */
	subs	r2, r2, #8
	bgt	insw16_next

	ldmfd	sp!, {r4, r5, pc}	/* restore registers and return */
314:
315:
/*
 * outsw16: write 16-bit quantities from memory to an I/O address,
 * eight items (16 bytes) per pass.
 *
 * The unrolled loop is used only when the count is a multiple of 8
 * and the source is word aligned; any other request is handed to
 * outsw().
 *
 * On entry:
 *	r0 = I/O address to write to (never advanced)
 *	r1 = memory address to fetch from
 *	r2 = number of 16-bit items to transfer
 *
 * r4, r5 and lr are saved on the stack and restored on return.
 */

ENTRY(outsw16)
	/* Return immediately unless the count is positive. */
	cmp	r2, #0
	movle	pc, lr

	/* Count not a multiple of 8, or r1 not word aligned: use outsw(). */
	tst	r2, #7
	tsteq	r1, #3

	bne	_C_LABEL(outsw)

	stmfd	sp!, {r4, r5, lr}

	/*
	 * Four memory words are fetched per pass.  Each word (A)(B) is
	 * turned by three exclusive-ors into r3 = (B)(B) and the source
	 * register = (A)(A); the (B)(B) word is written first.
	 */
outsw16_next:
	ldmia	r1!, {r4, r5, ip, lr}

	eor	r3, r4, r4, lsl #16	/* r3 = (A^B)(B) */
	eor	r4, r4, r3, lsr #16	/* r4 = (A)(B^A^B) = (A)(A) */
	eor	r3, r3, r4, lsl #16	/* r3 = (A^B^A)(B) = (B)(B) */
	str	r3, [r0]
	str	r4, [r0]

	/*
	 * A longer sequence with the same effect, kept for reference:
	 *
	 *	mov	r3, r4, lsl #16
	 *	orr	r3, r3, r3, lsr #16
	 *	str	r3, [r0]
	 *
	 *	mov	r3, r4, lsr #16
	 *	orr	r3, r3, r3, lsl #16
	 *	str	r3, [r0]
	 */

	eor	r3, r5, r5, lsl #16	/* r3 = (A^B)(B) */
	eor	r5, r5, r3, lsr #16	/* r5 = (A)(B^A^B) = (A)(A) */
	eor	r3, r3, r5, lsl #16	/* r3 = (A^B^A)(B) = (B)(B) */
	str	r3, [r0]
	str	r5, [r0]

	eor	r3, ip, ip, lsl #16	/* r3 = (A^B)(B) */
	eor	ip, ip, r3, lsr #16	/* ip = (A)(B^A^B) = (A)(A) */
	eor	r3, r3, ip, lsl #16	/* r3 = (A^B^A)(B) = (B)(B) */
	str	r3, [r0]
	str	ip, [r0]

	eor	r3, lr, lr, lsl #16	/* r3 = (A^B)(B) */
	eor	lr, lr, r3, lsr #16	/* lr = (A)(B^A^B) = (A)(A) */
	eor	r3, r3, lr, lsl #16	/* r3 = (A^B^A)(B) = (B)(B) */
	str	r3, [r0]
	str	lr, [r0]

	subs	r2, r2, #8
	bgt	outsw16_next

	ldmfd	sp!, {r4, r5, pc}	/* restore registers and return */
381:
/*
 * inswm8: read 16-bit quantities from an I/O address into memory using
 * multi-register loads.
 *
 * The I/O location is assumed to be mapped repeatedly across a block
 * of 8 consecutive words, so one ldmia of up to 8 registers yields up
 * to 8 items.  Only bits 15:0 of each loaded word are kept.
 *
 * A destination that is not word aligned is handed to insw().
 *
 * On entry:
 *	r0 = I/O address to read from (never advanced)
 *	r1 = memory address to store to
 *	r2 = number of 16-bit items to transfer
 *
 * r4-r9 and lr are saved on the stack and restored on return.
 */

ENTRY(inswm8)
	/* Return immediately unless the count is positive. */
	cmp	r2, #0
	movle	pc, lr

	/* Only the destination alignment is checked here. */
	tst	r1, #3

	bne	_C_LABEL(insw)

	stmfd	sp!, {r4-r9, lr}

	/* lr = 0xffff0000: mask that clears the upper half of a word. */
	mov	lr, #0xff000000
	orr	lr, lr, #0x00ff0000

	/* While 8 or more items remain: 8 loads in, 4 packed words out. */
inswm8_by8:
	cmp	r2, #8
	bcc	inswm8_lt8

	ldmia	r0, {r3-r9, ip}

	bic	r3, r3, lr		/* keep bits 15:0 of item 0 */
	orr	r3, r3, r4, lsl #16	/* item 1 -> high half */
	bic	r5, r5, lr
	orr	r4, r5, r6, lsl #16	/* items 2 and 3 */
	bic	r7, r7, lr
	orr	r5, r7, r8, lsl #16	/* items 4 and 5 */
	bic	r9, r9, lr
	orr	r6, r9, ip, lsl #16	/* items 6 and 7 */

	stmia	r1!, {r3-r6}

	subs	r2, r2, #8
	bne	inswm8_by8
	beq	inswm8_done

	/* Fewer than 8 left: take 4 items if available. */
inswm8_lt8:
	cmp	r2, #4
	bcc	inswm8_lt4

	ldmia	r0, {r3-r6}

	bic	r3, r3, lr
	orr	r3, r3, r4, lsl #16
	bic	r5, r5, lr
	orr	r4, r5, r6, lsl #16

	stmia	r1!, {r3-r4}

	subs	r2, r2, #4
	beq	inswm8_done

	/* Fewer than 4 left: take 2 items if available. */
inswm8_lt4:
	cmp	r2, #2
	bcc	inswm8_lt2

	ldmia	r0, {r3-r4}

	bic	r3, r3, lr
	orr	r3, r3, r4, lsl #16
	str	r3, [r1], #4

	subs	r2, r2, #2
	beq	inswm8_done

	/* At most one item left: store it as two bytes, low byte first. */
inswm8_lt2:
	cmp	r2, #1
	bcc	inswm8_done

	ldr	r3, [r0]
	subs	r2, r2, #1		/* result and flags are not used:
					 * control falls into the exit */

	strb	r3, [r1], #1
	mov	r3, r3, lsr #8
	strb	r3, [r1], #1


inswm8_done:
	ldmfd	sp!, {r4-r9, pc}	/* restore registers and return */
477:
/*
 * outswm8: write 16-bit quantities from memory to an I/O address using
 * multi-register stores.
 *
 * The I/O location is assumed to be mapped repeatedly across a block
 * of 8 consecutive words, so one stmia of up to 8 registers delivers
 * up to 8 items.  Each stored word carries its item in both halves.
 *
 * A source that is not word aligned is handed to outsw().
 *
 * On entry:
 *	r0 = I/O address to write to (never advanced)
 *	r1 = memory address to fetch from
 *	r2 = number of 16-bit items to transfer
 *
 * r4-r8 and lr are saved on the stack and restored on return.
 */

ENTRY(outswm8)
	/* Return immediately unless the count is positive. */
	cmp	r2, #0
	movle	pc, lr

	/* Only the source alignment is checked here. */
	tst	r1, #3

	bne	_C_LABEL(outsw)

	stmfd	sp!, {r4-r8, lr}

	/*
	 * While 8 or more items remain: fetch 4 memory words into every
	 * other register, then split each (A)(B) into a (B)(B), (A)(A)
	 * pair occupying adjacent registers of the store list.
	 */
outswm8_by8:
	cmp	r2, #8
	bcc	outswm8_lt8

	ldmia	r1!, {r3, r5, r7, ip}

	eor	r4, r3, r3, lsr #16	/* r4 = (A)(A^B) */
	eor	r3, r3, r4, lsl #16	/* r3 = (A^A^B)(B) = (B)(B) */
	eor	r4, r4, r3, lsr #16	/* r4 = (A)(B^A^B) = (A)(A) */

	eor	r6, r5, r5, lsr #16	/* r6 = (A)(A^B) */
	eor	r5, r5, r6, lsl #16	/* r5 = (A^A^B)(B) = (B)(B) */
	eor	r6, r6, r5, lsr #16	/* r6 = (A)(B^A^B) = (A)(A) */

	eor	r8, r7, r7, lsr #16	/* r8 = (A)(A^B) */
	eor	r7, r7, r8, lsl #16	/* r7 = (A^A^B)(B) = (B)(B) */
	eor	r8, r8, r7, lsr #16	/* r8 = (A)(B^A^B) = (A)(A) */

	eor	lr, ip, ip, lsr #16	/* lr = (A)(A^B) */
	eor	ip, ip, lr, lsl #16	/* ip = (A^A^B)(B) = (B)(B) */
	eor	lr, lr, ip, lsr #16	/* lr = (A)(B^A^B) = (A)(A) */

	stmia	r0, {r3-r8, ip, lr}

	subs	r2, r2, #8
	bne	outswm8_by8
	beq	outswm8_done

	/* Fewer than 8 left: send 4 items if available. */
outswm8_lt8:
	cmp	r2, #4
	bcc	outswm8_lt4

	ldmia	r1!, {r3-r4}

	eor	r6, r3, r3, lsr #16	/* r6 = (A)(A^B) */
	eor	r5, r3, r6, lsl #16	/* r5 = (A^A^B)(B) = (B)(B) */
	eor	r6, r6, r5, lsr #16	/* r6 = (A)(B^A^B) = (A)(A) */

	eor	r8, r4, r4, lsr #16	/* r8 = (A)(A^B) */
	eor	r7, r4, r8, lsl #16	/* r7 = (A^A^B)(B) = (B)(B) */
	eor	r8, r8, r7, lsr #16	/* r8 = (A)(B^A^B) = (A)(A) */

	stmia	r0, {r5-r8}

	subs	r2, r2, #4
	beq	outswm8_done

	/* Fewer than 4 left: send 2 items if available. */
outswm8_lt4:
	cmp	r2, #2
	bcc	outswm8_lt2

	ldr	r3, [r1], #4		/* r3 = (A)(B) */
	subs	r2, r2, #2		/* flags are consumed by the beq below */

	eor	r5, r3, r3, lsr #16	/* r5 = (A)(A^B) */
	eor	r4, r3, r5, lsl #16	/* r4 = (A^A^B)(B) = (B)(B) */
	eor	r5, r5, r4, lsr #16	/* r5 = (A)(B^A^B) = (A)(A) */

	stmia	r0, {r4, r5}

	beq	outswm8_done

	/* At most one item left: build it from two bytes, low byte first. */
outswm8_lt2:
	cmp	r2, #1
	bcc	outswm8_done

	ldrb	r3, [r1], #1
	ldrb	r4, [r1], #1
	subs	r2, r2, #1		/* result and flags are not used:
					 * control falls into the exit */
	orr	r3, r3, r4, lsl #8	/* r3 = 16-bit item */
	orr	r3, r3, r3, lsl #16	/* copy it into the high half too */
	str	r3, [r0]

outswm8_done:
	ldmfd	sp!, {r4-r8, pc}	/* restore registers and return */
CVSweb <webmaster@jp.NetBSD.org>