Annotation of src/sys/arch/mips/mips/cache_r5k.c, Revision 1.8
1.8 ! rafal 1: /* $NetBSD: cache_r5k.c,v 1.7 2003/02/17 12:32:13 simonb Exp $ */
1.2 thorpej 2:
3: /*
4: * Copyright 2001 Wasabi Systems, Inc.
5: * All rights reserved.
6: *
7: * Written by Jason R. Thorpe for Wasabi Systems, Inc.
8: *
9: * Redistribution and use in source and binary forms, with or without
10: * modification, are permitted provided that the following conditions
11: * are met:
12: * 1. Redistributions of source code must retain the above copyright
13: * notice, this list of conditions and the following disclaimer.
14: * 2. Redistributions in binary form must reproduce the above copyright
15: * notice, this list of conditions and the following disclaimer in the
16: * documentation and/or other materials provided with the distribution.
17: * 3. All advertising materials mentioning features or use of this software
18: * must display the following acknowledgement:
19: * This product includes software developed for the NetBSD Project by
20: * Wasabi Systems, Inc.
21: * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22: * or promote products derived from this software without specific prior
23: * written permission.
24: *
25: * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35: * POSSIBILITY OF SUCH DAMAGE.
36: */
37:
38: #include <sys/param.h>
39:
40: #include <mips/cache.h>
41: #include <mips/cache_r4k.h>
1.8 ! rafal 42: #include <mips/cache_r5k.h>
1.2 thorpej 43: #include <mips/locore.h>
44:
45: /*
46: * Cache operations for R5000-style caches:
47: *
48: * - 2-way set-associative
49: * - Write-back
50: * - Virtually indexed, physically tagged
51: *
52: * Since the R4600 is so similar (2-way set-associative, 32b/l),
53: * we handle that here, too. Note for R4600, we have to work
54: * around some chip bugs. From the v1.7 errata:
55: *
56: * 18. The CACHE instructions Hit_Writeback_Invalidate_D, Hit_Writeback_D,
57: * Hit_Invalidate_D and Create_Dirty_Excl_D should only be
58: * executed if there is no other dcache activity. If the dcache is
 59: * accessed for another instruction immediately preceding when these
60: * cache instructions are executing, it is possible that the dcache
61: * tag match outputs used by these cache instructions will be
62: * incorrect. These cache instructions should be preceded by at least
63: * four instructions that are not any kind of load or store
64: * instruction.
65: *
66: * ...and from the v2.0 errata:
67: *
68: * The CACHE instructions Hit_Writeback_Inv_D, Hit_Writeback_D,
69: * Hit_Invalidate_D and Create_Dirty_Exclusive_D will only operate
70: * correctly if the internal data cache refill buffer is empty. These
71: * CACHE instructions should be separated from any potential data cache
72: * miss by a load instruction to an uncached address to empty the response
73: * buffer.
74: *
75: * XXX Does not handle split secondary caches.
76: */
77:
/* Round/truncate an address to a 16-byte cache-line boundary. */
#define	round_line16(x)		(((x) + 15) & ~15)
#define	trunc_line16(x)		((x) & ~15)
/* Round/truncate an address to a 32-byte cache-line boundary. */
#define	round_line(x)		(((x) + 31) & ~31)
#define	trunc_line(x)		((x) & ~31)

/* The CACHE instruction requires at least the MIPS-III ISA. */
__asm(".set mips3");
84:
85: void
86: r5k_icache_sync_all_32(void)
87: {
88: vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
89: vaddr_t eva = va + mips_picache_size;
90:
91: /*
92: * Since we're hitting the whole thing, we don't have to
93: * worry about the 2 different "ways".
94: */
95:
96: mips_dcache_wbinv_all();
97:
98: __asm __volatile("sync");
99:
100: while (va < eva) {
101: cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
102: va += (32 * 32);
103: }
104: }
105:
106: void
107: r5k_icache_sync_range_32(vaddr_t va, vsize_t size)
108: {
109: vaddr_t eva = round_line(va + size);
110:
111: va = trunc_line(va);
112:
113: mips_dcache_wb_range(va, (eva - va));
114:
115: __asm __volatile("sync");
116:
117: while ((eva - va) >= (32 * 32)) {
118: cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
119: va += (32 * 32);
120: }
121:
122: while (va < eva) {
123: cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
124: va += 32;
125: }
126: }
127:
/*
 * Invalidate, by cache index, the icache lines (32 bytes each) that
 * would hold [va, va+size).  Both ways of the 2-way cache are hit:
 * way 0 via va, way 1 via w2va (one way-size above way 0).
 */
void
r5k_icache_sync_range_index_32(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva, orig_va;

	orig_va = va;

	eva = round_line(va + size);
	va = trunc_line(va);

	mips_dcache_wbinv_range_index(va, (eva - va));

	__asm __volatile("sync");

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(orig_va & mips_picache_way_mask);

	eva = round_line(va + size);
	va = trunc_line(va);
	w2va = va + mips_picache_way_size;

	/* 16 lines in each way per iteration for the bulk of the range. */
	while ((eva - va) >= (16 * 32)) {
		cache_r4k_op_16lines_32_2way(va, w2va,
		    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (16 * 32);
		w2va += (16 * 32);
	}

	/* Remaining lines, one per way at a time. */
	while (va < eva) {
		cache_op_r4k_line( va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 32;
		w2va += 32;
	}
}
168:
169: void
1.3 takemura 170: r5k_pdcache_wbinv_all_16(void)
171: {
172: vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
173: vaddr_t eva = va + mips_pdcache_size;
174:
175: /*
176: * Since we're hitting the whole thing, we don't have to
177: * worry about the 2 different "ways".
178: */
179:
180: while (va < eva) {
181: cache_r4k_op_32lines_16(va,
182: CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
183: va += (32 * 16);
184: }
185: }
186:
187: void
1.2 thorpej 188: r5k_pdcache_wbinv_all_32(void)
189: {
190: vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
191: vaddr_t eva = va + mips_pdcache_size;
192:
193: /*
194: * Since we're hitting the whole thing, we don't have to
195: * worry about the 2 different "ways".
196: */
197:
198: while (va < eva) {
199: cache_r4k_op_32lines_32(va,
200: CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
201: va += (32 * 32);
202: }
203: }
204:
/*
 * Write back and invalidate the dcache lines covering [va, va+size)
 * on a rev 1.x R4600, working around the chip's CACHE-op erratum.
 */
void
r4600v1_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	/*
	 * To make this a little less painful, just hit the entire
	 * cache if we have a range >= the cache size.
	 */
	if ((eva - va) >= mips_pdcache_size) {
		r5k_pdcache_wbinv_all_32();
		return;
	}

	ostatus = mips_cp0_status_read();

	/* Block interrupts so no other dcache activity can intervene. */
	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		/*
		 * v1.7 errata item 18: Hit ops must be preceded by at
		 * least four instructions that are not loads or stores.
		 */
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}
240:
/*
 * Write back and invalidate the dcache lines covering [va, va+size)
 * on a rev 2.x R4600, working around the refill-buffer erratum.
 */
void
r4600v2_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	/* Keep interrupts out while the workaround sequence runs. */
	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while ((eva - va) >= (32 * 32)) {
		/*
		 * v2.0 errata: a load from an uncached (KSEG1) address
		 * empties the dcache refill buffer before the CACHE ops.
		 */
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}
268:
/*
 * Write back and invalidate the dcache lines covering [va, va+size)
 * on a rev 1.x VR4131.  Note the combined Hit_Writeback_Invalidate_D
 * op is deliberately avoided: each chunk is written back and then
 * invalidated in two separate passes -- presumably a workaround for
 * a VR4131 erratum with the combined op (TODO: cite the erratum).
 */
void
vr4131v1_pdcache_wbinv_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 16;
	}
}
290:
291: void
1.3 takemura 292: r5k_pdcache_wbinv_range_16(vaddr_t va, vsize_t size)
293: {
294: vaddr_t eva = round_line16(va + size);
295:
1.4 shin 296: va = trunc_line16(va);
1.3 takemura 297:
298: while ((eva - va) >= (32 * 16)) {
299: cache_r4k_op_32lines_16(va,
300: CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
301: va += (32 * 16);
302: }
303:
304: while (va < eva) {
305: cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
306: va += 16;
307: }
308: }
309:
310: void
1.2 thorpej 311: r5k_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
312: {
313: vaddr_t eva = round_line(va + size);
314:
315: va = trunc_line(va);
316:
317: while ((eva - va) >= (32 * 32)) {
318: cache_r4k_op_32lines_32(va,
319: CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
320: va += (32 * 32);
321: }
322:
323: while (va < eva) {
324: cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
325: va += 32;
326: }
327: }
328:
/*
 * Write back and invalidate, by cache index, the dcache lines
 * (16 bytes each) that would hold [va, va+size).  Both ways of the
 * 2-way cache are hit: way 0 via va, way 1 via w2va.
 */
void
r5k_pdcache_wbinv_range_index_16(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask);

	eva = round_line16(va + size);
	va = trunc_line16(va);
	/* w2va indexes the second way, one way-size above the first. */
	w2va = va + mips_pdcache_way_size;

	/* 16 lines in each way per iteration for the bulk of the range. */
	while ((eva - va) >= (16 * 16)) {
		cache_r4k_op_16lines_16_2way(va, w2va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (16 * 16);
		w2va += (16 * 16);
	}

	/* Remaining lines, one per way at a time. */
	while (va < eva) {
		cache_op_r4k_line( va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 16;
		w2va += 16;
	}
}
360:
/*
 * Write back and invalidate, by cache index, the dcache lines
 * (32 bytes each) that would hold [va, va+size).  Both ways of the
 * 2-way cache are hit: way 0 via va, way 1 via w2va.
 */
void
r5k_pdcache_wbinv_range_index_32(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask);

	eva = round_line(va + size);
	va = trunc_line(va);
	/* w2va indexes the second way, one way-size above the first. */
	w2va = va + mips_pdcache_way_size;

	/* 16 lines in each way per iteration for the bulk of the range. */
	while ((eva - va) >= (16 * 32)) {
		cache_r4k_op_16lines_32_2way(va, w2va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (16 * 32);
		w2va += (16 * 32);
	}

	/* Remaining lines, one per way at a time. */
	while (va < eva) {
		cache_op_r4k_line( va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 32;
		w2va += 32;
	}
}
392:
/*
 * Invalidate (without writeback) the dcache lines covering
 * [va, va+size) on a rev 1.x R4600, with the CACHE-op workaround.
 */
void
r4600v1_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	/* Block interrupts so no other dcache activity can intervene. */
	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		/*
		 * v1.7 errata item 18: Hit ops must be preceded by at
		 * least four instructions that are not loads or stores.
		 */
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}
419:
/*
 * Invalidate (without writeback) the dcache lines covering
 * [va, va+size) on a rev 2.x R4600, with the refill-buffer workaround.
 */
void
r4600v2_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * NOTE(review): interrupts are disabled above for the whole
	 * loop, so this comment looks stale -- interrupts do NOT get
	 * through between chunks here; confirm against revision
	 * history before relying on it.
	 *
	 * Between blasts of big cache chunks, give interrupts
	 * a chance to get through.
	 */
	while ((eva - va) >= (32 * 32)) {
		/* v2.0 errata: uncached load drains the refill buffer. */
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}
450:
451: void
1.3 takemura 452: r5k_pdcache_inv_range_16(vaddr_t va, vsize_t size)
453: {
454: vaddr_t eva = round_line16(va + size);
455:
456: va = trunc_line16(va);
457:
458: while ((eva - va) >= (32 * 16)) {
459: cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
460: va += (32 * 16);
461: }
462:
463: while (va < eva) {
464: cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
465: va += 16;
466: }
467: }
468:
469: void
1.2 thorpej 470: r5k_pdcache_inv_range_32(vaddr_t va, vsize_t size)
471: {
472: vaddr_t eva = round_line(va + size);
473:
474: va = trunc_line(va);
475:
476: while ((eva - va) >= (32 * 32)) {
477: cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
478: va += (32 * 32);
479: }
480:
481: while (va < eva) {
482: cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
483: va += 32;
484: }
485: }
486:
/*
 * Write back (without invalidating) the dcache lines covering
 * [va, va+size) on a rev 1.x R4600, with the CACHE-op workaround.
 */
void
r4600v1_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	/* Block interrupts so no other dcache activity can intervene. */
	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		/*
		 * v1.7 errata item 18: Hit ops must be preceded by at
		 * least four instructions that are not loads or stores.
		 */
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}
513:
/*
 * Write back (without invalidating) the dcache lines covering
 * [va, va+size) on a rev 2.x R4600, with the refill-buffer workaround.
 */
void
r4600v2_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * NOTE(review): interrupts are disabled above for the whole
	 * loop, so this comment looks stale -- interrupts do NOT get
	 * through between chunks here; confirm against revision
	 * history before relying on it.
	 *
	 * Between blasts of big cache chunks, give interrupts
	 * a chance to get through.
	 */
	while ((eva - va) >= (32 * 32)) {
		/* v2.0 errata: uncached load drains the refill buffer. */
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}
544:
545: void
1.3 takemura 546: r5k_pdcache_wb_range_16(vaddr_t va, vsize_t size)
547: {
548: vaddr_t eva = round_line16(va + size);
549:
550: va = trunc_line16(va);
551:
552: while ((eva - va) >= (32 * 16)) {
553: cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
554: va += (32 * 16);
555: }
556:
557: while (va < eva) {
558: cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
559: va += 16;
560: }
561: }
562:
563: void
1.2 thorpej 564: r5k_pdcache_wb_range_32(vaddr_t va, vsize_t size)
565: {
566: vaddr_t eva = round_line(va + size);
567:
568: va = trunc_line(va);
569:
570: while ((eva - va) >= (32 * 32)) {
571: cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
572: va += (32 * 32);
573: }
574:
575: while (va < eva) {
576: cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
577: va += 32;
578: }
579: }
580:
#undef round_line16
#undef trunc_line16
#undef round_line
#undef trunc_line

/*
 * Cache operations for R5000-style secondary caches:
 *
 *	- Direct-mapped
 *	- Write-through
 *	- Physically indexed, physically tagged
 *
 */


/* NOTE(review): redundant -- ".set mips3" was already issued above. */
__asm(".set mips3");

/* CACHE opcode: invalidate one whole secondary-cache "page" at once. */
#define R5K_Page_Invalidate_S   0x17
! 599:
! 600: void
! 601: r5k_sdcache_wbinv_all(void)
! 602: {
! 603: vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
! 604: vaddr_t eva = va + mips_sdcache_size;
! 605:
! 606: while (va < eva) {
! 607: cache_op_r4k_line(va, R5K_Page_Invalidate_S);
! 608: va += (128 * 32);
! 609: }
! 610: }
! 611:
/* XXX: want wbinv_range_index here instead? */
void
r5k_sdcache_wbinv_rangeall(vaddr_t va, vsize_t size)
{
	/*
	 * Range-by-index invalidation is not implemented; just blow
	 * away the whole secondary cache.  va and size are
	 * intentionally unused.
	 */
	r5k_sdcache_wbinv_all();
}
! 618:
/*
 * Round/truncate to a 4KB (128 lines x 32 bytes) secondary-cache
 * "page", the unit covered by one Page_Invalidate_S operation.
 * NOTE(review): these names shadow the VM system's round_page/
 * trunc_page macros and are never #undef'd -- consider renaming.
 */
#define round_page(x)		(((x) + (128 * 32 - 1)) & ~(128 * 32 - 1))
#define trunc_page(x)		((x) & ~(128 * 32 - 1))
! 621:
! 622: void
! 623: r5k_sdcache_wbinv_range(vaddr_t va, vsize_t size)
! 624: {
! 625: vaddr_t eva = round_page(va + size);
! 626: va = trunc_page(va);
! 627:
! 628: while (va < eva) {
! 629: cache_op_r4k_line(va, R5K_Page_Invalidate_S);
! 630: va += (128 * 32);
! 631: }
! 632: }
! 633:
! 634: void
! 635: r5k_sdcache_wb_range(vaddr_t va, vsize_t size)
! 636: {
! 637: /* Write-through cache, no need to WB */
! 638: }
CVSweb <webmaster@jp.NetBSD.org>