/*	$NetBSD: cache_r5k.c,v 1.20 2017/04/27 20:05:09 skrll Exp $	*/

/*
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: cache_r5k.c,v 1.20 2017/04/27 20:05:09 skrll Exp $");

#include <sys/param.h>

#include <mips/cache.h>
#include <mips/cache_r4k.h>
#include <mips/cache_r5k.h>
#include <mips/locore.h>

/*
 * Cache operations for R5000-style caches:
 *
 *	- 2-way set-associative
 *	- Write-back
 *	- Virtually indexed, physically tagged
 *
 * Since the R4600 is so similar (2-way set-associative, 32b/l),
 * we handle that here, too.  Note for R4600, we have to work
 * around some chip bugs.  From the v1.7 errata:
 *
 *  18. The CACHE instructions Hit_Writeback_Invalidate_D, Hit_Writeback_D,
 *      Hit_Invalidate_D and Create_Dirty_Excl_D should only be
 *      executed if there is no other dcache activity.  If the dcache is
 *      accessed for another instruction immediately preceding when these
 *      cache instructions are executing, it is possible that the dcache
 *      tag match outputs used by these cache instructions will be
 *      incorrect.  These cache instructions should be preceded by at least
 *      four instructions that are not any kind of load or store
 *      instruction.
 *
 * ...and from the v2.0 errata:
 *
 *   The CACHE instructions Hit_Writeback_Inv_D, Hit_Writeback_D,
 *   Hit_Invalidate_D and Create_Dirty_Exclusive_D will only operate
 *   correctly if the internal data cache refill buffer is empty.  These
 *   CACHE instructions should be separated from any potential data cache
 *   miss by a load instruction to an uncached address to empty the response
 *   buffer.
 *
 * XXX Does not handle split secondary caches.
 */
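
/*
 * A minimal sketch of how the two workarounds above appear in the
 * routines below (cache_op_r4k_line() is the common line-op helper
 * used throughout this file):
 *
 *	// v1.x: at least four non-load/store instructions first
 *	__asm volatile("nop; nop; nop; nop");
 *	cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
 *
 *	// v2.0: an uncached load first, to empty the refill buffer
 *	(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
 *	cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
 */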

#define	round_line16(x)		round_line(x, 16)
#define	trunc_line16(x)		trunc_line(x, 16)
#define	round_line32(x)		round_line(x, 32)
#define	trunc_line32(x)		trunc_line(x, 32)
#define	round_line(x,n)		(((x) + (register_t)(n) - 1) & -(register_t)(n))
#define	trunc_line(x,n)		((x) & -(register_t)(n))
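
/*
 * For example, with 32-byte lines: round_line32(0x80000021) == 0x80000040
 * and trunc_line32(0x80000021) == 0x80000020; -(register_t)32 is simply
 * the all-ones mask with the low five bits clear.
 */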

__asm(".set mips3");

void
r5k_picache_sync_all(void)
{
	struct mips_cache_info * const mci = &mips_cache_info;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */
	mips_intern_dcache_sync_all();
	__asm volatile("sync");
	mips_intern_icache_sync_range_index(MIPS_KSEG0_START,
	    mci->mci_picache_size);
}

void
r5k_picache_sync_range(register_t va, vsize_t size)
{

	mips_intern_dcache_sync_range(va, size);
	mips_intern_icache_sync_range(va, size);
}

void
r5k_picache_sync_range_index(vaddr_t va, vsize_t size)
{
	struct mips_cache_info * const mci = &mips_cache_info;
	const size_t ways = mci->mci_picache_ways;
	const size_t line_size = mci->mci_picache_line_size;
	const size_t way_size = mci->mci_picache_way_size;
	const size_t way_mask = way_size - 1;
	vaddr_t eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
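	/*
	 * E.g. (hypothetical numbers): with a 16KB way (way_mask 0x3fff),
	 * va 0x00402345 keeps only the index bits 0x2345 and becomes the
	 * KSEG0 address 0x80002345.
	 */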
	va = MIPS_PHYS_TO_KSEG0(va & way_mask);

	eva = round_line(va + size, line_size);
	va = trunc_line(va, line_size);
	size = eva - va;

	/*
	 * If we are going to flush more than is in a way (or the stride
	 * needed for that way), we are flushing everything.
	 */
	if (size >= way_size) {
		r5k_picache_sync_all();
		return;
	}

	for (size_t way = 0; way < ways; way++) {
		mips_intern_dcache_sync_range_index(va, size);
		mips_intern_icache_sync_range_index(va, size);
		va += way_size;
		eva += way_size;
	}
}

void
r5k_pdcache_wbinv_all(void)
{
	struct mips_cache_info * const mci = &mips_cache_info;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */
	mips_intern_pdcache_wbinv_range_index(MIPS_KSEG0_START,
	    mci->mci_pdcache_size);
}

void
r5k_pdcache_wbinv_range_index(vaddr_t va, vsize_t size)
{
	struct mips_cache_info * const mci = &mips_cache_info;
	const size_t ways = mci->mci_pdcache_ways;
	const size_t line_size = mci->mci_pdcache_line_size;
	const vaddr_t way_size = mci->mci_pdcache_way_size;
	const vaddr_t way_mask = way_size - 1;
	vaddr_t eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & way_mask);
	eva = round_line(va + size, line_size);
	va = trunc_line(va, line_size);
	size = eva - va;

	/*
	 * If we are going to flush more than is in a way, we are flushing
	 * everything.
	 */
	if (size >= way_size) {
		mips_intern_pdcache_wbinv_range_index(MIPS_KSEG0_START,
		    mci->mci_pdcache_size);
		return;
	}

	/*
	 * Invalidate each way.  If the address range wraps past the end
	 * of a way, each pass invalidates lines in two ways, but things
	 * still work out: the last pass wraps back into the first way.
	 */
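	/*
	 * E.g. with two 16KB ways: a 64-byte range starting 32 bytes
	 * before the end of way 0 hits the tail of way 0 and the head of
	 * way 1 on the first pass, then the tail of way 1 and (wrapping)
	 * the head of way 0 on the second.
	 */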
	for (size_t way = 0; way < ways; way++) {
		mips_intern_pdcache_wbinv_range_index(va, size);
		va += way_size;
		eva += way_size;
	}
}

void
r4600v1_pdcache_wbinv_range_32(register_t va, vsize_t size)
{
	const register_t eva = round_line32(va + size);

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line32(va);

	/*
	 * To make this a little less painful, just hit the entire
	 * cache if we have a range >= the cache size.
	 */
	if (eva - va >= mips_cache_info.mci_pdcache_size) {
		r5k_pdcache_wbinv_all();
		return;
	}

	const uint32_t ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

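	/*
	 * The nops are the v1.x errata workaround: at least four
	 * non-load/store instructions must precede each CACHE op.
	 */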
	while (va < eva) {
		__asm volatile("nop; nop; nop; nop");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wbinv_range_32(register_t va, vsize_t size)
{
	const register_t eva = round_line32(va + size);

	va = trunc_line32(va);

	const uint32_t ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

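	/*
	 * The uncached load before each chunk is the v2.0 errata
	 * workaround: it empties the data cache refill buffer.
	 */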
	for (; (eva - va) >= (32 * 32); va += (32 * 32)) {
		(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
	}

	(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
	for (; va < eva; va += 32) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
	}

	mips_cp0_status_write(ostatus);
}

void
vr4131v1_pdcache_wbinv_range_16(register_t va, vsize_t size)
{
	register_t eva = round_line16(va + size);

	va = trunc_line16(va);

	for (; (eva - va) >= (32 * 16); va += (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
	}

	for (; va < eva; va += 16) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
	}
}

void
r4600v1_pdcache_inv_range_32(register_t va, vsize_t size)
{
	const register_t eva = round_line32(va + size);

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line32(va);

	const uint32_t ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	for (; va < eva; va += 32) {
		__asm volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_inv_range_32(register_t va, vsize_t size)
{
	const register_t eva = round_line32(va + size);

	va = trunc_line32(va);

	const uint32_t ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * Between blasts of big cache chunks, give interrupts
	 * a chance to get through.
	 */
	for (; (eva - va) >= (32 * 32); va += (32 * 32)) {
		(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
	}

	(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
	for (; va < eva; va += 32) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v1_pdcache_wb_range_32(register_t va, vsize_t size)
{
	const register_t eva = round_line32(va + size);

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line32(va);

	const uint32_t ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	for (; va < eva; va += 32) {
		__asm volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wb_range_32(register_t va, vsize_t size)
{
	const register_t eva = round_line32(va + size);

	va = trunc_line32(va);

	const uint32_t ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * Between blasts of big cache chunks, give interrupts
	 * a chance to get through.
	 */
	for (; (eva - va) >= (32 * 32); va += (32 * 32)) {
		(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
	}

	(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
	for (; va < eva; va += 32) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
	}

	mips_cp0_status_write(ostatus);
}

/*
 * Cache operations for R5000-style secondary caches:
 *
 *	- Direct-mapped
 *	- Write-through
 *	- Physically indexed, physically tagged
 */

__asm(".set mips3");

#define	R5K_Page_Invalidate_S	0x17
CTASSERT(R5K_Page_Invalidate_S == (CACHEOP_R4K_HIT_WB_INV|CACHE_R4K_SD));
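
/*
 * Sanity arithmetic: in the CACHE-op encoding used by <mips/cache_r4k.h>,
 * CACHEOP_R4K_HIT_WB_INV is (5 << 2) == 0x14 and CACHE_R4K_SD is 3, so
 * the combination is 0x14 | 0x3 == 0x17, as the CTASSERT verifies.
 */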

void
r5k_sdcache_wbinv_all(void)
{

	r5k_sdcache_wbinv_range(MIPS_PHYS_TO_KSEG0(0),
	    mips_cache_info.mci_sdcache_size);
}

void
r5k_sdcache_wbinv_range_index(vaddr_t va, vsize_t size)
{

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & (mips_cache_info.mci_sdcache_size - 1));
	r5k_sdcache_wbinv_range((intptr_t)va, size);
}

#define	mips_r5k_round_page(x)	round_line(x, PAGE_SIZE)
#define	mips_r5k_trunc_page(x)	trunc_line(x, PAGE_SIZE)

void
r5k_sdcache_wbinv_range(register_t va, vsize_t size)
{
	uint32_t ostatus, taglo;
	register_t eva = mips_r5k_round_page(va + size);

	va = mips_r5k_trunc_page(va);

	ostatus = mips_cp0_status_read();
	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

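	/* Save TagLo ($28) and clear it while the CACHE ops run;
	 * it is restored at the end of the function. */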
	__asm volatile("mfc0 %0, $28" : "=r"(taglo));
	__asm volatile("mtc0 $0, $28");

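	/*
	 * R5K_Page_Invalidate_S invalidates an entire page of the
	 * secondary cache per op, hence the 128 * 32 (== 4096-byte,
	 * one-page) stride.
	 */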
	for (; va < eva; va += (128 * 32)) {
		cache_op_r4k_line(va, CACHEOP_R4K_HIT_WB_INV|CACHE_R4K_SD);
	}

	mips_cp0_status_write(ostatus);
	__asm volatile("mtc0 %0, $28; nop" :: "r"(taglo));
}