
Annotation of src/sys/arch/xen/x86/x86_xpmap.c, Revision 1.34.2.2

1.34.2.2! yamt        1: /*     $NetBSD: x86_xpmap.c,v 1.34.2.1 2011/11/10 14:31:44 yamt Exp $  */
1.2       bouyer      2:
                      3: /*
                      4:  * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr>
                      5:  *
                      6:  * Permission to use, copy, modify, and distribute this software for any
                      7:  * purpose with or without fee is hereby granted, provided that the above
                      8:  * copyright notice and this permission notice appear in all copies.
                      9:  *
                     10:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                     11:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     12:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
                     13:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     14:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     15:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     16:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     17:  */
                     18:
                     19: /*
                     20:  * Copyright (c) 2006, 2007 Manuel Bouyer.
                     21:  *
                     22:  * Redistribution and use in source and binary forms, with or without
                     23:  * modification, are permitted provided that the following conditions
                     24:  * are met:
                     25:  * 1. Redistributions of source code must retain the above copyright
                     26:  *    notice, this list of conditions and the following disclaimer.
                     27:  * 2. Redistributions in binary form must reproduce the above copyright
                     28:  *    notice, this list of conditions and the following disclaimer in the
                     29:  *    documentation and/or other materials provided with the distribution.
                     30:  *
                     31:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
                     32:  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
                     33:  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
                     34:  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
                     35:  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
                     36:  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
                     37:  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
                     38:  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
                     39:  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
                     40:  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
                     41:  *
                     42:  */
                     43:
                     44: /*
                     45:  *
                     46:  * Copyright (c) 2004 Christian Limpach.
                     47:  * All rights reserved.
                     48:  *
                     49:  * Redistribution and use in source and binary forms, with or without
                     50:  * modification, are permitted provided that the following conditions
                     51:  * are met:
                     52:  * 1. Redistributions of source code must retain the above copyright
                     53:  *    notice, this list of conditions and the following disclaimer.
                     54:  * 2. Redistributions in binary form must reproduce the above copyright
                     55:  *    notice, this list of conditions and the following disclaimer in the
                     56:  *    documentation and/or other materials provided with the distribution.
                     57:  *
                     58:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
                     59:  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
                     60:  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
                     61:  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
                     62:  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
                     63:  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
                     64:  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
                     65:  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
                     66:  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
                     67:  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
                     68:  */
                     69:
                     70:
                     71: #include <sys/cdefs.h>
1.34.2.2! yamt       72: __KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.34.2.1 2011/11/10 14:31:44 yamt Exp $");
1.2       bouyer     73:
                     74: #include "opt_xen.h"
1.4       bouyer     75: #include "opt_ddb.h"
                     76: #include "ksyms.h"
1.2       bouyer     77:
                     78: #include <sys/param.h>
                     79: #include <sys/systm.h>
1.34.2.2! yamt       80: #include <sys/mutex.h>
        !            81: #include <sys/cpu.h>
1.2       bouyer     82:
                     83: #include <uvm/uvm.h>
                     84:
1.34.2.2! yamt       85: #include <x86/pmap.h>
1.2       bouyer     86: #include <machine/gdt.h>
                     87: #include <xen/xenfunc.h>
                     88:
                     89: #include <dev/isa/isareg.h>
                     90: #include <machine/isa_machdep.h>
                     91:
                     92: #undef XENDEBUG
                     93: /* #define XENDEBUG_SYNC */
                     94: /* #define     XENDEBUG_LOW */
                     95:
                     96: #ifdef XENDEBUG
                     97: #define        XENPRINTF(x) printf x
                     98: #define        XENPRINTK(x) printk x
                     99: #define        XENPRINTK2(x) /* printk x */
                    100:
                    101: static char XBUF[256];
                    102: #else
                    103: #define        XENPRINTF(x)
                    104: #define        XENPRINTK(x)
                    105: #define        XENPRINTK2(x)
                    106: #endif
                    107: #define        PRINTF(x) printf x
                    108: #define        PRINTK(x) printk x
                    109:
                    110: volatile shared_info_t *HYPERVISOR_shared_info;
1.11      jym       111: /* Xen requires the start_info struct to be page aligned */
                    112: union start_info_union start_info_union __aligned(PAGE_SIZE);
1.6       bouyer    113: unsigned long *xpmap_phys_to_machine_mapping;
1.34.2.2! yamt      114: kmutex_t pte_lock;
1.2       bouyer    115:
                    116: void xen_failsafe_handler(void);
                    117:
                    118: #define HYPERVISOR_mmu_update_self(req, count, success_count) \
                    119:        HYPERVISOR_mmu_update((req), (count), (success_count), DOMID_SELF)
                    120:
                    121: void
                    122: xen_failsafe_handler(void)
                    123: {
                    124:
                    125:        panic("xen_failsafe_handler called!\n");
                    126: }
                    127:
                    128:
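                          /*
                           * Make the pages backing the new LDT read-only (Xen refuses
                           * writable descriptor-table pages), then install the LDT via
                           * the hypervisor.
                           */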
                    129: void
                    130: xen_set_ldt(vaddr_t base, uint32_t entries)
                    131: {
                    132:        vaddr_t va;
                    133:        vaddr_t end;
1.4       bouyer    134:        pt_entry_t *ptp;
1.2       bouyer    135:        int s;
                    136:
                    137: #ifdef __x86_64__
                    138:        end = base + (entries << 3);
                    139: #else
                    140:        end = base + entries * sizeof(union descriptor);
                    141: #endif
                    142:
                    143:        for (va = base; va < end; va += PAGE_SIZE) {
                    144:                KASSERT(va >= VM_MIN_KERNEL_ADDRESS);
                    145:                ptp = kvtopte(va);
1.19      jym       146:                XENPRINTF(("xen_set_ldt %#" PRIxVADDR " %d %p\n",
                    147:                    base, entries, ptp));
1.4       bouyer    148:                pmap_pte_clearbits(ptp, PG_RW);
1.2       bouyer    149:        }
                    150:        s = splvm();
                    151:        xpq_queue_set_ldt(base, entries);
                    152:        splx(s);
                    153: }
                    154:
                    155: #ifdef XENDEBUG
                    156: void xpq_debug_dump(void);
                    157: #endif
                    158:
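                          /*
                           * MMU update queue: the xpq_queue_* functions below batch
                           * mmu_update requests in a per-cpu array and submit them with a
                           * single HYPERVISOR_mmu_update hypercall when the queue fills up
                           * or a flush is forced.
                           */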
                    159: #define XPQUEUE_SIZE 2048
1.34.2.1  yamt      160: static mmu_update_t xpq_queue_array[MAXCPUS][XPQUEUE_SIZE];
                    161: static int xpq_idx_array[MAXCPUS];
1.30      cherry    162:
1.34.2.2! yamt      163: #ifdef i386
        !           164: extern union descriptor tmpgdt[];
        !           165: #endif /* i386 */
1.2       bouyer    166: void
1.8       cegger    167: xpq_flush_queue(void)
1.2       bouyer    168: {
1.34.2.1  yamt      169:        int i, ok = 0, ret;
                    170:
1.34.2.2! yamt      171:        mmu_update_t *xpq_queue = xpq_queue_array[curcpu()->ci_cpuid];
        !           172:        int xpq_idx = xpq_idx_array[curcpu()->ci_cpuid];
1.2       bouyer    173:
                    174:        XENPRINTK2(("flush queue %p entries %d\n", xpq_queue, xpq_idx));
                    175:        for (i = 0; i < xpq_idx; i++)
1.19      jym       176:                XENPRINTK2(("%d: 0x%08" PRIx64 " 0x%08" PRIx64 "\n", i,
                    177:                    xpq_queue[i].ptr, xpq_queue[i].val));
1.23      jym       178:
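                                  /*
                                   * On partial success, skip the entries that went through
                                   * and retry the rest before dumping the queue and panicking.
                                   */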
1.34.2.1  yamt      179: retry:
1.23      jym       180:        ret = HYPERVISOR_mmu_update_self(xpq_queue, xpq_idx, &ok);
                    181:
                    182:        if (xpq_idx != 0 && ret < 0) {
1.34.2.2! yamt      183:                struct cpu_info *ci;
        !           184:                CPU_INFO_ITERATOR cii;
        !           185:
        !           186:                printf("xpq_flush_queue: %d entries (%d successful) on "
        !           187:                    "cpu%d (%ld)\n",
        !           188:                    xpq_idx, ok, curcpu()->ci_index, curcpu()->ci_cpuid);
1.34.2.1  yamt      189:
                    190:                if (ok != 0) {
                    191:                        xpq_queue += ok;
                    192:                        xpq_idx -= ok;
                    193:                        ok = 0;
                    194:                        goto retry;
                    195:                }
                    196:
1.34.2.2! yamt      197:                for (CPU_INFO_FOREACH(cii, ci)) {
        !           198:                        xpq_queue = xpq_queue_array[ci->ci_cpuid];
        !           199:                        xpq_idx = xpq_idx_array[ci->ci_cpuid];
        !           200:                        printf("cpu%d (%ld):\n", ci->ci_index, ci->ci_cpuid);
        !           201:                        for (i = 0; i < xpq_idx; i++) {
        !           202:                                printf("  0x%016" PRIx64 ": 0x%016" PRIx64 "\n",
        !           203:                                   xpq_queue[i].ptr, xpq_queue[i].val);
        !           204:                        }
        !           205: #ifdef __x86_64__
        !           206:                        for (i = 0; i < PDIR_SLOT_PTE; i++) {
        !           207:                                if (ci->ci_kpm_pdir[i] == 0)
        !           208:                                        continue;
        !           209:                                printf(" kpm_pdir[%d]: 0x%" PRIx64 "\n",
        !           210:                                    i, ci->ci_kpm_pdir[i]);
        !           211:                        }
        !           212: #endif
        !           213:                }
1.23      jym       214:                panic("HYPERVISOR_mmu_update failed, ret: %d\n", ret);
1.2       bouyer    215:        }
1.34.2.2! yamt      216:        xpq_idx_array[curcpu()->ci_cpuid] = 0;
1.2       bouyer    217: }
                    218:
                    219: static inline void
                    220: xpq_increment_idx(void)
                    221: {
                    222:
1.34.2.2! yamt      223:        if (__predict_false(++xpq_idx_array[curcpu()->ci_cpuid] == XPQUEUE_SIZE))
1.2       bouyer    224:                xpq_flush_queue();
                    225: }
                    226:
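                          /*
                           * Queue an update of the machine->physical translation table:
                           * machine page 'ma' is set to translate back to physical page 'pa'.
                           */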
                    227: void
                    228: xpq_queue_machphys_update(paddr_t ma, paddr_t pa)
                    229: {
1.34.2.1  yamt      230:
1.34.2.2! yamt      231:        mmu_update_t *xpq_queue = xpq_queue_array[curcpu()->ci_cpuid];
        !           232:        int xpq_idx = xpq_idx_array[curcpu()->ci_cpuid];
1.34.2.1  yamt      233:
1.6       bouyer    234:        XENPRINTK2(("xpq_queue_machphys_update ma=0x%" PRIx64 " pa=0x%" PRIx64
                    235:            "\n", (int64_t)ma, (int64_t)pa));
1.34.2.1  yamt      236:
1.2       bouyer    237:        xpq_queue[xpq_idx].ptr = ma | MMU_MACHPHYS_UPDATE;
                    238:        xpq_queue[xpq_idx].val = (pa - XPMAP_OFFSET) >> PAGE_SHIFT;
                    239:        xpq_increment_idx();
                    240: #ifdef XENDEBUG_SYNC
                    241:        xpq_flush_queue();
                    242: #endif
                    243: }
                    244:
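                          /*
                           * Queue an update of the pte at machine address 'ptr' to the new
                           * value 'val'; it takes effect when the queue is flushed.
                           */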
                    245: void
1.6       bouyer    246: xpq_queue_pte_update(paddr_t ptr, pt_entry_t val)
1.2       bouyer    247: {
                    248:
1.34.2.2! yamt      249:        mmu_update_t *xpq_queue = xpq_queue_array[curcpu()->ci_cpuid];
        !           250:        int xpq_idx = xpq_idx_array[curcpu()->ci_cpuid];
1.34.2.1  yamt      251:
1.6       bouyer    252:        KASSERT((ptr & 3) == 0);
1.2       bouyer    253:        xpq_queue[xpq_idx].ptr = (paddr_t)ptr | MMU_NORMAL_PT_UPDATE;
                    254:        xpq_queue[xpq_idx].val = val;
                    255:        xpq_increment_idx();
                    256: #ifdef XENDEBUG_SYNC
                    257:        xpq_flush_queue();
                    258: #endif
                    259: }
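                          /*
                           * Typical use of the queue (an illustrative sketch; pte_ma and
                           * npte are hypothetical names, not from this file):
                           *
                           *	xpq_queue_pte_update(pte_ma, npte);
                           *	xpq_flush_queue();
                           *
                           * with the flush issued before the new mapping must be visible.
                           */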
                    260:
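                          /*
                           * Synchronously switch to the page directory at physical address
                           * 'pa' (MMUEXT_NEW_BASEPTR, the Xen equivalent of loading %cr3).
                           */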
                    261: void
                    262: xpq_queue_pt_switch(paddr_t pa)
                    263: {
                    264:        struct mmuext_op op;
                    265:        xpq_flush_queue();
                    266:
1.6       bouyer    267:        XENPRINTK2(("xpq_queue_pt_switch: 0x%" PRIx64 " 0x%" PRIx64 "\n",
                    268:            (int64_t)pa, (int64_t)pa));
1.2       bouyer    269:        op.cmd = MMUEXT_NEW_BASEPTR;
                    270:        op.arg1.mfn = pa >> PAGE_SHIFT;
                    271:        if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
                    272:                panic("xpq_queue_pt_switch");
                    273: }
                    274:
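                          /*
                           * Pin the page table page at physical address 'pa'; 'lvl' is the
                           * MMUEXT_PIN_L*_TABLE command naming the table's level.
                           */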
                    275: void
1.24      jym       276: xpq_queue_pin_table(paddr_t pa, int lvl)
1.2       bouyer    277: {
                    278:        struct mmuext_op op;
1.29      cherry    279:
1.2       bouyer    280:        xpq_flush_queue();
                    281:
1.24      jym       282:        XENPRINTK2(("xpq_queue_pin_l%d_table: %#" PRIxPADDR "\n",
                    283:            lvl + 1, pa));
1.2       bouyer    284:
1.6       bouyer    285:        op.arg1.mfn = pa >> PAGE_SHIFT;
1.24      jym       286:        op.cmd = lvl;
1.6       bouyer    287:
                    288:        if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
                    289:                panic("xpq_queue_pin_table");
                    290: }
                    291:
1.2       bouyer    292: void
                    293: xpq_queue_unpin_table(paddr_t pa)
                    294: {
                    295:        struct mmuext_op op;
1.29      cherry    296:
1.2       bouyer    297:        xpq_flush_queue();
                    298:
1.24      jym       299:        XENPRINTK2(("xpq_queue_unpin_table: %#" PRIxPADDR "\n", pa));
1.2       bouyer    300:        op.arg1.mfn = pa >> PAGE_SHIFT;
                    301:        op.cmd = MMUEXT_UNPIN_TABLE;
                    302:        if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
                    303:                panic("xpq_queue_unpin_table");
                    304: }
                    305:
                    306: void
                    307: xpq_queue_set_ldt(vaddr_t va, uint32_t entries)
                    308: {
                    309:        struct mmuext_op op;
1.29      cherry    310:
1.2       bouyer    311:        xpq_flush_queue();
                    312:
                    313:        XENPRINTK2(("xpq_queue_set_ldt\n"));
                    314:        KASSERT(va == (va & ~PAGE_MASK));
                    315:        op.cmd = MMUEXT_SET_LDT;
                    316:        op.arg1.linear_addr = va;
                    317:        op.arg2.nr_ents = entries;
                    318:        if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
                    319:                panic("xpq_queue_set_ldt");
                    320: }
                    321:
                    322: void
1.8       cegger    323: xpq_queue_tlb_flush(void)
1.2       bouyer    324: {
                    325:        struct mmuext_op op;
1.29      cherry    326:
1.2       bouyer    327:        xpq_flush_queue();
                    328:
                    329:        XENPRINTK2(("xpq_queue_tlb_flush\n"));
                    330:        op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
                    331:        if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
                    332:                panic("xpq_queue_tlb_flush");
                    333: }
                    334:
                    335: void
1.8       cegger    336: xpq_flush_cache(void)
1.2       bouyer    337: {
                    338:        struct mmuext_op op;
1.29      cherry    339:        int s = splvm(), err;
                    340:
1.2       bouyer    341:        xpq_flush_queue();
                    342:
                    343:        XENPRINTK2(("xpq_queue_flush_cache\n"));
                    344:        op.cmd = MMUEXT_FLUSH_CACHE;
1.33      jym       345:        if ((err = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) < 0) {
                    346:                panic("xpq_flush_cache, err %d", err);
                    347:        }
1.29      cherry    348:        splx(s); /* XXX: removeme */
1.2       bouyer    349: }
                    350:
                    351: void
                    352: xpq_queue_invlpg(vaddr_t va)
                    353: {
                    354:        struct mmuext_op op;
                    355:        xpq_flush_queue();
                    356:
1.19      jym       357:        XENPRINTK2(("xpq_queue_invlpg %#" PRIxVADDR "\n", va));
1.2       bouyer    358:        op.cmd = MMUEXT_INVLPG_LOCAL;
                    359:        op.arg1.linear_addr = (va & ~PAGE_MASK);
                    360:        if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
                    361:                panic("xpq_queue_invlpg");
                    362: }
                    363:
1.29      cherry    364: void
                    365: xen_mcast_invlpg(vaddr_t va, uint32_t cpumask)
                    366: {
                    367:        mmuext_op_t op;
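                                  /* Xen expects a pointer to a u_long-sized vcpu bitmap */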
1.34.2.2! yamt      368:        u_long xcpumask = cpumask;
1.29      cherry    369:
                    370:        /* Flush pending page updates */
                    371:        xpq_flush_queue();
                    372:
                    373:        op.cmd = MMUEXT_INVLPG_MULTI;
                    374:        op.arg1.linear_addr = va;
1.34.2.2! yamt      375:        op.arg2.vcpumask = &xcpumask;
1.29      cherry    376:
                    377:        if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0) {
                     378:                panic("xen_mcast_invlpg");
                    379:        }
                    380:
                    381:        return;
                    382: }
                    383:
                    384: void
                    385: xen_bcast_invlpg(vaddr_t va)
                    386: {
                    387:        mmuext_op_t op;
                    388:
                    389:        /* Flush pending page updates */
                    390:        xpq_flush_queue();
                    391:
                    392:        op.cmd = MMUEXT_INVLPG_ALL;
                    393:        op.arg1.linear_addr = va;
                    394:
                    395:        if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0) {
                     396:                panic("xen_bcast_invlpg");
                    397:        }
                    398:
                    399:        return;
                    400: }
                    401:
                    402: /* This is a synchronous call. */
                    403: void
                    404: xen_mcast_tlbflush(uint32_t cpumask)
                    405: {
                    406:        mmuext_op_t op;
1.34.2.2! yamt      407:        u_long xcpumask = cpumask;
1.29      cherry    408:
                    409:        /* Flush pending page updates */
                    410:        xpq_flush_queue();
                    411:
                    412:        op.cmd = MMUEXT_TLB_FLUSH_MULTI;
1.34.2.2! yamt      413:        op.arg2.vcpumask = &xcpumask;
1.29      cherry    414:
                    415:        if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0) {
                     416:                panic("xen_mcast_tlbflush");
                    417:        }
                    418:
                    419:        return;
                    420: }
                    421:
                    422: /* This is a synchronous call. */
                    423: void
                    424: xen_bcast_tlbflush(void)
                    425: {
                    426:        mmuext_op_t op;
                    427:
                    428:        /* Flush pending page updates */
                    429:        xpq_flush_queue();
                    430:
                    431:        op.cmd = MMUEXT_TLB_FLUSH_ALL;
                    432:
                    433:        if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0) {
                     434:                panic("xen_bcast_tlbflush");
                    435:        }
                    436:
                    437:        return;
                    438: }
                    439:
                    440: /* This is a synchronous call. */
                    441: void
                    442: xen_vcpu_mcast_invlpg(vaddr_t sva, vaddr_t eva, uint32_t cpumask)
                    443: {
                    444:        KASSERT(eva > sva);
                    445:
                    446:        /* Flush pending page updates */
                    447:        xpq_flush_queue();
                    448:
                     449:        /* Round addresses down to page boundaries */
                    450:        sva &= ~PAGE_MASK;
                    451:        eva &= ~PAGE_MASK;
                    452:
                    453:        for ( ; sva <= eva; sva += PAGE_SIZE) {
                    454:                xen_mcast_invlpg(sva, cpumask);
                    455:        }
                    456:
                    457:        return;
                    458: }
                    459:
                    460: /* This is a synchronous call. */
                    461: void
                    462: xen_vcpu_bcast_invlpg(vaddr_t sva, vaddr_t eva)
                    463: {
                    464:        KASSERT(eva > sva);
                    465:
                    466:        /* Flush pending page updates */
                    467:        xpq_flush_queue();
                    468:
                     469:        /* Round addresses down to page boundaries */
                    470:        sva &= ~PAGE_MASK;
                    471:        eva &= ~PAGE_MASK;
                    472:
                    473:        for ( ; sva <= eva; sva += PAGE_SIZE) {
                    474:                xen_bcast_invlpg(sva);
                    475:        }
                    476:
                    477:        return;
                    478: }
                    479:
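                          /*
                           * Synchronously issue a single mmu_update against domain 'dom'
                           * (e.g. when privileged code manipulates another domain's pages).
                           */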
1.2       bouyer    480: int
1.6       bouyer    481: xpq_update_foreign(paddr_t ptr, pt_entry_t val, int dom)
1.2       bouyer    482: {
                    483:        mmu_update_t op;
                    484:        int ok;
1.29      cherry    485:
1.2       bouyer    486:        xpq_flush_queue();
                    487:
1.6       bouyer    488:        op.ptr = ptr;
1.2       bouyer    489:        op.val = val;
                    490:        if (HYPERVISOR_mmu_update(&op, 1, &ok, dom) < 0)
                    491:                return EFAULT;
                    492:        return (0);
                    493: }
                    494:
                    495: #ifdef XENDEBUG
                    496: void
1.8       cegger    497: xpq_debug_dump(void)
1.2       bouyer    498: {
                    499:        int i;
                    500:
1.34.2.2! yamt      501:        mmu_update_t *xpq_queue = xpq_queue_array[curcpu()->ci_cpuid];
        !           502:        int xpq_idx = xpq_idx_array[curcpu()->ci_cpuid];
1.34.2.1  yamt      503:
1.2       bouyer    504:        XENPRINTK2(("idx: %d\n", xpq_idx));
                    505:        for (i = 0; i < xpq_idx; i++) {
1.13      cegger    506:                snprintf(XBUF, sizeof(XBUF), "%" PRIx64 " %08" PRIx64,
1.19      jym       507:                    xpq_queue[i].ptr, xpq_queue[i].val);
1.2       bouyer    508:                if (++i < xpq_idx)
1.13      cegger    509:                        snprintf(XBUF + strlen(XBUF),
                    510:                            sizeof(XBUF) - strlen(XBUF),
                    511:                            "%" PRIx64 " %08" PRIx64,
1.19      jym       512:                            xpq_queue[i].ptr, xpq_queue[i].val);
1.2       bouyer    513:                if (++i < xpq_idx)
1.13      cegger    514:                        snprintf(XBUF + strlen(XBUF),
                    515:                            sizeof(XBUF) - strlen(XBUF),
                    516:                            "%" PRIx64 " %08" PRIx64,
1.19      jym       517:                            xpq_queue[i].ptr, xpq_queue[i].val);
1.2       bouyer    518:                if (++i < xpq_idx)
1.13      cegger    519:                        snprintf(XBUF + strlen(XBUF),
                    520:                            sizeof(XBUF) - strlen(XBUF),
                    521:                            "%" PRIx64 " %08" PRIx64,
1.19      jym       522:                            xpq_queue[i].ptr, xpq_queue[i].val);
1.2       bouyer    523:                XENPRINTK2(("%d: %s\n", xpq_idx, XBUF));
                    524:        }
                    525: }
                    526: #endif
                    527:
                    528:
                    529: extern volatile struct xencons_interface *xencons_interface; /* XXX */
                    530: extern struct xenstore_domain_interface *xenstore_interface; /* XXX */
                    531:
                    532: static void xen_bt_set_readonly (vaddr_t);
                    533: static void xen_bootstrap_tables (vaddr_t, vaddr_t, int, int, int);
                    534:
                     535: /* How many PDEs? */
                    536: #if L2_SLOT_KERNBASE > 0
                    537: #define TABLE_L2_ENTRIES (2 * (NKL2_KIMG_ENTRIES + 1))
                    538: #else
                    539: #define TABLE_L2_ENTRIES (NKL2_KIMG_ENTRIES + 1)
                    540: #endif
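                          /*
                           * (NKL2_KIMG_ENTRIES + 1 L2 pages cover the kernel image with one
                           * to spare; the count is doubled when L2_SLOT_KERNBASE is distinct,
                           * presumably so both the KERNBASE and KERNTEXTOFF ranges can be
                           * mapped.)
                           */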
                    541:
                    542: /*
                     543:  * Construct and switch to new pagetables.
                     544:  * first_avail is the first vaddr we can use after
                     545:  * we get rid of the Xen pagetables.
                    546:  */
                    547:
                    548: vaddr_t xen_pmap_bootstrap (void);
                    549:
                    550: /*
                    551:  * Function to get rid of Xen bootstrap tables
                    552:  */
                    553:
1.6       bouyer    554: /* How many PDPs do we need: */
                    555: #ifdef PAE
                    556: /*
                     557:  * For PAE, we consider a single contiguous L2 "superpage" of 4 pages,
                    558:  * all of them mapped by the L3 page. We also need a shadow page
                    559:  * for L3[3].
                    560:  */
                    561: static const int l2_4_count = 6;
1.34.2.1  yamt      562: #elif defined(__x86_64__)
                    563: static const int l2_4_count = PTP_LEVELS;
1.6       bouyer    564: #else
                    565: static const int l2_4_count = PTP_LEVELS - 1;
                    566: #endif
                    567:
1.2       bouyer    568: vaddr_t
1.8       cegger    569: xen_pmap_bootstrap(void)
1.2       bouyer    570: {
1.4       bouyer    571:        int count, oldcount;
                    572:        long mapsize;
1.2       bouyer    573:        vaddr_t bootstrap_tables, init_tables;
                    574:
1.34.2.1  yamt      575:        memset(xpq_idx_array, 0, sizeof xpq_idx_array);
                    576:
1.6       bouyer    577:        xpmap_phys_to_machine_mapping =
                    578:            (unsigned long *)xen_start_info.mfn_list;
1.2       bouyer    579:        init_tables = xen_start_info.pt_base;
                     580:        __PRINTK(("xen_pmap_bootstrap init_tables=0x%lx\n", init_tables));
                    581:
                     582:        /* Space after the Xen bootstrap tables should be free */
                    583:        bootstrap_tables = xen_start_info.pt_base +
                    584:                (xen_start_info.nr_pt_frames * PAGE_SIZE);
                    585:
1.4       bouyer    586:        /*
                     587:         * Calculate how much space we need:
                     588:         * first, everything mapped before the Xen bootstrap tables
                    589:         */
                    590:        mapsize = init_tables - KERNTEXTOFF;
                    591:        /* after the tables we'll have:
                    592:         *  - UAREA
                    593:         *  - dummy user PGD (x86_64)
                    594:         *  - HYPERVISOR_shared_info
1.34.2.2! yamt      595:         *  - early_zerop
1.4       bouyer    596:         *  - ISA I/O mem (if needed)
                    597:         */
                    598:        mapsize += UPAGES * NBPG;
                    599: #ifdef __x86_64__
                    600:        mapsize += NBPG;
                    601: #endif
                    602:        mapsize += NBPG;
1.34.2.2! yamt      603:        mapsize += NBPG;
1.2       bouyer    604:
                    605: #ifdef DOM0OPS
1.10      cegger    606:        if (xendomain_is_dom0()) {
1.2       bouyer    607:                /* space for ISA I/O mem */
1.4       bouyer    608:                mapsize += IOM_SIZE;
                    609:        }
                    610: #endif
                     611:        /* at this point mapsize doesn't include the table size */
                    612:
                    613: #ifdef __x86_64__
                    614:        count = TABLE_L2_ENTRIES;
                    615: #else
                    616:        count = (mapsize + (NBPD_L2 -1)) >> L2_SHIFT;
                    617: #endif /* __x86_64__ */
                    618:
                     619:        /* now compute exactly how many L2 pages we need; the tables themselves must also fit in the range they map */
                    620:        XENPRINTK(("bootstrap_final mapsize 0x%lx count %d\n", mapsize, count));
                    621:        while (mapsize + (count + l2_4_count) * PAGE_SIZE + KERNTEXTOFF >
                    622:            ((long)count << L2_SHIFT) + KERNBASE) {
                    623:                count++;
1.2       bouyer    624:        }
1.4       bouyer    625: #ifndef __x86_64__
1.5       bouyer    626:        /*
                     627:         * one more L2 page: we'll allocate several pages after kva_start
                    628:         * in pmap_bootstrap() before pmap_growkernel(), which have not been
                    629:         * counted here. It's not a big issue to allocate one more L2 as
                    630:         * pmap_growkernel() will be called anyway.
                    631:         */
                    632:        count++;
1.4       bouyer    633:        nkptp[1] = count;
1.2       bouyer    634: #endif
                    635:
1.4       bouyer    636:        /*
                     637:         * install bootstrap pages. We may need more L2 pages here than
                     638:         * the final table will have, as it's installed after the final table
                    639:         */
                    640:        oldcount = count;
                    641:
                    642: bootstrap_again:
                    643:        XENPRINTK(("bootstrap_again oldcount %d\n", oldcount));
1.2       bouyer    644:        /*
                     645:         * The Xen space we'll reclaim may not be enough for our new page
                     646:         * tables; move the bootstrap tables if necessary
                    647:         */
1.4       bouyer    648:        if (bootstrap_tables < init_tables + ((count + l2_4_count) * PAGE_SIZE))
1.2       bouyer    649:                bootstrap_tables = init_tables +
1.4       bouyer    650:                                        ((count + l2_4_count) * PAGE_SIZE);
                    651:        /* make sure we have enough to map the bootstrap_tables */
                    652:        if (bootstrap_tables + ((oldcount + l2_4_count) * PAGE_SIZE) >
                    653:            ((long)oldcount << L2_SHIFT) + KERNBASE) {
                    654:                oldcount++;
                    655:                goto bootstrap_again;
                    656:        }
1.2       bouyer    657:
                    658:        /* Create temporary tables */
                    659:        xen_bootstrap_tables(xen_start_info.pt_base, bootstrap_tables,
1.4       bouyer    660:                xen_start_info.nr_pt_frames, oldcount, 0);
1.2       bouyer    661:
                    662:        /* Create final tables */
                    663:        xen_bootstrap_tables(bootstrap_tables, init_tables,
1.4       bouyer    664:            oldcount + l2_4_count, count, 1);
1.2       bouyer    665:
1.4       bouyer    666:        /* zero out free space after tables */
                    667:        memset((void *)(init_tables + ((count + l2_4_count) * PAGE_SIZE)), 0,
                    668:            (UPAGES + 1) * NBPG);
1.28      rmind     669:
                    670:        /* Finally, flush TLB. */
                    671:        xpq_queue_tlb_flush();
                    672:
1.4       bouyer    673:        return (init_tables + ((count + l2_4_count) * PAGE_SIZE));
1.2       bouyer    674: }
                    675:
                    676: /*
                    677:  * Build a new table and switch to it
                    678:  * old_count is # of old tables (including PGD, PDTPE and PDE)
                    679:  * new_count is # of new tables (PTE only)
                    680:  * we assume areas don't overlap
                    681:  */
                    682: static void
                    683: xen_bootstrap_tables (vaddr_t old_pgd, vaddr_t new_pgd,
                    684:        int old_count, int new_count, int final)
                    685: {
                    686:        pd_entry_t *pdtpe, *pde, *pte;
                    687:        pd_entry_t *cur_pgd, *bt_pgd;
1.6       bouyer    688:        paddr_t addr;
                    689:        vaddr_t page, avail, text_end, map_end;
1.2       bouyer    690:        int i;
                    691:        extern char __data_start;
1.34.2.2! yamt      692:        extern char *early_zerop; /* from pmap.c */
1.2       bouyer    693:
1.19      jym       694:        __PRINTK(("xen_bootstrap_tables(%#" PRIxVADDR ", %#" PRIxVADDR ","
                    695:            " %d, %d)\n",
1.2       bouyer    696:            old_pgd, new_pgd, old_count, new_count));
                    697:        text_end = ((vaddr_t)&__data_start) & ~PAGE_MASK;
                    698:        /*
                    699:         * size of R/W area after kernel text:
                    700:         *  xencons_interface (if present)
                    701:         *  xenstore_interface (if present)
1.6       bouyer    702:         *  table pages (new_count + l2_4_count entries)
1.2       bouyer    703:         * extra mappings (only when final is true):
1.4       bouyer    704:         *  UAREA
                    705:         *  dummy user PGD (x86_64 only)/gdt page (i386 only)
1.2       bouyer    706:         *  HYPERVISOR_shared_info
1.34.2.2! yamt      707:         *  early_zerop
1.2       bouyer    708:         *  ISA I/O mem (if needed)
                    709:         */
1.6       bouyer    710:        map_end = new_pgd + ((new_count + l2_4_count) * NBPG);
1.2       bouyer    711:        if (final) {
1.4       bouyer    712:                map_end += (UPAGES + 1) * NBPG;
                    713:                HYPERVISOR_shared_info = (shared_info_t *)map_end;
1.2       bouyer    714:                map_end += NBPG;
1.34.2.2! yamt      715:                early_zerop = (char *)map_end;
        !           716:                map_end += NBPG;
1.2       bouyer    717:        }
1.4       bouyer    718:        /*
                    719:         * we always set atdevbase, as it's used by init386 to find the first
                    720:         * available VA. map_end is updated only if we are dom0, so
                    721:         * atdevbase -> atdevbase + IOM_SIZE will be mapped only in
                    722:         * this case.
                    723:         */
                    724:        if (final)
                    725:                atdevbase = map_end;
1.2       bouyer    726: #ifdef DOM0OPS
1.10      cegger    727:        if (final && xendomain_is_dom0()) {
1.2       bouyer    728:                /* ISA I/O mem */
                    729:                map_end += IOM_SIZE;
                    730:        }
                    731: #endif /* DOM0OPS */
                    732:
                    733:        __PRINTK(("xen_bootstrap_tables text_end 0x%lx map_end 0x%lx\n",
                    734:            text_end, map_end));
1.19      jym       735:        __PRINTK(("console %#lx ", xen_start_info.console_mfn));
                    736:        __PRINTK(("xenstore %#" PRIx32 "\n", xen_start_info.store_mfn));
1.2       bouyer    737:
                    738:        /*
                    739:         * Create bootstrap page tables
                    740:         * What we need:
                    741:         * - a PGD (level 4)
                    742:         * - a PDTPE (level 3)
                    743:         * - a PDE (level2)
                    744:         * - some PTEs (level 1)
                    745:         */
                    746:
                    747:        cur_pgd = (pd_entry_t *) old_pgd;
                    748:        bt_pgd = (pd_entry_t *) new_pgd;
                    749:        memset (bt_pgd, 0, PAGE_SIZE);
                    750:        avail = new_pgd + PAGE_SIZE;
1.4       bouyer    751: #if PTP_LEVELS > 3
1.34.2.1  yamt      752:        /* per-cpu L4 PD */
                    753:        pd_entry_t *bt_cpu_pgd = bt_pgd;
                    754:        /* pmap_kernel() "shadow" L4 PD */
                    755:        bt_pgd = (pd_entry_t *) avail;
                    756:        memset(bt_pgd, 0, PAGE_SIZE);
                    757:        avail += PAGE_SIZE;
                    758:
1.2       bouyer    759:        /* Install level 3 */
                    760:        pdtpe = (pd_entry_t *) avail;
                    761:        memset (pdtpe, 0, PAGE_SIZE);
                    762:        avail += PAGE_SIZE;
                    763:
1.6       bouyer    764:        addr = ((u_long) pdtpe) - KERNBASE;
1.34.2.1  yamt      765:        bt_pgd[pl4_pi(KERNTEXTOFF)] = bt_cpu_pgd[pl4_pi(KERNTEXTOFF)] =
1.4       bouyer    766:            xpmap_ptom_masked(addr) | PG_k | PG_RW | PG_V;
1.2       bouyer    767:
1.19      jym       768:        __PRINTK(("L3 va %#lx pa %#" PRIxPADDR " entry %#" PRIxPADDR
                    769:            " -> L4[%#x]\n",
                    770:            pdtpe, addr, bt_pgd[pl4_pi(KERNTEXTOFF)], pl4_pi(KERNTEXTOFF)));
1.4       bouyer    771: #else
                    772:        pdtpe = bt_pgd;
                    773: #endif /* PTP_LEVELS > 3 */
1.2       bouyer    774:
1.4       bouyer    775: #if PTP_LEVELS > 2
1.2       bouyer    776:        /* Level 2 */
                    777:        pde = (pd_entry_t *) avail;
                    778:        memset(pde, 0, PAGE_SIZE);
                    779:        avail += PAGE_SIZE;
                    780:
1.6       bouyer    781:        addr = ((u_long) pde) - KERNBASE;
1.2       bouyer    782:        pdtpe[pl3_pi(KERNTEXTOFF)] =
1.6       bouyer    783:            xpmap_ptom_masked(addr) | PG_k | PG_V | PG_RW;
1.19      jym       784:        __PRINTK(("L2 va %#lx pa %#" PRIxPADDR " entry %#" PRIxPADDR
                    785:            " -> L3[%#x]\n",
                    786:            pde, addr, pdtpe[pl3_pi(KERNTEXTOFF)], pl3_pi(KERNTEXTOFF)));
1.6       bouyer    787: #elif defined(PAE)
                     788:        /* our PAE-style level 2: 5 contiguous pages (4 L2 + 1 shadow) */
                    789:        pde = (pd_entry_t *) avail;
                    790:        memset(pde, 0, PAGE_SIZE * 5);
                    791:        avail += PAGE_SIZE * 5;
                    792:        addr = ((u_long) pde) - KERNBASE;
                    793:        /*
                     794:         * enter the L2 pages in the L3.
                     795:         * The real L2 kernel PD will be the last one (so that
                     796:         * pde[L2_SLOT_KERN] always points to the shadow).
                    797:         */
                    798:        for (i = 0; i < 3; i++, addr += PAGE_SIZE) {
                    799:                /*
1.25      jym       800:                 * Xen doesn't want R/W mappings in L3 entries; it'll add
1.6       bouyer    801:                 * them itself.
                    802:                 */
                    803:                pdtpe[i] = xpmap_ptom_masked(addr) | PG_k | PG_V;
1.19      jym       804:                __PRINTK(("L2 va %#lx pa %#" PRIxPADDR " entry %#" PRIxPADDR
                    805:                    " -> L3[%#x]\n",
                    806:                    (vaddr_t)pde + PAGE_SIZE * i, addr, pdtpe[i], i));
1.6       bouyer    807:        }
                    808:        addr += PAGE_SIZE;
                    809:        pdtpe[3] = xpmap_ptom_masked(addr) | PG_k | PG_V;
1.19      jym       810:        __PRINTK(("L2 va %#lx pa %#" PRIxPADDR " entry %#" PRIxPADDR
                    811:            " -> L3[%#x]\n",
                    812:            (vaddr_t)pde + PAGE_SIZE * 4, addr, pdtpe[3], 3));
1.6       bouyer    813:
                    814: #else /* PAE */
1.4       bouyer    815:        pde = bt_pgd;
1.6       bouyer    816: #endif /* PTP_LEVELS > 2 */
1.2       bouyer    817:
                     818:        /* Level 1: one pte page per L2 slot, mapping KERNTEXTOFF..map_end */
                    819:        page = KERNTEXTOFF;
                    820:        for (i = 0; i < new_count; i ++) {
1.6       bouyer    821:                vaddr_t cur_page = page;
1.2       bouyer    822:
                    823:                pte = (pd_entry_t *) avail;
                    824:                avail += PAGE_SIZE;
                    825:
                    826:                memset(pte, 0, PAGE_SIZE);
                    827:                while (pl2_pi(page) == pl2_pi (cur_page)) {
                    828:                        if (page >= map_end) {
                    829:                                /* not mapped at all */
                    830:                                pte[pl1_pi(page)] = 0;
                    831:                                page += PAGE_SIZE;
                    832:                                continue;
                    833:                        }
                    834:                        pte[pl1_pi(page)] = xpmap_ptom_masked(page - KERNBASE);
                    835:                        if (page == (vaddr_t)HYPERVISOR_shared_info) {
                    836:                                pte[pl1_pi(page)] = xen_start_info.shared_info;
                    837:                                __PRINTK(("HYPERVISOR_shared_info "
1.19      jym       838:                                    "va %#lx pte %#" PRIxPADDR "\n",
                    839:                                    HYPERVISOR_shared_info, pte[pl1_pi(page)]));
1.2       bouyer    840:                        }
1.7       bouyer    841:                        if ((xpmap_ptom_masked(page - KERNBASE) >> PAGE_SHIFT)
1.12      cegger    842:                            == xen_start_info.console.domU.mfn) {
1.2       bouyer    843:                                xencons_interface = (void *)page;
1.19      jym       844:                                pte[pl1_pi(page)] = xen_start_info.console_mfn;
1.6       bouyer    845:                                pte[pl1_pi(page)] <<= PAGE_SHIFT;
1.2       bouyer    846:                                __PRINTK(("xencons_interface "
1.19      jym       847:                                    "va %#lx pte %#" PRIxPADDR "\n",
                    848:                                    xencons_interface, pte[pl1_pi(page)]));
1.2       bouyer    849:                        }
1.7       bouyer    850:                        if ((xpmap_ptom_masked(page - KERNBASE) >> PAGE_SHIFT)
                    851:                            == xen_start_info.store_mfn) {
1.2       bouyer    852:                                xenstore_interface = (void *)page;
1.6       bouyer    853:                                pte[pl1_pi(page)] = xen_start_info.store_mfn;
                    854:                                pte[pl1_pi(page)] <<= PAGE_SHIFT;
1.2       bouyer    855:                                __PRINTK(("xenstore_interface "
1.19      jym       856:                                    "va %#lx pte %#" PRIxPADDR "\n",
                    857:                                    xenstore_interface, pte[pl1_pi(page)]));
1.2       bouyer    858:                        }
                    859: #ifdef DOM0OPS
                    860:                        if (page >= (vaddr_t)atdevbase &&
                    861:                            page < (vaddr_t)atdevbase + IOM_SIZE) {
                    862:                                pte[pl1_pi(page)] =
                    863:                                    IOM_BEGIN + (page - (vaddr_t)atdevbase);
                    864:                        }
                    865: #endif
1.4       bouyer    866:                        pte[pl1_pi(page)] |= PG_k | PG_V;
1.2       bouyer    867:                        if (page < text_end) {
                    868:                                /* map kernel text RO */
                    869:                                pte[pl1_pi(page)] |= 0;
                    870:                        } else if (page >= old_pgd
                    871:                            && page < old_pgd + (old_count * PAGE_SIZE)) {
                    872:                                /* map old page tables RO */
                    873:                                pte[pl1_pi(page)] |= 0;
                    874:                        } else if (page >= new_pgd &&
1.6       bouyer    875:                            page < new_pgd + ((new_count + l2_4_count) * PAGE_SIZE)) {
1.2       bouyer    876:                                /* map new page tables RO */
                    877:                                pte[pl1_pi(page)] |= 0;
1.34.2.2! yamt      878: #ifdef i386
        !           879:                        } else if (page == (vaddr_t)tmpgdt) {
        !           880:                                /*
        !           881:                                 * Map bootstrap gdt R/O. Later, we
        !           882:                                 * will re-add this page to uvm
        !           883:                                 * after making it writable.
        !           884:                                 */
        !           885:
        !           886:                                pte[pl1_pi(page)] = 0;
        !           887:                                page += PAGE_SIZE;
        !           888:                                continue;
        !           889: #endif /* i386 */
1.2       bouyer    890:                        } else {
                    891:                                /* map page RW */
                    892:                                pte[pl1_pi(page)] |= PG_RW;
                    893:                        }
1.6       bouyer    894:
1.9       tron      895:                        if ((page  >= old_pgd && page < old_pgd + (old_count * PAGE_SIZE))
                    896:                            || page >= new_pgd) {
1.19      jym       897:                                __PRINTK(("va %#lx pa %#lx "
                    898:                                    "entry 0x%" PRIxPADDR " -> L1[%#x]\n",
1.2       bouyer    899:                                    page, page - KERNBASE,
1.19      jym       900:                                    pte[pl1_pi(page)], pl1_pi(page)));
1.9       tron      901:                        }
1.2       bouyer    902:                        page += PAGE_SIZE;
                    903:                }
                    904:
1.6       bouyer    905:                addr = ((u_long) pte) - KERNBASE;
1.2       bouyer    906:                pde[pl2_pi(cur_page)] =
1.4       bouyer    907:                    xpmap_ptom_masked(addr) | PG_k | PG_RW | PG_V;
1.19      jym       908:                __PRINTK(("L1 va %#lx pa %#" PRIxPADDR " entry %#" PRIxPADDR
                    909:                    " -> L2[%#x]\n",
                    910:                    pte, addr, pde[pl2_pi(cur_page)], pl2_pi(cur_page)));
1.2       bouyer    911:                /* Mark readonly */
                    912:                xen_bt_set_readonly((vaddr_t) pte);
                    913:        }
                    914:
                    915:        /* Install recursive page tables mapping */
1.6       bouyer    916: #ifdef PAE
                    917:        /*
                     918:         * we need a shadow page for the kernel's L2 page.
                     919:         * The real L2 kernel PD will be the last one (so that
                     920:         * pde[L2_SLOT_KERN] always points to the shadow).
                    921:         */
                    922:        memcpy(&pde[L2_SLOT_KERN + NPDPG], &pde[L2_SLOT_KERN], PAGE_SIZE);
1.34.2.1  yamt      923:        cpu_info_primary.ci_kpm_pdir = &pde[L2_SLOT_KERN + NPDPG];
                    924:        cpu_info_primary.ci_kpm_pdirpa =
                    925:            (vaddr_t) cpu_info_primary.ci_kpm_pdir - KERNBASE;
1.6       bouyer    926:
                    927:        /*
                    928:         * We don't enter a recursive entry from the L3 PD. Instead,
                     929:         * we enter the first 4 L2 pages, which include the kernel's L2
                     930:         * shadow. But we have to enter the shadow after switching
                    931:         * %cr3, or Xen will refcount some PTE with the wrong type.
                    932:         */
                    933:        addr = (u_long)pde - KERNBASE;
                    934:        for (i = 0; i < 3; i++, addr += PAGE_SIZE) {
                    935:                pde[PDIR_SLOT_PTE + i] = xpmap_ptom_masked(addr) | PG_k | PG_V;
1.19      jym       936:                __PRINTK(("pde[%d] va %#" PRIxVADDR " pa %#" PRIxPADDR
                    937:                    " entry %#" PRIxPADDR "\n",
                    938:                    (int)(PDIR_SLOT_PTE + i), pde + PAGE_SIZE * i,
                    939:                    addr, pde[PDIR_SLOT_PTE + i]));
1.6       bouyer    940:        }
                    941: #if 0
                    942:        addr += PAGE_SIZE; /* point to shadow L2 */
                    943:        pde[PDIR_SLOT_PTE + 3] = xpmap_ptom_masked(addr) | PG_k | PG_V;
                    944:        __PRINTK(("pde[%d] va 0x%lx pa 0x%lx entry 0x%" PRIx64 "\n",
                    945:            (int)(PDIR_SLOT_PTE + 3), pde + PAGE_SIZE * 4, (long)addr,
                    946:            (int64_t)pde[PDIR_SLOT_PTE + 3]));
                    947: #endif
1.14      jym       948:        /* Mark tables RO, and pin the kernel's shadow as L2 */
1.6       bouyer    949:        addr = (u_long)pde - KERNBASE;
                    950:        for (i = 0; i < 5; i++, addr += PAGE_SIZE) {
                    951:                xen_bt_set_readonly(((vaddr_t)pde) + PAGE_SIZE * i);
                    952:                if (i == 2 || i == 3)
                    953:                        continue;
                    954: #if 0
                    955:                __PRINTK(("pin L2 %d addr 0x%" PRIx64 "\n", i, (int64_t)addr));
1.24      jym       956:                xpq_queue_pin_l2_table(xpmap_ptom_masked(addr));
1.6       bouyer    957: #endif
                    958:        }
                    959:        if (final) {
                    960:                addr = (u_long)pde - KERNBASE + 3 * PAGE_SIZE;
1.19      jym       961:                __PRINTK(("pin L2 %d addr %#" PRIxPADDR "\n", 2, addr));
1.24      jym       962:                xpq_queue_pin_l2_table(xpmap_ptom_masked(addr));
1.6       bouyer    963:        }
                    964: #if 0
                    965:        addr = (u_long)pde - KERNBASE + 2 * PAGE_SIZE;
                    966:        __PRINTK(("pin L2 %d addr 0x%" PRIx64 "\n", 2, (int64_t)addr));
1.24      jym       967:        xpq_queue_pin_l2_table(xpmap_ptom_masked(addr));
1.6       bouyer    968: #endif
                    969: #else /* PAE */
1.34.2.1  yamt      970:        /* recursive entry in higher-level per-cpu PD and pmap_kernel() */
                    971:        bt_pgd[PDIR_SLOT_PTE] = xpmap_ptom_masked((paddr_t)bt_pgd - KERNBASE) | PG_k | PG_V;
                    972: #ifdef __x86_64__
                    973:        bt_cpu_pgd[PDIR_SLOT_PTE] =
                    974:            xpmap_ptom_masked((paddr_t)bt_cpu_pgd - KERNBASE) | PG_k | PG_V;
                    975: #endif /* __x86_64__ */
1.19      jym       976:        __PRINTK(("bt_pgd[PDIR_SLOT_PTE] va %#" PRIxVADDR " pa %#" PRIxPADDR
                    977:            " entry %#" PRIxPADDR "\n", new_pgd, (paddr_t)new_pgd - KERNBASE,
                    978:            bt_pgd[PDIR_SLOT_PTE]));
1.2       bouyer    979:        /* Mark tables RO */
                    980:        xen_bt_set_readonly((vaddr_t) pde);
1.6       bouyer    981: #endif
                    982: #if PTP_LEVELS > 2 || defined(PAE)
1.2       bouyer    983:        xen_bt_set_readonly((vaddr_t) pdtpe);
1.4       bouyer    984: #endif
                    985: #if PTP_LEVELS > 3
1.2       bouyer    986:        xen_bt_set_readonly(new_pgd);
1.4       bouyer    987: #endif
1.2       bouyer    988:        /* Pin the PGD */
1.26      jym       989:        __PRINTK(("pin PGD: %"PRIxVADDR"\n", new_pgd - KERNBASE));
1.24      jym       990: #ifdef __x86_64__
                    991:        xpq_queue_pin_l4_table(xpmap_ptom_masked(new_pgd - KERNBASE));
                    992: #elif PAE
1.6       bouyer    993:        xpq_queue_pin_l3_table(xpmap_ptom_masked(new_pgd - KERNBASE));
                    994: #else
1.24      jym       995:        xpq_queue_pin_l2_table(xpmap_ptom_masked(new_pgd - KERNBASE));
1.6       bouyer    996: #endif
1.21      jym       997:
1.4       bouyer    998:        /* Save phys. addr of PDP, for libkvm. */
1.6       bouyer    999: #ifdef PAE
1.21      jym      1000:        PDPpaddr = (u_long)pde - KERNBASE; /* PDP is the L2 with PAE */
                   1001: #else
1.34.2.1  yamt     1002:        PDPpaddr = (u_long)bt_pgd - KERNBASE;
1.21      jym      1003: #endif
                   1004:
1.2       bouyer   1005:        /* Switch to new tables */
1.14      jym      1006:        __PRINTK(("switch to PGD\n"));
1.2       bouyer   1007:        xpq_queue_pt_switch(xpmap_ptom_masked(new_pgd - KERNBASE));
1.19      jym      1008:        __PRINTK(("bt_pgd[PDIR_SLOT_PTE] now entry %#" PRIxPADDR "\n",
                   1009:            bt_pgd[PDIR_SLOT_PTE]));
1.21      jym      1010:
1.6       bouyer   1011: #ifdef PAE
                   1012:        if (final) {
1.21      jym      1013:                /* save the address of the L3 page */
                   1014:                cpu_info_primary.ci_pae_l3_pdir = pdtpe;
                   1015:                cpu_info_primary.ci_pae_l3_pdirpa = (new_pgd - KERNBASE);
                   1016:
1.6       bouyer   1017:                /* now enter kernel's PTE mappings */
                   1018:                addr =  (u_long)pde - KERNBASE + PAGE_SIZE * 3;
                   1019:                xpq_queue_pte_update(
                   1020:                    xpmap_ptom(((vaddr_t)&pde[PDIR_SLOT_PTE + 3]) - KERNBASE),
                   1021:                    xpmap_ptom_masked(addr) | PG_k | PG_V);
                   1022:                xpq_flush_queue();
                   1023:        }
1.34.2.1  yamt     1024: #elif defined(__x86_64__)
                   1025:        if (final) {
                   1026:                /* save the address of the real per-cpu L4 pgd page */
                   1027:                cpu_info_primary.ci_kpm_pdir = bt_cpu_pgd;
                   1028:                cpu_info_primary.ci_kpm_pdirpa = ((paddr_t) bt_cpu_pgd - KERNBASE);
                   1029:        }
1.6       bouyer   1030: #endif
                   1031:
1.2       bouyer   1032:        /* Now we can safely reclaim space taken by old tables */
                   1033:
1.14      jym      1034:        __PRINTK(("unpin old PGD\n"));
1.2       bouyer   1035:        /* Unpin old PGD */
                   1036:        xpq_queue_unpin_table(xpmap_ptom_masked(old_pgd - KERNBASE));
                   1037:        /* Mark old tables RW */
                   1038:        page = old_pgd;
                   1039:        addr = (paddr_t) pde[pl2_pi(page)] & PG_FRAME;
                   1040:        addr = xpmap_mtop(addr);
1.6       bouyer   1041:        pte = (pd_entry_t *) ((u_long)addr + KERNBASE);
1.2       bouyer   1042:        pte += pl1_pi(page);
1.19      jym      1043:        __PRINTK(("*pde %#" PRIxPADDR " addr %#" PRIxPADDR " pte %#lx\n",
                   1044:            pde[pl2_pi(page)], addr, (long)pte));
1.2       bouyer   1045:        while (page < old_pgd + (old_count * PAGE_SIZE) && page < map_end) {
1.6       bouyer   1046:                addr = xpmap_ptom(((u_long) pte) - KERNBASE);
1.19      jym      1047:                XENPRINTK(("addr %#" PRIxPADDR " pte %#lx "
                   1048:                   "*pte %#" PRIxPADDR "\n",
                   1049:                   addr, (long)pte, *pte));
1.6       bouyer   1050:                xpq_queue_pte_update(addr, *pte | PG_RW);
1.2       bouyer   1051:                page += PAGE_SIZE;
                   1052:                /*
                   1053:                 * Our ptes are contiguous
                   1054:                 * so it's safe to just "++" here
                   1055:                 */
                   1056:                pte++;
                   1057:        }
                   1058:        xpq_flush_queue();
                   1059: }
                   1060:
                   1061:
                   1062: /*
                   1063:  * Bootstrap helper functions
                   1064:  */
                   1065:
                   1066: /*
                   1067:  * Mark a page readonly
                   1068:  * XXX: assuming vaddr = paddr + KERNBASE
                   1069:  */
                   1070:
                   1071: static void
                   1072: xen_bt_set_readonly (vaddr_t page)
                   1073: {
                   1074:        pt_entry_t entry;
                   1075:
                   1076:        entry = xpmap_ptom_masked(page - KERNBASE);
1.4       bouyer   1077:        entry |= PG_k | PG_V;
1.2       bouyer   1078:
                   1079:        HYPERVISOR_update_va_mapping (page, entry, UVMF_INVLPG);
                   1080: }
1.4       bouyer   1081:
                   1082: #ifdef __x86_64__
                   1083: void
                   1084: xen_set_user_pgd(paddr_t page)
                   1085: {
                   1086:        struct mmuext_op op;
                   1087:        int s = splvm();
                   1088:
                   1089:        xpq_flush_queue();
                   1090:        op.cmd = MMUEXT_NEW_USER_BASEPTR;
1.34      jym      1091:        op.arg1.mfn = pfn_to_mfn(page >> PAGE_SHIFT);
1.4       bouyer   1092:         if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
                   1093:                panic("xen_set_user_pgd: failed to install new user page"
1.19      jym      1094:                        " directory %#" PRIxPADDR, page);
1.4       bouyer   1095:        splx(s);
                   1096: }
                   1097: #endif /* __x86_64__ */
