[BACK]Return to x86_xpmap.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / arch / xen / x86

Annotation of src/sys/arch/xen/x86/x86_xpmap.c, Revision 1.52.2.1

1.52.2.1! tls         1: /*     $NetBSD: x86_xpmap.c,v 1.53 2014/05/06 04:26:24 cherry Exp $    */
1.2       bouyer      2:
                      3: /*
                      4:  * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr>
                      5:  *
                      6:  * Permission to use, copy, modify, and distribute this software for any
                      7:  * purpose with or without fee is hereby granted, provided that the above
                      8:  * copyright notice and this permission notice appear in all copies.
                      9:  *
                     10:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                     11:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     12:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
                     13:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     14:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     15:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     16:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     17:  */
                     18:
                     19: /*
                     20:  * Copyright (c) 2006, 2007 Manuel Bouyer.
                     21:  *
                     22:  * Redistribution and use in source and binary forms, with or without
                     23:  * modification, are permitted provided that the following conditions
                     24:  * are met:
                     25:  * 1. Redistributions of source code must retain the above copyright
                     26:  *    notice, this list of conditions and the following disclaimer.
                     27:  * 2. Redistributions in binary form must reproduce the above copyright
                     28:  *    notice, this list of conditions and the following disclaimer in the
                     29:  *    documentation and/or other materials provided with the distribution.
                     30:  *
                     31:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
                     32:  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
                     33:  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
                     34:  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
                     35:  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
                     36:  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
                     37:  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
                     38:  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
                     39:  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
                     40:  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
                     41:  *
                     42:  */
                     43:
                     44: /*
                     45:  *
                     46:  * Copyright (c) 2004 Christian Limpach.
                     47:  * All rights reserved.
                     48:  *
                     49:  * Redistribution and use in source and binary forms, with or without
                     50:  * modification, are permitted provided that the following conditions
                     51:  * are met:
                     52:  * 1. Redistributions of source code must retain the above copyright
                     53:  *    notice, this list of conditions and the following disclaimer.
                     54:  * 2. Redistributions in binary form must reproduce the above copyright
                     55:  *    notice, this list of conditions and the following disclaimer in the
                     56:  *    documentation and/or other materials provided with the distribution.
                     57:  *
                     58:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
                     59:  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
                     60:  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
                     61:  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
                     62:  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
                     63:  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
                     64:  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
                     65:  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
                     66:  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
                     67:  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
                     68:  */
                     69:
                     70:
                     71: #include <sys/cdefs.h>
1.52.2.1! tls        72: __KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.53 2014/05/06 04:26:24 cherry Exp $");
1.2       bouyer     73:
                     74: #include "opt_xen.h"
1.4       bouyer     75: #include "opt_ddb.h"
                     76: #include "ksyms.h"
1.2       bouyer     77:
                     78: #include <sys/param.h>
                     79: #include <sys/systm.h>
1.38      cherry     80: #include <sys/mutex.h>
1.42      bouyer     81: #include <sys/cpu.h>
1.2       bouyer     82:
                     83: #include <uvm/uvm.h>
                     84:
1.42      bouyer     85: #include <x86/pmap.h>
1.2       bouyer     86: #include <machine/gdt.h>
                     87: #include <xen/xenfunc.h>
                     88:
                     89: #include <dev/isa/isareg.h>
                     90: #include <machine/isa_machdep.h>
                     91:
                     92: #undef XENDEBUG
                     93: /* #define XENDEBUG_SYNC */
                     94: /* #define     XENDEBUG_LOW */
                     95:
                     96: #ifdef XENDEBUG
                     97: #define        XENPRINTF(x) printf x
                     98: #define        XENPRINTK(x) printk x
                     99: #define        XENPRINTK2(x) /* printk x */
                    100:
                    101: static char XBUF[256];
                    102: #else
                    103: #define        XENPRINTF(x)
                    104: #define        XENPRINTK(x)
                    105: #define        XENPRINTK2(x)
                    106: #endif
                    107: #define        PRINTF(x) printf x
                    108: #define        PRINTK(x) printk x
                    109:
/* Shared-info page; NOTE(review): assigned outside this chunk — confirm where. */
volatile shared_info_t *HYPERVISOR_shared_info;
/* Xen requires the start_info struct to be page aligned */
union start_info_union start_info_union __aligned(PAGE_SIZE);
/* PFN->MFN table; points at xen_start_info.mfn_list (set in xen_pmap_bootstrap). */
unsigned long *xpmap_phys_to_machine_mapping;
/* Lock declared here; serialization scope defined by users elsewhere. */
kmutex_t pte_lock;
1.2       bouyer    115:
                    116: void xen_failsafe_handler(void);
                    117:
                    118: #define HYPERVISOR_mmu_update_self(req, count, success_count) \
                    119:        HYPERVISOR_mmu_update((req), (count), (success_count), DOMID_SELF)
                    120:
/*
 * kcpuset internally uses an array of uint32_t while xen uses an array of
 * u_long. As we're little-endian we can cast one to the other.
 * kcpuset_export_u32() fills xcpum_km; xcpum_xm's address is then handed
 * to the hypervisor as op.arg2.vcpumask (see xen_mcast_invlpg et al.).
 */
typedef union {
#ifdef _LP64
	uint32_t xcpum_km[2];
#else
	uint32_t xcpum_km[1];
#endif
	u_long   xcpum_xm;
} xcpumask_t;
                    133:
/*
 * Last-resort fault handler; unrecoverable, so panic immediately.
 * NOTE(review): presumably registered with the hypervisor elsewhere
 * (entry point setup is not in this file) — confirm against locore.
 */
void
xen_failsafe_handler(void)
{

	panic("xen_failsafe_handler called!\n");
}
                    140:
                    141:
/*
 * Install a new LDT for the current CPU.
 *
 * Every page backing the descriptor table is made read-only
 * (PG_RW cleared in its PTE) before the table is handed to Xen via
 * xpq_queue_set_ldt() — the ordering here is deliberate; Xen's
 * MMUEXT_SET_LDT expects non-writable LDT mappings.
 *
 * base:    kernel VA of the descriptor table (must be >= VM_MIN_KERNEL_ADDRESS)
 * entries: number of descriptors in the table
 */
void
xen_set_ldt(vaddr_t base, uint32_t entries)
{
	vaddr_t va;
	vaddr_t end;
	pt_entry_t *ptp;
	int s;

#ifdef __x86_64__
	/* amd64: 8 bytes per descriptor. */
	end = base + (entries << 3);
#else
	end = base + entries * sizeof(union descriptor);
#endif

	/* Clear write permission on every page of the table first. */
	for (va = base; va < end; va += PAGE_SIZE) {
		KASSERT(va >= VM_MIN_KERNEL_ADDRESS);
		ptp = kvtopte(va);
		XENPRINTF(("xen_set_ldt %#" PRIxVADDR " %d %p\n",
		    base, entries, ptp));
		pmap_pte_clearbits(ptp, PG_RW);
	}
	s = splvm();
	xpq_queue_set_ldt(base, entries);
	splx(s);
}
                    167:
#ifdef XENDEBUG
void xpq_debug_dump(void);
#endif

/* Per-CPU batches of pending MMU update requests for the hypervisor. */
#define XPQUEUE_SIZE 2048
static mmu_update_t xpq_queue_array[MAXCPUS][XPQUEUE_SIZE];
/* Per-CPU count of entries currently queued in xpq_queue_array. */
static int xpq_idx_array[MAXCPUS];

#ifdef i386
extern union descriptor tmpgdt[];
#endif /* i386 */
/*
 * Push the current CPU's batch of pending MMU updates to the hypervisor.
 *
 * On partial success (hypercall failed but 'ok' entries were applied)
 * the already-applied entries are skipped and the remainder retried.
 * If no progress at all was made, dump every CPU's queue (and, on
 * amd64, the per-CPU kernel PD entries) and panic — a rejected MMU
 * update means the page tables are in an unrecoverable state.
 */
void
xpq_flush_queue(void)
{
	int i, ok = 0, ret;

	mmu_update_t *xpq_queue = xpq_queue_array[curcpu()->ci_cpuid];
	int xpq_idx = xpq_idx_array[curcpu()->ci_cpuid];

	XENPRINTK2(("flush queue %p entries %d\n", xpq_queue, xpq_idx));
	for (i = 0; i < xpq_idx; i++)
		XENPRINTK2(("%d: 0x%08" PRIx64 " 0x%08" PRIx64 "\n", i,
		    xpq_queue[i].ptr, xpq_queue[i].val));

retry:
	ret = HYPERVISOR_mmu_update_self(xpq_queue, xpq_idx, &ok);

	if (xpq_idx != 0 && ret < 0) {
		struct cpu_info *ci;
		CPU_INFO_ITERATOR cii;

		printf("xpq_flush_queue: %d entries (%d successful) on "
		    "cpu%d (%ld)\n",
		    xpq_idx, ok, curcpu()->ci_index, curcpu()->ci_cpuid);

		/* Partial progress: skip the applied entries and retry. */
		if (ok != 0) {
			xpq_queue += ok;
			xpq_idx -= ok;
			ok = 0;
			goto retry;
		}

		/* No progress: dump all queues before panicking. */
		for (CPU_INFO_FOREACH(cii, ci)) {
			xpq_queue = xpq_queue_array[ci->ci_cpuid];
			xpq_idx = xpq_idx_array[ci->ci_cpuid];
			printf("cpu%d (%ld):\n", ci->ci_index, ci->ci_cpuid);
			for (i = 0; i < xpq_idx; i++) {
				printf("  0x%016" PRIx64 ": 0x%016" PRIx64 "\n",
				   xpq_queue[i].ptr, xpq_queue[i].val);
			}
#ifdef __x86_64__
			for (i = 0; i < PDIR_SLOT_PTE; i++) {
				if (ci->ci_kpm_pdir[i] == 0)
					continue;
				printf(" kpm_pdir[%d]: 0x%" PRIx64 "\n",
				    i, ci->ci_kpm_pdir[i]);
			}
#endif
		}
		panic("HYPERVISOR_mmu_update failed, ret: %d\n", ret);
	}
	xpq_idx_array[curcpu()->ci_cpuid] = 0;
}
                    231:
                    232: static inline void
                    233: xpq_increment_idx(void)
                    234: {
                    235:
1.41      cherry    236:        if (__predict_false(++xpq_idx_array[curcpu()->ci_cpuid] == XPQUEUE_SIZE))
1.2       bouyer    237:                xpq_flush_queue();
                    238: }
                    239:
                    240: void
                    241: xpq_queue_machphys_update(paddr_t ma, paddr_t pa)
                    242: {
1.35      cherry    243:
1.41      cherry    244:        mmu_update_t *xpq_queue = xpq_queue_array[curcpu()->ci_cpuid];
                    245:        int xpq_idx = xpq_idx_array[curcpu()->ci_cpuid];
1.35      cherry    246:
1.6       bouyer    247:        XENPRINTK2(("xpq_queue_machphys_update ma=0x%" PRIx64 " pa=0x%" PRIx64
                    248:            "\n", (int64_t)ma, (int64_t)pa));
1.35      cherry    249:
1.2       bouyer    250:        xpq_queue[xpq_idx].ptr = ma | MMU_MACHPHYS_UPDATE;
1.45      jym       251:        xpq_queue[xpq_idx].val = pa >> PAGE_SHIFT;
1.2       bouyer    252:        xpq_increment_idx();
                    253: #ifdef XENDEBUG_SYNC
                    254:        xpq_flush_queue();
                    255: #endif
                    256: }
                    257:
                    258: void
1.6       bouyer    259: xpq_queue_pte_update(paddr_t ptr, pt_entry_t val)
1.2       bouyer    260: {
                    261:
1.41      cherry    262:        mmu_update_t *xpq_queue = xpq_queue_array[curcpu()->ci_cpuid];
                    263:        int xpq_idx = xpq_idx_array[curcpu()->ci_cpuid];
1.35      cherry    264:
1.6       bouyer    265:        KASSERT((ptr & 3) == 0);
1.2       bouyer    266:        xpq_queue[xpq_idx].ptr = (paddr_t)ptr | MMU_NORMAL_PT_UPDATE;
                    267:        xpq_queue[xpq_idx].val = val;
                    268:        xpq_increment_idx();
                    269: #ifdef XENDEBUG_SYNC
                    270:        xpq_flush_queue();
                    271: #endif
                    272: }
                    273:
/*
 * Switch to a new top-level page table whose MA is 'pa'.
 * Synchronous: the batch queue is flushed before MMUEXT_NEW_BASEPTR
 * is issued directly.
 */
void
xpq_queue_pt_switch(paddr_t pa)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_pt_switch: 0x%" PRIx64 " 0x%" PRIx64 "\n",
	    (int64_t)pa, (int64_t)pa));
	op.cmd = MMUEXT_NEW_BASEPTR;
	op.arg1.mfn = pa >> PAGE_SHIFT;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_pt_switch");
}
                    287:
/*
 * Pin the page-table page at PA 'pa'.
 * 'lvl' is the MMUEXT_PIN_L*_TABLE command value itself and is passed
 * straight through as op.cmd (hence the "l%d" debug print of lvl + 1).
 * Synchronous: the batch queue is flushed first.
 */
void
xpq_queue_pin_table(paddr_t pa, int lvl)
{
	struct mmuext_op op;

	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_pin_l%d_table: %#" PRIxPADDR "\n",
	    lvl + 1, pa));

	op.arg1.mfn = pa >> PAGE_SHIFT;
	op.cmd = lvl;

	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_pin_table");
}
                    304:
/*
 * Unpin the page-table page at PA 'pa' (MMUEXT_UNPIN_TABLE).
 * Synchronous: the batch queue is flushed first.
 */
void
xpq_queue_unpin_table(paddr_t pa)
{
	struct mmuext_op op;

	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_unpin_table: %#" PRIxPADDR "\n", pa));
	op.arg1.mfn = pa >> PAGE_SHIFT;
	op.cmd = MMUEXT_UNPIN_TABLE;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_unpin_table");
}
                    318:
/*
 * Tell Xen to use the page-aligned descriptor table at VA 'va' with
 * 'entries' descriptors as the LDT (MMUEXT_SET_LDT).
 * Synchronous: the batch queue is flushed first.
 */
void
xpq_queue_set_ldt(vaddr_t va, uint32_t entries)
{
	struct mmuext_op op;

	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_set_ldt\n"));
	KASSERT(va == (va & ~PAGE_MASK));
	op.cmd = MMUEXT_SET_LDT;
	op.arg1.linear_addr = va;
	op.arg2.nr_ents = entries;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_set_ldt");
}
                    334:
/*
 * Flush the entire TLB of the local vCPU (MMUEXT_TLB_FLUSH_LOCAL).
 * Synchronous: the batch queue is flushed first.
 */
void
xpq_queue_tlb_flush(void)
{
	struct mmuext_op op;

	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_tlb_flush\n"));
	op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_tlb_flush");
}
                    347:
/*
 * Flush pending MMU updates, then write back and invalidate the CPU
 * caches (wbinvd).  The splvm()/splx() bracket is marked for removal
 * (see the XXX below).
 */
void
xpq_flush_cache(void)
{
	int s = splvm();

	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_flush_cache\n"));
	asm("wbinvd":::"memory");
	splx(s); /* XXX: removeme */
}
                    359:
/*
 * Invalidate the local vCPU's TLB entry for the page containing 'va'
 * (MMUEXT_INVLPG_LOCAL).  Synchronous: the batch queue is flushed first.
 */
void
xpq_queue_invlpg(vaddr_t va)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_invlpg %#" PRIxVADDR "\n", va));
	op.cmd = MMUEXT_INVLPG_LOCAL;
	op.arg1.linear_addr = (va & ~PAGE_MASK);
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_invlpg");
}
                    372:
1.29      cherry    373: void
1.43      rmind     374: xen_mcast_invlpg(vaddr_t va, kcpuset_t *kc)
1.29      cherry    375: {
1.48      bouyer    376:        xcpumask_t xcpumask;
1.29      cherry    377:        mmuext_op_t op;
                    378:
1.49      rmind     379:        kcpuset_export_u32(kc, &xcpumask.xcpum_km[0], sizeof(xcpumask));
1.44      rmind     380:
1.29      cherry    381:        /* Flush pending page updates */
                    382:        xpq_flush_queue();
                    383:
                    384:        op.cmd = MMUEXT_INVLPG_MULTI;
                    385:        op.arg1.linear_addr = va;
1.48      bouyer    386:        op.arg2.vcpumask = &xcpumask.xcpum_xm;
1.29      cherry    387:
                    388:        if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0) {
                    389:                panic("xpq_queue_invlpg_all");
                    390:        }
                    391:
                    392:        return;
                    393: }
                    394:
                    395: void
                    396: xen_bcast_invlpg(vaddr_t va)
                    397: {
                    398:        mmuext_op_t op;
                    399:
                    400:        /* Flush pending page updates */
                    401:        xpq_flush_queue();
                    402:
                    403:        op.cmd = MMUEXT_INVLPG_ALL;
                    404:        op.arg1.linear_addr = va;
                    405:
                    406:        if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0) {
                    407:                panic("xpq_queue_invlpg_all");
                    408:        }
                    409:
                    410:        return;
                    411: }
                    412:
                    413: /* This is a synchronous call. */
                    414: void
1.43      rmind     415: xen_mcast_tlbflush(kcpuset_t *kc)
1.29      cherry    416: {
1.48      bouyer    417:        xcpumask_t xcpumask;
1.29      cherry    418:        mmuext_op_t op;
                    419:
1.49      rmind     420:        kcpuset_export_u32(kc, &xcpumask.xcpum_km[0], sizeof(xcpumask));
1.44      rmind     421:
1.29      cherry    422:        /* Flush pending page updates */
                    423:        xpq_flush_queue();
                    424:
                    425:        op.cmd = MMUEXT_TLB_FLUSH_MULTI;
1.48      bouyer    426:        op.arg2.vcpumask = &xcpumask.xcpum_xm;
1.29      cherry    427:
                    428:        if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0) {
                    429:                panic("xpq_queue_invlpg_all");
                    430:        }
                    431:
                    432:        return;
                    433: }
                    434:
                    435: /* This is a synchronous call. */
                    436: void
                    437: xen_bcast_tlbflush(void)
                    438: {
                    439:        mmuext_op_t op;
                    440:
                    441:        /* Flush pending page updates */
                    442:        xpq_flush_queue();
                    443:
                    444:        op.cmd = MMUEXT_TLB_FLUSH_ALL;
                    445:
                    446:        if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0) {
                    447:                panic("xpq_queue_invlpg_all");
                    448:        }
                    449:
                    450:        return;
                    451: }
                    452:
                    453: /* This is a synchronous call. */
                    454: void
1.43      rmind     455: xen_vcpu_mcast_invlpg(vaddr_t sva, vaddr_t eva, kcpuset_t *kc)
1.29      cherry    456: {
                    457:        KASSERT(eva > sva);
                    458:
                    459:        /* Flush pending page updates */
                    460:        xpq_flush_queue();
                    461:
                    462:        /* Align to nearest page boundary */
                    463:        sva &= ~PAGE_MASK;
                    464:        eva &= ~PAGE_MASK;
                    465:
                    466:        for ( ; sva <= eva; sva += PAGE_SIZE) {
1.43      rmind     467:                xen_mcast_invlpg(sva, kc);
1.29      cherry    468:        }
                    469:
                    470:        return;
                    471: }
                    472:
                    473: /* This is a synchronous call. */
                    474: void
                    475: xen_vcpu_bcast_invlpg(vaddr_t sva, vaddr_t eva)
                    476: {
                    477:        KASSERT(eva > sva);
                    478:
                    479:        /* Flush pending page updates */
                    480:        xpq_flush_queue();
                    481:
                    482:        /* Align to nearest page boundary */
                    483:        sva &= ~PAGE_MASK;
                    484:        eva &= ~PAGE_MASK;
                    485:
                    486:        for ( ; sva <= eva; sva += PAGE_SIZE) {
                    487:                xen_bcast_invlpg(sva);
                    488:        }
                    489:
                    490:        return;
                    491: }
                    492:
1.52.2.1! tls       493: /* Copy a page */
        !           494: void
        !           495: xen_copy_page(paddr_t srcpa, paddr_t dstpa)
        !           496: {
        !           497:        mmuext_op_t op;
        !           498:
        !           499:        op.cmd = MMUEXT_COPY_PAGE;
        !           500:        op.arg1.mfn = xpmap_ptom(dstpa) >> PAGE_SHIFT;
        !           501:        op.arg2.src_mfn = xpmap_ptom(srcpa) >> PAGE_SHIFT;
        !           502:
        !           503:        if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0) {
        !           504:                panic(__func__);
        !           505:        }
        !           506: }
        !           507:
        !           508: /* Zero a physical page */
        !           509: void
        !           510: xen_pagezero(paddr_t pa)
        !           511: {
        !           512:        mmuext_op_t op;
        !           513:
        !           514:        op.cmd = MMUEXT_CLEAR_PAGE;
        !           515:        op.arg1.mfn = xpmap_ptom(pa) >> PAGE_SHIFT;
        !           516:
        !           517:        if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0) {
        !           518:                panic(__func__);
        !           519:        }
        !           520: }
        !           521:
1.2       bouyer    522: int
1.6       bouyer    523: xpq_update_foreign(paddr_t ptr, pt_entry_t val, int dom)
1.2       bouyer    524: {
                    525:        mmu_update_t op;
                    526:        int ok;
1.29      cherry    527:
1.2       bouyer    528:        xpq_flush_queue();
                    529:
1.6       bouyer    530:        op.ptr = ptr;
1.2       bouyer    531:        op.val = val;
                    532:        if (HYPERVISOR_mmu_update(&op, 1, &ok, dom) < 0)
                    533:                return EFAULT;
                    534:        return (0);
                    535: }
                    536:
#ifdef XENDEBUG
/*
 * Dump the current CPU's pending MMU-update queue via XENPRINTK2,
 * grouping up to four queue entries per output line in the static
 * XBUF (debug-only; XBUF is shared, so concurrent dumps would race).
 */
void
xpq_debug_dump(void)
{
	int i;

	mmu_update_t *xpq_queue = xpq_queue_array[curcpu()->ci_cpuid];
	int xpq_idx = xpq_idx_array[curcpu()->ci_cpuid];

	XENPRINTK2(("idx: %d\n", xpq_idx));
	for (i = 0; i < xpq_idx; i++) {
		/* First entry of a group restarts XBUF... */
		snprintf(XBUF, sizeof(XBUF), "%" PRIx64 " %08" PRIx64,
		    xpq_queue[i].ptr, xpq_queue[i].val);
		/* ...up to three more entries are appended to it. */
		if (++i < xpq_idx)
			snprintf(XBUF + strlen(XBUF),
			    sizeof(XBUF) - strlen(XBUF),
			    "%" PRIx64 " %08" PRIx64,
			    xpq_queue[i].ptr, xpq_queue[i].val);
		if (++i < xpq_idx)
			snprintf(XBUF + strlen(XBUF),
			    sizeof(XBUF) - strlen(XBUF),
			    "%" PRIx64 " %08" PRIx64,
			    xpq_queue[i].ptr, xpq_queue[i].val);
		if (++i < xpq_idx)
			snprintf(XBUF + strlen(XBUF),
			    sizeof(XBUF) - strlen(XBUF),
			    "%" PRIx64 " %08" PRIx64,
			    xpq_queue[i].ptr, xpq_queue[i].val);
		XENPRINTK2(("%d: %s\n", xpq_idx, XBUF));
	}
}
#endif
                    569:
                    570:
extern volatile struct xencons_interface *xencons_interface; /* XXX */
extern struct xenstore_domain_interface *xenstore_interface; /* XXX */

static void xen_bt_set_readonly (vaddr_t);
static void xen_bootstrap_tables (vaddr_t, vaddr_t, int, int, int);

/* How many PDEs ? */
#if L2_SLOT_KERNBASE > 0
#define TABLE_L2_ENTRIES (2 * (NKL2_KIMG_ENTRIES + 1))
#else
#define TABLE_L2_ENTRIES (NKL2_KIMG_ENTRIES + 1)
#endif

/*
 * Construct and switch to new pagetables
 * first_avail is the first vaddr we can use after
 * we get rid of Xen pagetables
 */

vaddr_t xen_pmap_bootstrap (void);

/*
 * Function to get rid of Xen bootstrap tables
 */

/* How many PDP do we need: */
#ifdef PAE
/*
 * For PAE, we consider a single contiguous L2 "superpage" of 4 pages,
 * all of them mapped by the L3 page. We also need a shadow page
 * for L3[3].
 */
static const int l2_4_count = 6;
#elif defined(__x86_64__)
static const int l2_4_count = PTP_LEVELS;
#else
static const int l2_4_count = PTP_LEVELS - 1;
#endif
                    609:
/*
 * Build our own bootstrap page tables to replace the ones Xen handed us,
 * then switch to them.  Works out how many L2 (PTE-page) entries are
 * needed, builds a temporary set after the Xen tables, then the final
 * set back at pt_base, and reclaims the Xen tables.
 *
 * Returns the first free VA after the final tables (callers use it as
 * first_avail).
 */
vaddr_t
xen_pmap_bootstrap(void)
{
	int count, oldcount;
	long mapsize;
	vaddr_t bootstrap_tables, init_tables;

	/* Reset the per-cpu mmu-update queue indexes before any queueing. */
	memset(xpq_idx_array, 0, sizeof xpq_idx_array);

	/* P->M translation table, provided by Xen in the start_info page. */
	xpmap_phys_to_machine_mapping =
	    (unsigned long *)xen_start_info.mfn_list;
	init_tables = xen_start_info.pt_base;
	__PRINTK(("xen_arch_pmap_bootstrap init_tables=0x%lx\n", init_tables));

	/* Space after Xen bootstrap tables should be free */
	bootstrap_tables = xen_start_info.pt_base +
		(xen_start_info.nr_pt_frames * PAGE_SIZE);

	/*
	 * Calculate how much space we need:
	 * first everything mapped before the Xen bootstrap tables
	 */
	mapsize = init_tables - KERNTEXTOFF;
	/* after the tables we'll have:
	 *  - UAREA
	 *  - dummy user PGD (x86_64)
	 *  - HYPERVISOR_shared_info
	 *  - early_zerop
	 *  - ISA I/O mem (if needed)
	 */
	mapsize += UPAGES * NBPG;
#ifdef __x86_64__
	mapsize += NBPG;	/* dummy user PGD */
#endif
	mapsize += NBPG;	/* HYPERVISOR_shared_info */
	mapsize += NBPG;	/* early_zerop */

#ifdef DOM0OPS
	if (xendomain_is_dom0()) {
		/* space for ISA I/O mem */
		mapsize += IOM_SIZE;
	}
#endif
	/* at this point mapsize doesn't include the table size */

#ifdef __x86_64__
	count = TABLE_L2_ENTRIES;
#else
	count = (mapsize + (NBPD_L2 -1)) >> L2_SHIFT;
#endif /* __x86_64__ */

	/*
	 * now compute how many L2 pages we need exactly: the tables
	 * themselves must also be covered by the mapping, so grow count
	 * until the mapped range covers mapsize plus the table pages.
	 */
	XENPRINTK(("bootstrap_final mapsize 0x%lx count %d\n", mapsize, count));
	while (mapsize + (count + l2_4_count) * PAGE_SIZE + KERNTEXTOFF >
	    ((long)count << L2_SHIFT) + KERNBASE) {
		count++;
	}
#ifndef __x86_64__
	/*
	 * one more L2 page: we'll allocate several pages after kva_start
	 * in pmap_bootstrap() before pmap_growkernel(), which have not been
	 * counted here. It's not a big issue to allocate one more L2 as
	 * pmap_growkernel() will be called anyway.
	 */
	count++;
	nkptp[1] = count;
#endif

	/*
	 * install bootstrap pages. We may need more L2 pages than will
	 * have the final table here, as it's installed after the final table
	 */
	oldcount = count;

bootstrap_again:
	XENPRINTK(("bootstrap_again oldcount %d\n", oldcount));
	/*
	 * Xen space we'll reclaim may not be enough for our new page tables,
	 * move bootstrap tables if necessary
	 */
	if (bootstrap_tables < init_tables + ((count + l2_4_count) * PAGE_SIZE))
		bootstrap_tables = init_tables +
					((count + l2_4_count) * PAGE_SIZE);
	/*
	 * make sure we have enough to map the bootstrap_tables; growing
	 * oldcount can move bootstrap_tables again, hence the loop.
	 */
	if (bootstrap_tables + ((oldcount + l2_4_count) * PAGE_SIZE) >
	    ((long)oldcount << L2_SHIFT) + KERNBASE) {
		oldcount++;
		goto bootstrap_again;
	}

	/* Create temporary tables */
	xen_bootstrap_tables(xen_start_info.pt_base, bootstrap_tables,
		xen_start_info.nr_pt_frames, oldcount, 0);

	/* Create final tables */
	xen_bootstrap_tables(bootstrap_tables, init_tables,
	    oldcount + l2_4_count, count, 1);

	/* zero out free space after tables */
	memset((void *)(init_tables + ((count + l2_4_count) * PAGE_SIZE)), 0,
	    (UPAGES + 1) * NBPG);

	/* Finally, flush TLB. */
	xpq_queue_tlb_flush();

	return (init_tables + ((count + l2_4_count) * PAGE_SIZE));
}
                    717:
/*
 * Build a new table and switch to it
 * old_count is # of old tables (including PGD, PDTPE and PDE)
 * new_count is # of new tables (PTE only)
 * we assume areas don't overlap
 *
 * 'final' is non-zero when building the definitive tables: only then are
 * the extra mappings (UAREA, shared_info, early_zerop, ISA I/O mem) set up
 * and the per-cpu directory pointers recorded in cpu_info_primary.
 */
static void
xen_bootstrap_tables (vaddr_t old_pgd, vaddr_t new_pgd,
	int old_count, int new_count, int final)
{
	pd_entry_t *pdtpe, *pde, *pte;
	pd_entry_t *bt_pgd;
	paddr_t addr;
	vaddr_t page, avail, text_end, map_end;
	int i;
	extern char __data_start;
	extern char *early_zerop; /* from pmap.c */

	__PRINTK(("xen_bootstrap_tables(%#" PRIxVADDR ", %#" PRIxVADDR ","
	    " %d, %d)\n",
	    old_pgd, new_pgd, old_count, new_count));
	/* End of the read-only region: everything below __data_start. */
	text_end = ((vaddr_t)&__data_start) & ~PAGE_MASK;
	/*
	 * size of R/W area after kernel text:
	 *  xencons_interface (if present)
	 *  xenstore_interface (if present)
	 *  table pages (new_count + l2_4_count entries)
	 * extra mappings (only when final is true):
	 *  UAREA
	 *  dummy user PGD (x86_64 only)/gdt page (i386 only)
	 *  HYPERVISOR_shared_info
	 *  early_zerop
	 *  ISA I/O mem (if needed)
	 */
	map_end = new_pgd + ((new_count + l2_4_count) * NBPG);
	if (final) {
		map_end += (UPAGES + 1) * NBPG;
		HYPERVISOR_shared_info = (shared_info_t *)map_end;
		map_end += NBPG;
		early_zerop = (char *)map_end;
		map_end += NBPG;
	}
	/*
	 * we always set atdevbase, as it's used by init386 to find the first
	 * available VA. map_end is updated only if we are dom0, so
	 * atdevbase -> atdevbase + IOM_SIZE will be mapped only in
	 * this case.
	 */
	if (final)
		atdevbase = map_end;
#ifdef DOM0OPS
	if (final && xendomain_is_dom0()) {
		/* ISA I/O mem */
		map_end += IOM_SIZE;
	}
#endif /* DOM0OPS */

	__PRINTK(("xen_bootstrap_tables text_end 0x%lx map_end 0x%lx\n",
	    text_end, map_end));
	__PRINTK(("console %#lx ", xen_start_info.console_mfn));
	__PRINTK(("xenstore %#" PRIx32 "\n", xen_start_info.store_mfn));

	/*
	 * Create bootstrap page tables
	 * What we need:
	 * - a PGD (level 4)
	 * - a PDTPE (level 3)
	 * - a PDE (level2)
	 * - some PTEs (level 1)
	 *
	 * 'avail' walks the free pages starting at new_pgd; each table
	 * level is carved out of it in order.
	 */

	bt_pgd = (pd_entry_t *) new_pgd;
	memset (bt_pgd, 0, PAGE_SIZE);
	avail = new_pgd + PAGE_SIZE;
#if PTP_LEVELS > 3
	/* per-cpu L4 PD */
	pd_entry_t *bt_cpu_pgd = bt_pgd;
	/* pmap_kernel() "shadow" L4 PD */
	bt_pgd = (pd_entry_t *) avail;
	memset(bt_pgd, 0, PAGE_SIZE);
	avail += PAGE_SIZE;

	/* Install level 3 */
	pdtpe = (pd_entry_t *) avail;
	memset (pdtpe, 0, PAGE_SIZE);
	avail += PAGE_SIZE;

	/* Enter the L3 page into both L4s (per-cpu and shadow). */
	addr = ((u_long) pdtpe) - KERNBASE;
	bt_pgd[pl4_pi(KERNTEXTOFF)] = bt_cpu_pgd[pl4_pi(KERNTEXTOFF)] =
	    xpmap_ptom_masked(addr) | PG_k | PG_RW | PG_V;

	__PRINTK(("L3 va %#lx pa %#" PRIxPADDR " entry %#" PRIxPADDR
	    " -> L4[%#x]\n",
	    pdtpe, addr, bt_pgd[pl4_pi(KERNTEXTOFF)], pl4_pi(KERNTEXTOFF)));
#else
	pdtpe = bt_pgd;
#endif /* PTP_LEVELS > 3 */

#if PTP_LEVELS > 2
	/* Level 2 */
	pde = (pd_entry_t *) avail;
	memset(pde, 0, PAGE_SIZE);
	avail += PAGE_SIZE;

	addr = ((u_long) pde) - KERNBASE;
	pdtpe[pl3_pi(KERNTEXTOFF)] =
	    xpmap_ptom_masked(addr) | PG_k | PG_V | PG_RW;
	__PRINTK(("L2 va %#lx pa %#" PRIxPADDR " entry %#" PRIxPADDR
	    " -> L3[%#x]\n",
	    pde, addr, pdtpe[pl3_pi(KERNTEXTOFF)], pl3_pi(KERNTEXTOFF)));
#elif defined(PAE)
	/* our PAE-style level 2: 5 contiguous pages (4 L2 + 1 shadow) */
	pde = (pd_entry_t *) avail;
	memset(pde, 0, PAGE_SIZE * 5);
	avail += PAGE_SIZE * 5;
	addr = ((u_long) pde) - KERNBASE;
	/*
	 * enter L2 pages in the L3.
	 * The real L2 kernel PD will be the last one (so that
	 * pde[L2_SLOT_KERN] always point to the shadow).
	 */
	for (i = 0; i < 3; i++, addr += PAGE_SIZE) {
		/*
		 * Xen doesn't want R/W mappings in L3 entries, it'll add it
		 * itself.
		 */
		pdtpe[i] = xpmap_ptom_masked(addr) | PG_k | PG_V;
		__PRINTK(("L2 va %#lx pa %#" PRIxPADDR " entry %#" PRIxPADDR
		    " -> L3[%#x]\n",
		    (vaddr_t)pde + PAGE_SIZE * i, addr, pdtpe[i], i));
	}
	/* L3[3] points at the shadow (5th) page, skipping the real L2[3]. */
	addr += PAGE_SIZE;
	pdtpe[3] = xpmap_ptom_masked(addr) | PG_k | PG_V;
	__PRINTK(("L2 va %#lx pa %#" PRIxPADDR " entry %#" PRIxPADDR
	    " -> L3[%#x]\n",
	    (vaddr_t)pde + PAGE_SIZE * 4, addr, pdtpe[3], 3));

#else /* PAE */
	pde = bt_pgd;
#endif /* PTP_LEVELS > 2 */

	/*
	 * Level 1: one PTE page per L2 slot, mapping KERNTEXTOFF..map_end.
	 * Text, old tables and new tables are mapped read-only; everything
	 * else read/write.
	 */
	page = KERNTEXTOFF;
	for (i = 0; i < new_count; i ++) {
		vaddr_t cur_page = page;

		pte = (pd_entry_t *) avail;
		avail += PAGE_SIZE;

		memset(pte, 0, PAGE_SIZE);
		while (pl2_pi(page) == pl2_pi (cur_page)) {
			if (page >= map_end) {
				/* not mapped at all */
				pte[pl1_pi(page)] = 0;
				page += PAGE_SIZE;
				continue;
			}
			pte[pl1_pi(page)] = xpmap_ptom_masked(page - KERNBASE);
			if (page == (vaddr_t)HYPERVISOR_shared_info) {
				/* shared_info gets the MA Xen gave us. */
				pte[pl1_pi(page)] = xen_start_info.shared_info;
				__PRINTK(("HYPERVISOR_shared_info "
				    "va %#lx pte %#" PRIxPADDR "\n",
				    HYPERVISOR_shared_info, pte[pl1_pi(page)]));
			}
			/* remap the console ring page in place, by mfn */
			if ((xpmap_ptom_masked(page - KERNBASE) >> PAGE_SHIFT)
			    == xen_start_info.console.domU.mfn) {
				xencons_interface = (void *)page;
				pte[pl1_pi(page)] = xen_start_info.console_mfn;
				pte[pl1_pi(page)] <<= PAGE_SHIFT;
				__PRINTK(("xencons_interface "
				    "va %#lx pte %#" PRIxPADDR "\n",
				    xencons_interface, pte[pl1_pi(page)]));
			}
			/* likewise for the xenstore ring page */
			if ((xpmap_ptom_masked(page - KERNBASE) >> PAGE_SHIFT)
			    == xen_start_info.store_mfn) {
				xenstore_interface = (void *)page;
				pte[pl1_pi(page)] = xen_start_info.store_mfn;
				pte[pl1_pi(page)] <<= PAGE_SHIFT;
				__PRINTK(("xenstore_interface "
				    "va %#lx pte %#" PRIxPADDR "\n",
				    xenstore_interface, pte[pl1_pi(page)]));
			}
#ifdef DOM0OPS
			if (page >= (vaddr_t)atdevbase &&
			    page < (vaddr_t)atdevbase + IOM_SIZE) {
				/* 1:1 map of the ISA I/O hole for dom0 */
				pte[pl1_pi(page)] =
				    IOM_BEGIN + (page - (vaddr_t)atdevbase);
			}
#endif
			pte[pl1_pi(page)] |= PG_k | PG_V;
			if (page < text_end) {
				/* map kernel text RO */
				pte[pl1_pi(page)] |= 0;
			} else if (page >= old_pgd
			    && page < old_pgd + (old_count * PAGE_SIZE)) {
				/* map old page tables RO */
				pte[pl1_pi(page)] |= 0;
			} else if (page >= new_pgd &&
			    page < new_pgd + ((new_count + l2_4_count) * PAGE_SIZE)) {
				/* map new page tables RO */
				pte[pl1_pi(page)] |= 0;
#ifdef i386
			} else if (page == (vaddr_t)tmpgdt) {
				/*
				 * Map bootstrap gdt R/O. Later, we
				 * will re-add this to page to uvm
				 * after making it writable.
				 */

				pte[pl1_pi(page)] = 0;
				page += PAGE_SIZE;
				continue;
#endif /* i386 */
			} else {
				/* map page RW */
				pte[pl1_pi(page)] |= PG_RW;
			}

			if ((page  >= old_pgd && page < old_pgd + (old_count * PAGE_SIZE))
			    || page >= new_pgd) {
				__PRINTK(("va %#lx pa %#lx "
				    "entry 0x%" PRIxPADDR " -> L1[%#x]\n",
				    page, page - KERNBASE,
				    pte[pl1_pi(page)], pl1_pi(page)));
			}
			page += PAGE_SIZE;
		}

		/* hook this PTE page into the L2, then make it RO */
		addr = ((u_long) pte) - KERNBASE;
		pde[pl2_pi(cur_page)] =
		    xpmap_ptom_masked(addr) | PG_k | PG_RW | PG_V;
		__PRINTK(("L1 va %#lx pa %#" PRIxPADDR " entry %#" PRIxPADDR
		    " -> L2[%#x]\n",
		    pte, addr, pde[pl2_pi(cur_page)], pl2_pi(cur_page)));
		/* Mark readonly */
		xen_bt_set_readonly((vaddr_t) pte);
	}

	/* Install recursive page tables mapping */
#ifdef PAE
	/*
	 * we need a shadow page for the kernel's L2 page
	 * The real L2 kernel PD will be the last one (so that
	 * pde[L2_SLOT_KERN] always point to the shadow.
	 */
	memcpy(&pde[L2_SLOT_KERN + NPDPG], &pde[L2_SLOT_KERN], PAGE_SIZE);
	cpu_info_primary.ci_kpm_pdir = &pde[L2_SLOT_KERN + NPDPG];
	cpu_info_primary.ci_kpm_pdirpa =
	    (vaddr_t) cpu_info_primary.ci_kpm_pdir - KERNBASE;

	/*
	 * We don't enter a recursive entry from the L3 PD. Instead,
	 * we enter the first 4 L2 pages, which includes the kernel's L2
	 * shadow. But we have to enter the shadow after switching
	 * %cr3, or Xen will refcount some PTE with the wrong type.
	 */
	addr = (u_long)pde - KERNBASE;
	for (i = 0; i < 3; i++, addr += PAGE_SIZE) {
		pde[PDIR_SLOT_PTE + i] = xpmap_ptom_masked(addr) | PG_k | PG_V;
		__PRINTK(("pde[%d] va %#" PRIxVADDR " pa %#" PRIxPADDR
		    " entry %#" PRIxPADDR "\n",
		    (int)(PDIR_SLOT_PTE + i), pde + PAGE_SIZE * i,
		    addr, pde[PDIR_SLOT_PTE + i]));
	}
#if 0
	addr += PAGE_SIZE; /* point to shadow L2 */
	pde[PDIR_SLOT_PTE + 3] = xpmap_ptom_masked(addr) | PG_k | PG_V;
	__PRINTK(("pde[%d] va 0x%lx pa 0x%lx entry 0x%" PRIx64 "\n",
	    (int)(PDIR_SLOT_PTE + 3), pde + PAGE_SIZE * 4, (long)addr,
	    (int64_t)pde[PDIR_SLOT_PTE + 3]));
#endif
	/* Mark tables RO, and pin the kernel's shadow as L2 */
	addr = (u_long)pde - KERNBASE;
	for (i = 0; i < 5; i++, addr += PAGE_SIZE) {
		xen_bt_set_readonly(((vaddr_t)pde) + PAGE_SIZE * i);
		/* pages 2 and 3 (real kernel L2 + shadow) handled below */
		if (i == 2 || i == 3)
			continue;
#if 0
		__PRINTK(("pin L2 %d addr 0x%" PRIx64 "\n", i, (int64_t)addr));
		xpq_queue_pin_l2_table(xpmap_ptom_masked(addr));
#endif
	}
	if (final) {
		addr = (u_long)pde - KERNBASE + 3 * PAGE_SIZE;
		__PRINTK(("pin L2 %d addr %#" PRIxPADDR "\n", 2, addr));
		xpq_queue_pin_l2_table(xpmap_ptom_masked(addr));
	}
#if 0
	addr = (u_long)pde - KERNBASE + 2 * PAGE_SIZE;
	__PRINTK(("pin L2 %d addr 0x%" PRIx64 "\n", 2, (int64_t)addr));
	xpq_queue_pin_l2_table(xpmap_ptom_masked(addr));
#endif
#else /* PAE */
	/* recursive entry in higher-level per-cpu PD and pmap_kernel() */
	bt_pgd[PDIR_SLOT_PTE] = xpmap_ptom_masked((paddr_t)bt_pgd - KERNBASE) | PG_k | PG_V;
#ifdef __x86_64__
	   bt_cpu_pgd[PDIR_SLOT_PTE] =
		   xpmap_ptom_masked((paddr_t)bt_cpu_pgd - KERNBASE) | PG_k | PG_V;
#endif /* __x86_64__ */
	__PRINTK(("bt_pgd[PDIR_SLOT_PTE] va %#" PRIxVADDR " pa %#" PRIxPADDR
	    " entry %#" PRIxPADDR "\n", new_pgd, (paddr_t)new_pgd - KERNBASE,
	    bt_pgd[PDIR_SLOT_PTE]));
	/* Mark tables RO */
	xen_bt_set_readonly((vaddr_t) pde);
#endif
#if PTP_LEVELS > 2 || defined(PAE)
	xen_bt_set_readonly((vaddr_t) pdtpe);
#endif
#if PTP_LEVELS > 3
	xen_bt_set_readonly(new_pgd);
#endif
	/* Pin the PGD: must happen before the pt_switch below. */
	__PRINTK(("pin PGD: %"PRIxVADDR"\n", new_pgd - KERNBASE));
#ifdef __x86_64__
	xpq_queue_pin_l4_table(xpmap_ptom_masked(new_pgd - KERNBASE));
#elif PAE
	xpq_queue_pin_l3_table(xpmap_ptom_masked(new_pgd - KERNBASE));
#else
	xpq_queue_pin_l2_table(xpmap_ptom_masked(new_pgd - KERNBASE));
#endif

	/* Save phys. addr of PDP, for libkvm. */
#ifdef PAE
	PDPpaddr = (u_long)pde - KERNBASE; /* PDP is the L2 with PAE */
#else
	PDPpaddr = (u_long)bt_pgd - KERNBASE;
#endif

	/* Switch to new tables */
	__PRINTK(("switch to PGD\n"));
	xpq_queue_pt_switch(xpmap_ptom_masked(new_pgd - KERNBASE));
	__PRINTK(("bt_pgd[PDIR_SLOT_PTE] now entry %#" PRIxPADDR "\n",
	    bt_pgd[PDIR_SLOT_PTE]));

#ifdef PAE
	if (final) {
		/* save the address of the L3 page */
		cpu_info_primary.ci_pae_l3_pdir = pdtpe;
		cpu_info_primary.ci_pae_l3_pdirpa = (new_pgd - KERNBASE);

		/*
		 * now enter kernel's PTE mappings: the shadow L2 could
		 * only be entered after the %cr3 switch (see above).
		 */
		addr =  (u_long)pde - KERNBASE + PAGE_SIZE * 3;
		xpq_queue_pte_update(
		    xpmap_ptom(((vaddr_t)&pde[PDIR_SLOT_PTE + 3]) - KERNBASE),
		    xpmap_ptom_masked(addr) | PG_k | PG_V);
		xpq_flush_queue();
	}
#elif defined(__x86_64__)
	if (final) {
		/* save the address of the real per-cpu L4 pgd page */
		cpu_info_primary.ci_kpm_pdir = bt_cpu_pgd;
		cpu_info_primary.ci_kpm_pdirpa = ((paddr_t) bt_cpu_pgd - KERNBASE);
	}
#endif
	__USE(pdtpe);

	/* Now we can safely reclaim space taken by old tables */

	__PRINTK(("unpin old PGD\n"));
	/* Unpin old PGD */
	xpq_queue_unpin_table(xpmap_ptom_masked(old_pgd - KERNBASE));
	/*
	 * Mark old tables RW: walk our own new mapping to find the PTEs
	 * covering old_pgd, and queue PG_RW updates for each of them.
	 */
	page = old_pgd;
	addr = (paddr_t) pde[pl2_pi(page)] & PG_FRAME;
	addr = xpmap_mtop(addr);
	pte = (pd_entry_t *) ((u_long)addr + KERNBASE);
	pte += pl1_pi(page);
	__PRINTK(("*pde %#" PRIxPADDR " addr %#" PRIxPADDR " pte %#lx\n",
	    pde[pl2_pi(page)], addr, (long)pte));
	while (page < old_pgd + (old_count * PAGE_SIZE) && page < map_end) {
		addr = xpmap_ptom(((u_long) pte) - KERNBASE);
		XENPRINTK(("addr %#" PRIxPADDR " pte %#lx "
		   "*pte %#" PRIxPADDR "\n",
		   addr, (long)pte, *pte));
		xpq_queue_pte_update(addr, *pte | PG_RW);
		page += PAGE_SIZE;
		/*
		 * Our ptes are contiguous
		 * so it's safe to just "++" here
		 */
		pte++;
	}
	xpq_flush_queue();
}
                   1102:
                   1103:
                   1104: /*
                   1105:  * Bootstrap helper functions
                   1106:  */
                   1107:
                   1108: /*
                   1109:  * Mark a page readonly
                   1110:  * XXX: assuming vaddr = paddr + KERNBASE
                   1111:  */
                   1112:
                   1113: static void
                   1114: xen_bt_set_readonly (vaddr_t page)
                   1115: {
                   1116:        pt_entry_t entry;
                   1117:
                   1118:        entry = xpmap_ptom_masked(page - KERNBASE);
1.4       bouyer   1119:        entry |= PG_k | PG_V;
1.2       bouyer   1120:
                   1121:        HYPERVISOR_update_va_mapping (page, entry, UVMF_INVLPG);
                   1122: }
1.4       bouyer   1123:
                   1124: #ifdef __x86_64__
                   1125: void
                   1126: xen_set_user_pgd(paddr_t page)
                   1127: {
                   1128:        struct mmuext_op op;
                   1129:        int s = splvm();
                   1130:
                   1131:        xpq_flush_queue();
                   1132:        op.cmd = MMUEXT_NEW_USER_BASEPTR;
1.46      jym      1133:        op.arg1.mfn = xpmap_ptom_masked(page) >> PAGE_SHIFT;
1.4       bouyer   1134:         if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
                   1135:                panic("xen_set_user_pgd: failed to install new user page"
1.19      jym      1136:                        " directory %#" PRIxPADDR, page);
1.4       bouyer   1137:        splx(s);
                   1138: }
                   1139: #endif /* __x86_64__ */

CVSweb <webmaster@jp.NetBSD.org>