
Annotation of pkgsrc/emulators/qemu/patches/patch-target_i386_nvmm_nvmm-all.c, Revision 1.2

1.2     ! nia         1: $NetBSD: patch-target_i386_nvmm_nvmm-all.c,v 1.1 2021/05/24 14:22:08 ryoon Exp $
1.1       ryoon       2:
1.2     ! nia         3: --- target/i386/nvmm/nvmm-all.c.orig   2021-06-01 15:07:31.572325819 +0000
1.1       ryoon       4: +++ target/i386/nvmm/nvmm-all.c
1.2     ! nia         5: @@ -0,0 +1,1236 @@
1.1       ryoon       6: +/*
                      7: + * Copyright (c) 2018-2019 Maxime Villard, All rights reserved.
                      8: + *
                      9: + * NetBSD Virtual Machine Monitor (NVMM) accelerator for QEMU.
                     10: + *
                     11: + * This work is licensed under the terms of the GNU GPL, version 2 or later.
                     12: + * See the COPYING file in the top-level directory.
                     13: + */
                     14: +
                     15: +#include "qemu/osdep.h"
                     16: +#include "cpu.h"
                     17: +#include "exec/address-spaces.h"
                     18: +#include "exec/ioport.h"
                     19: +#include "qemu-common.h"
                     20: +#include "qemu/accel.h"
                     21: +#include "sysemu/nvmm.h"
                     22: +#include "sysemu/cpus.h"
                     23: +#include "sysemu/runstate.h"
                     24: +#include "qemu/main-loop.h"
                     25: +#include "qemu/error-report.h"
                     26: +#include "qapi/error.h"
                     27: +#include "qemu/queue.h"
                     28: +#include "migration/blocker.h"
                     29: +#include "strings.h"
                     30: +
                     31: +#include "nvmm-accel-ops.h"
                     32: +
                     33: +#include <nvmm.h>
                     34: +
                     35: +struct qemu_vcpu {
                     36: +    struct nvmm_vcpu vcpu;
                     37: +    uint8_t tpr;
                     38: +    bool stop;
                     39: +
                     40: +    /* Window-exiting for INTs/NMIs. */
                     41: +    bool int_window_exit;
                     42: +    bool nmi_window_exit;
                     43: +
                     44: +    /* The guest is in an interrupt shadow (POP SS, etc). */
                     45: +    bool int_shadow;
                     46: +};
                     47: +
                     48: +struct qemu_machine {
                     49: +    struct nvmm_capability cap;
                     50: +    struct nvmm_machine mach;
                     51: +};
                     52: +
                     53: +/* -------------------------------------------------------------------------- */
                     54: +
                     55: +static bool nvmm_allowed;
                     56: +static struct qemu_machine qemu_mach;
                     57: +
                     58: +static struct qemu_vcpu *
                     59: +get_qemu_vcpu(CPUState *cpu)
                     60: +{
                     61: +    return (struct qemu_vcpu *)cpu->hax_vcpu;
                     62: +}
                     63: +
                     64: +static struct nvmm_machine *
                     65: +get_nvmm_mach(void)
                     66: +{
                     67: +    return &qemu_mach.mach;
                     68: +}
                     69: +
                     70: +/* -------------------------------------------------------------------------- */
                     71: +
                     72: +static void
                     73: +nvmm_set_segment(struct nvmm_x64_state_seg *nseg, const SegmentCache *qseg)
                     74: +{
                     75: +    uint32_t attrib = qseg->flags;
                     76: +
                     77: +    nseg->selector = qseg->selector;
                     78: +    nseg->limit = qseg->limit;
                     79: +    nseg->base = qseg->base;
                     80: +    nseg->attrib.type = __SHIFTOUT(attrib, DESC_TYPE_MASK);
                     81: +    nseg->attrib.s = __SHIFTOUT(attrib, DESC_S_MASK);
                     82: +    nseg->attrib.dpl = __SHIFTOUT(attrib, DESC_DPL_MASK);
                     83: +    nseg->attrib.p = __SHIFTOUT(attrib, DESC_P_MASK);
                     84: +    nseg->attrib.avl = __SHIFTOUT(attrib, DESC_AVL_MASK);
                     85: +    nseg->attrib.l = __SHIFTOUT(attrib, DESC_L_MASK);
                     86: +    nseg->attrib.def = __SHIFTOUT(attrib, DESC_B_MASK);
                     87: +    nseg->attrib.g = __SHIFTOUT(attrib, DESC_G_MASK);
                     88: +}
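
[The attribute conversion above leans on NetBSD's __SHIFTOUT()/__SHIFTIN() bitfield macros from <sys/cdefs.h>. Roughly, __SHIFTOUT(x, mask) masks x and shifts the result down by the mask's alignment, and __SHIFTIN(v, mask) shifts v up into the mask's position. A minimal standalone sketch of the idea, with renamed macros and illustrative values; not part of the patch:

#include <assert.h>
#include <stdint.h>

/* Clears all but the lowest set bit of the mask. */
#define LOWEST_SET_BIT(m)  ((((m) - 1) & (m)) ^ (m))
/* Extract a field: mask it out, then divide down by its alignment. */
#define SHIFTOUT(x, m)     (((x) & (m)) / LOWEST_SET_BIT(m))
/* Insert a field: multiply the value up into the mask's position. */
#define SHIFTIN(v, m)      ((v) * LOWEST_SET_BIT(m))

int
main(void)
{
    uint32_t dpl_mask = 3u << 13;           /* like QEMU's DESC_DPL_MASK */
    uint32_t flags = SHIFTIN(3u, dpl_mask); /* pack DPL=3 into the word */

    assert(SHIFTOUT(flags, dpl_mask) == 3); /* and recover it again */
    return 0;
}

With QEMU defining DESC_DPL_MASK as (3 << 13), this is how a segment's DPL round-trips between the packed flags word and nseg->attrib.dpl.]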
                     89: +
                     90: +static void
                     91: +nvmm_set_registers(CPUState *cpu)
                     92: +{
                     93: +    struct CPUX86State *env = (CPUArchState *)cpu->env_ptr;
                     94: +    struct nvmm_machine *mach = get_nvmm_mach();
                     95: +    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
                     96: +    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
                     97: +    struct nvmm_x64_state *state = vcpu->state;
                     98: +    uint64_t bitmap;
                     99: +    size_t i;
                    100: +    int ret;
                    101: +
                    102: +    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
                    103: +
                    104: +    /* GPRs. */
                    105: +    state->gprs[NVMM_X64_GPR_RAX] = env->regs[R_EAX];
                    106: +    state->gprs[NVMM_X64_GPR_RCX] = env->regs[R_ECX];
                    107: +    state->gprs[NVMM_X64_GPR_RDX] = env->regs[R_EDX];
                    108: +    state->gprs[NVMM_X64_GPR_RBX] = env->regs[R_EBX];
                    109: +    state->gprs[NVMM_X64_GPR_RSP] = env->regs[R_ESP];
                    110: +    state->gprs[NVMM_X64_GPR_RBP] = env->regs[R_EBP];
                    111: +    state->gprs[NVMM_X64_GPR_RSI] = env->regs[R_ESI];
                    112: +    state->gprs[NVMM_X64_GPR_RDI] = env->regs[R_EDI];
                    113: +#ifdef TARGET_X86_64
                    114: +    state->gprs[NVMM_X64_GPR_R8]  = env->regs[R_R8];
                    115: +    state->gprs[NVMM_X64_GPR_R9]  = env->regs[R_R9];
                    116: +    state->gprs[NVMM_X64_GPR_R10] = env->regs[R_R10];
                    117: +    state->gprs[NVMM_X64_GPR_R11] = env->regs[R_R11];
                    118: +    state->gprs[NVMM_X64_GPR_R12] = env->regs[R_R12];
                    119: +    state->gprs[NVMM_X64_GPR_R13] = env->regs[R_R13];
                    120: +    state->gprs[NVMM_X64_GPR_R14] = env->regs[R_R14];
                    121: +    state->gprs[NVMM_X64_GPR_R15] = env->regs[R_R15];
                    122: +#endif
                    123: +
                    124: +    /* RIP and RFLAGS. */
                    125: +    state->gprs[NVMM_X64_GPR_RIP] = env->eip;
                    126: +    state->gprs[NVMM_X64_GPR_RFLAGS] = env->eflags;
                    127: +
                    128: +    /* Segments. */
                    129: +    nvmm_set_segment(&state->segs[NVMM_X64_SEG_CS], &env->segs[R_CS]);
                    130: +    nvmm_set_segment(&state->segs[NVMM_X64_SEG_DS], &env->segs[R_DS]);
                    131: +    nvmm_set_segment(&state->segs[NVMM_X64_SEG_ES], &env->segs[R_ES]);
                    132: +    nvmm_set_segment(&state->segs[NVMM_X64_SEG_FS], &env->segs[R_FS]);
                    133: +    nvmm_set_segment(&state->segs[NVMM_X64_SEG_GS], &env->segs[R_GS]);
                    134: +    nvmm_set_segment(&state->segs[NVMM_X64_SEG_SS], &env->segs[R_SS]);
                    135: +
                    136: +    /* Special segments. */
                    137: +    nvmm_set_segment(&state->segs[NVMM_X64_SEG_GDT], &env->gdt);
                    138: +    nvmm_set_segment(&state->segs[NVMM_X64_SEG_LDT], &env->ldt);
                    139: +    nvmm_set_segment(&state->segs[NVMM_X64_SEG_TR], &env->tr);
                    140: +    nvmm_set_segment(&state->segs[NVMM_X64_SEG_IDT], &env->idt);
                    141: +
                    142: +    /* Control registers. */
                    143: +    state->crs[NVMM_X64_CR_CR0] = env->cr[0];
                    144: +    state->crs[NVMM_X64_CR_CR2] = env->cr[2];
                    145: +    state->crs[NVMM_X64_CR_CR3] = env->cr[3];
                    146: +    state->crs[NVMM_X64_CR_CR4] = env->cr[4];
                    147: +    state->crs[NVMM_X64_CR_CR8] = qcpu->tpr;
                    148: +    state->crs[NVMM_X64_CR_XCR0] = env->xcr0;
                    149: +
                    150: +    /* Debug registers. */
                    151: +    state->drs[NVMM_X64_DR_DR0] = env->dr[0];
                    152: +    state->drs[NVMM_X64_DR_DR1] = env->dr[1];
                    153: +    state->drs[NVMM_X64_DR_DR2] = env->dr[2];
                    154: +    state->drs[NVMM_X64_DR_DR3] = env->dr[3];
                    155: +    state->drs[NVMM_X64_DR_DR6] = env->dr[6];
                    156: +    state->drs[NVMM_X64_DR_DR7] = env->dr[7];
                    157: +
                    158: +    /* FPU. */
                    159: +    state->fpu.fx_cw = env->fpuc;
                    160: +    state->fpu.fx_sw = (env->fpus & ~0x3800) | ((env->fpstt & 0x7) << 11);
                    161: +    state->fpu.fx_tw = 0;
                    162: +    for (i = 0; i < 8; i++) {
                    163: +        state->fpu.fx_tw |= (!env->fptags[i]) << i;
                    164: +    }
                    165: +    state->fpu.fx_opcode = env->fpop;
                    166: +    state->fpu.fx_ip.fa_64 = env->fpip;
                    167: +    state->fpu.fx_dp.fa_64 = env->fpdp;
                    168: +    state->fpu.fx_mxcsr = env->mxcsr;
                    169: +    state->fpu.fx_mxcsr_mask = 0x0000FFFF;
                    170: +    assert(sizeof(state->fpu.fx_87_ac) == sizeof(env->fpregs));
                    171: +    memcpy(state->fpu.fx_87_ac, env->fpregs, sizeof(env->fpregs));
                    172: +    for (i = 0; i < CPU_NB_REGS; i++) {
                    173: +        memcpy(&state->fpu.fx_xmm[i].xmm_bytes[0],
                    174: +            &env->xmm_regs[i].ZMM_Q(0), 8);
                    175: +        memcpy(&state->fpu.fx_xmm[i].xmm_bytes[8],
                    176: +            &env->xmm_regs[i].ZMM_Q(1), 8);
                    177: +    }
                    178: +
                    179: +    /* MSRs. */
                    180: +    state->msrs[NVMM_X64_MSR_EFER] = env->efer;
                    181: +    state->msrs[NVMM_X64_MSR_STAR] = env->star;
                    182: +#ifdef TARGET_X86_64
                    183: +    state->msrs[NVMM_X64_MSR_LSTAR] = env->lstar;
                    184: +    state->msrs[NVMM_X64_MSR_CSTAR] = env->cstar;
                    185: +    state->msrs[NVMM_X64_MSR_SFMASK] = env->fmask;
                    186: +    state->msrs[NVMM_X64_MSR_KERNELGSBASE] = env->kernelgsbase;
                    187: +#endif
                    188: +    state->msrs[NVMM_X64_MSR_SYSENTER_CS]  = env->sysenter_cs;
                    189: +    state->msrs[NVMM_X64_MSR_SYSENTER_ESP] = env->sysenter_esp;
                    190: +    state->msrs[NVMM_X64_MSR_SYSENTER_EIP] = env->sysenter_eip;
                    191: +    state->msrs[NVMM_X64_MSR_PAT] = env->pat;
                    192: +    state->msrs[NVMM_X64_MSR_TSC] = env->tsc;
                    193: +
                    194: +    bitmap =
                    195: +        NVMM_X64_STATE_SEGS |
                    196: +        NVMM_X64_STATE_GPRS |
                    197: +        NVMM_X64_STATE_CRS  |
                    198: +        NVMM_X64_STATE_DRS  |
                    199: +        NVMM_X64_STATE_MSRS |
                    200: +        NVMM_X64_STATE_FPU;
                    201: +
                    202: +    ret = nvmm_vcpu_setstate(mach, vcpu, bitmap);
                    203: +    if (ret == -1) {
                    204: +        error_report("NVMM: Failed to set virtual processor context,"
                    205: +            " error=%d", errno);
                    206: +    }
                    207: +}
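
[Two conversions in the FPU block above are easy to misread: FXSAVE packs the x87 top-of-stack pointer into bits 11..13 of the status word (QEMU tracks it separately in fpstt), and the FXSAVE area uses an abridged tag word in which a set bit means "valid", the inverse of QEMU's fptags[], where 1 means "empty". A standalone sketch under those assumptions; not part of the patch:

#include <assert.h>
#include <stdint.h>

int
main(void)
{
    uint16_t fpus = 0x0081;     /* status word with TOP bits (11..13) clear */
    unsigned fpstt = 5;         /* top-of-stack index kept separately */
    uint8_t fptags[8] = { 0, 1, 1, 1, 1, 0, 1, 1 };  /* 1 = empty */
    uint8_t fx_tw = 0;
    unsigned i;

    /* Pack TOP into bits 11..13, as nvmm_set_registers() does. */
    uint16_t fx_sw = (fpus & ~0x3800) | ((fpstt & 0x7) << 11);
    assert(((fx_sw >> 11) & 0x7) == 5);

    /* Abridged tag word: invert "empty" into "valid", one bit per reg. */
    for (i = 0; i < 8; i++) {
        fx_tw |= (uint8_t)(!fptags[i]) << i;
    }
    assert(fx_tw == 0x21);      /* registers 0 and 5 hold valid data */
    return 0;
}]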
                    208: +
                    209: +static void
                    210: +nvmm_get_segment(SegmentCache *qseg, const struct nvmm_x64_state_seg *nseg)
                    211: +{
                    212: +    qseg->selector = nseg->selector;
                    213: +    qseg->limit = nseg->limit;
                    214: +    qseg->base = nseg->base;
                    215: +
                    216: +    qseg->flags =
                    217: +        __SHIFTIN((uint32_t)nseg->attrib.type, DESC_TYPE_MASK) |
                    218: +        __SHIFTIN((uint32_t)nseg->attrib.s, DESC_S_MASK) |
                    219: +        __SHIFTIN((uint32_t)nseg->attrib.dpl, DESC_DPL_MASK) |
                    220: +        __SHIFTIN((uint32_t)nseg->attrib.p, DESC_P_MASK) |
                    221: +        __SHIFTIN((uint32_t)nseg->attrib.avl, DESC_AVL_MASK) |
                    222: +        __SHIFTIN((uint32_t)nseg->attrib.l, DESC_L_MASK) |
                    223: +        __SHIFTIN((uint32_t)nseg->attrib.def, DESC_B_MASK) |
                    224: +        __SHIFTIN((uint32_t)nseg->attrib.g, DESC_G_MASK);
                    225: +}
                    226: +
                    227: +static void
                    228: +nvmm_get_registers(CPUState *cpu)
                    229: +{
                    230: +    struct CPUX86State *env = (CPUArchState *)cpu->env_ptr;
                    231: +    struct nvmm_machine *mach = get_nvmm_mach();
                    232: +    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
                    233: +    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
                    234: +    X86CPU *x86_cpu = X86_CPU(cpu);
                    235: +    struct nvmm_x64_state *state = vcpu->state;
                    236: +    uint64_t bitmap, tpr;
                    237: +    size_t i;
                    238: +    int ret;
                    239: +
                    240: +    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
                    241: +
                    242: +    bitmap =
                    243: +        NVMM_X64_STATE_SEGS |
                    244: +        NVMM_X64_STATE_GPRS |
                    245: +        NVMM_X64_STATE_CRS  |
                    246: +        NVMM_X64_STATE_DRS  |
                    247: +        NVMM_X64_STATE_MSRS |
                    248: +        NVMM_X64_STATE_FPU;
                    249: +
                    250: +    ret = nvmm_vcpu_getstate(mach, vcpu, bitmap);
                    251: +    if (ret == -1) {
                    252: +        error_report("NVMM: Failed to get virtual processor context,"
                    253: +            " error=%d", errno);
                    254: +    }
                    255: +
                    256: +    /* GPRs. */
                    257: +    env->regs[R_EAX] = state->gprs[NVMM_X64_GPR_RAX];
                    258: +    env->regs[R_ECX] = state->gprs[NVMM_X64_GPR_RCX];
                    259: +    env->regs[R_EDX] = state->gprs[NVMM_X64_GPR_RDX];
                    260: +    env->regs[R_EBX] = state->gprs[NVMM_X64_GPR_RBX];
                    261: +    env->regs[R_ESP] = state->gprs[NVMM_X64_GPR_RSP];
                    262: +    env->regs[R_EBP] = state->gprs[NVMM_X64_GPR_RBP];
                    263: +    env->regs[R_ESI] = state->gprs[NVMM_X64_GPR_RSI];
                    264: +    env->regs[R_EDI] = state->gprs[NVMM_X64_GPR_RDI];
                    265: +#ifdef TARGET_X86_64
                    266: +    env->regs[R_R8]  = state->gprs[NVMM_X64_GPR_R8];
                    267: +    env->regs[R_R9]  = state->gprs[NVMM_X64_GPR_R9];
                    268: +    env->regs[R_R10] = state->gprs[NVMM_X64_GPR_R10];
                    269: +    env->regs[R_R11] = state->gprs[NVMM_X64_GPR_R11];
                    270: +    env->regs[R_R12] = state->gprs[NVMM_X64_GPR_R12];
                    271: +    env->regs[R_R13] = state->gprs[NVMM_X64_GPR_R13];
                    272: +    env->regs[R_R14] = state->gprs[NVMM_X64_GPR_R14];
                    273: +    env->regs[R_R15] = state->gprs[NVMM_X64_GPR_R15];
                    274: +#endif
                    275: +
                    276: +    /* RIP and RFLAGS. */
                    277: +    env->eip = state->gprs[NVMM_X64_GPR_RIP];
                    278: +    env->eflags = state->gprs[NVMM_X64_GPR_RFLAGS];
                    279: +
                    280: +    /* Segments. */
                    281: +    nvmm_get_segment(&env->segs[R_ES], &state->segs[NVMM_X64_SEG_ES]);
                    282: +    nvmm_get_segment(&env->segs[R_CS], &state->segs[NVMM_X64_SEG_CS]);
                    283: +    nvmm_get_segment(&env->segs[R_SS], &state->segs[NVMM_X64_SEG_SS]);
                    284: +    nvmm_get_segment(&env->segs[R_DS], &state->segs[NVMM_X64_SEG_DS]);
                    285: +    nvmm_get_segment(&env->segs[R_FS], &state->segs[NVMM_X64_SEG_FS]);
                    286: +    nvmm_get_segment(&env->segs[R_GS], &state->segs[NVMM_X64_SEG_GS]);
                    287: +
                    288: +    /* Special segments. */
                    289: +    nvmm_get_segment(&env->gdt, &state->segs[NVMM_X64_SEG_GDT]);
                    290: +    nvmm_get_segment(&env->ldt, &state->segs[NVMM_X64_SEG_LDT]);
                    291: +    nvmm_get_segment(&env->tr, &state->segs[NVMM_X64_SEG_TR]);
                    292: +    nvmm_get_segment(&env->idt, &state->segs[NVMM_X64_SEG_IDT]);
                    293: +
                    294: +    /* Control registers. */
                    295: +    env->cr[0] = state->crs[NVMM_X64_CR_CR0];
                    296: +    env->cr[2] = state->crs[NVMM_X64_CR_CR2];
                    297: +    env->cr[3] = state->crs[NVMM_X64_CR_CR3];
                    298: +    env->cr[4] = state->crs[NVMM_X64_CR_CR4];
                    299: +    tpr = state->crs[NVMM_X64_CR_CR8];
                    300: +    if (tpr != qcpu->tpr) {
                    301: +        qcpu->tpr = tpr;
                    302: +        cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
                    303: +    }
                    304: +    env->xcr0 = state->crs[NVMM_X64_CR_XCR0];
                    305: +
                    306: +    /* Debug registers. */
                    307: +    env->dr[0] = state->drs[NVMM_X64_DR_DR0];
                    308: +    env->dr[1] = state->drs[NVMM_X64_DR_DR1];
                    309: +    env->dr[2] = state->drs[NVMM_X64_DR_DR2];
                    310: +    env->dr[3] = state->drs[NVMM_X64_DR_DR3];
                    311: +    env->dr[6] = state->drs[NVMM_X64_DR_DR6];
                    312: +    env->dr[7] = state->drs[NVMM_X64_DR_DR7];
                    313: +
                    314: +    /* FPU. */
                    315: +    env->fpuc = state->fpu.fx_cw;
                    316: +    env->fpstt = (state->fpu.fx_sw >> 11) & 0x7;
                    317: +    env->fpus = state->fpu.fx_sw & ~0x3800;
                    318: +    for (i = 0; i < 8; i++) {
                    319: +        env->fptags[i] = !((state->fpu.fx_tw >> i) & 1);
                    320: +    }
                    321: +    env->fpop = state->fpu.fx_opcode;
                    322: +    env->fpip = state->fpu.fx_ip.fa_64;
                    323: +    env->fpdp = state->fpu.fx_dp.fa_64;
                    324: +    env->mxcsr = state->fpu.fx_mxcsr;
                    325: +    assert(sizeof(state->fpu.fx_87_ac) == sizeof(env->fpregs));
                    326: +    memcpy(env->fpregs, state->fpu.fx_87_ac, sizeof(env->fpregs));
                    327: +    for (i = 0; i < CPU_NB_REGS; i++) {
                    328: +        memcpy(&env->xmm_regs[i].ZMM_Q(0),
                    329: +            &state->fpu.fx_xmm[i].xmm_bytes[0], 8);
                    330: +        memcpy(&env->xmm_regs[i].ZMM_Q(1),
                    331: +            &state->fpu.fx_xmm[i].xmm_bytes[8], 8);
                    332: +    }
                    333: +
                    334: +    /* MSRs. */
                    335: +    env->efer = state->msrs[NVMM_X64_MSR_EFER];
                    336: +    env->star = state->msrs[NVMM_X64_MSR_STAR];
                    337: +#ifdef TARGET_X86_64
                    338: +    env->lstar = state->msrs[NVMM_X64_MSR_LSTAR];
                    339: +    env->cstar = state->msrs[NVMM_X64_MSR_CSTAR];
                    340: +    env->fmask = state->msrs[NVMM_X64_MSR_SFMASK];
                    341: +    env->kernelgsbase = state->msrs[NVMM_X64_MSR_KERNELGSBASE];
                    342: +#endif
                    343: +    env->sysenter_cs  = state->msrs[NVMM_X64_MSR_SYSENTER_CS];
                    344: +    env->sysenter_esp = state->msrs[NVMM_X64_MSR_SYSENTER_ESP];
                    345: +    env->sysenter_eip = state->msrs[NVMM_X64_MSR_SYSENTER_EIP];
                    346: +    env->pat = state->msrs[NVMM_X64_MSR_PAT];
                    347: +    env->tsc = state->msrs[NVMM_X64_MSR_TSC];
                    348: +
                    349: +    x86_update_hflags(env);
                    350: +}
                    351: +
                    352: +static bool
                    353: +nvmm_can_take_int(CPUState *cpu)
                    354: +{
                    355: +    struct CPUX86State *env = (CPUArchState *)cpu->env_ptr;
                    356: +    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
                    357: +    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
                    358: +    struct nvmm_machine *mach = get_nvmm_mach();
                    359: +
                    360: +    if (qcpu->int_window_exit) {
                    361: +        return false;
                    362: +    }
                    363: +
                    364: +    if (qcpu->int_shadow || !(env->eflags & IF_MASK)) {
                    365: +        struct nvmm_x64_state *state = vcpu->state;
                    366: +
                    367: +        /* Exit on interrupt window. */
                    368: +        nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_INTR);
                    369: +        state->intr.int_window_exiting = 1;
                    370: +        nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_INTR);
                    371: +
                    372: +        return false;
                    373: +    }
                    374: +
                    375: +    return true;
                    376: +}
                    377: +
                    378: +static bool
                    379: +nvmm_can_take_nmi(CPUState *cpu)
                    380: +{
                    381: +    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
                    382: +
                    383: +    /*
                    384: +     * Contrary to INTs, NMIs always schedule an exit when they are
                    385: +     * completed. Therefore, if window-exiting is enabled, it means
                    386: +     * NMIs are blocked.
                    387: +     */
                    388: +    if (qcpu->nmi_window_exit) {
                    389: +        return false;
                    390: +    }
                    391: +
                    392: +    return true;
                    393: +}
                    394: +
                    395: +/*
                    396: + * Called before the VCPU is run. We inject events generated by the I/O
                    397: + * thread, and synchronize the guest TPR.
                    398: + */
                    399: +static void
                    400: +nvmm_vcpu_pre_run(CPUState *cpu)
                    401: +{
                    402: +    struct CPUX86State *env = (CPUArchState *)cpu->env_ptr;
                    403: +    struct nvmm_machine *mach = get_nvmm_mach();
                    404: +    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
                    405: +    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
                    406: +    X86CPU *x86_cpu = X86_CPU(cpu);
                    407: +    struct nvmm_x64_state *state = vcpu->state;
                    408: +    struct nvmm_vcpu_event *event = vcpu->event;
                    409: +    bool has_event = false;
                    410: +    bool sync_tpr = false;
                    411: +    uint8_t tpr;
                    412: +    int ret;
                    413: +
                    414: +    qemu_mutex_lock_iothread();
                    415: +
                    416: +    tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
                    417: +    if (tpr != qcpu->tpr) {
                    418: +        qcpu->tpr = tpr;
                    419: +        sync_tpr = true;
                    420: +    }
                    421: +
                    422: +    /*
                    423: +     * Force the VCPU out of its inner loop to process any INIT requests
                    424: +     * or commit pending TPR access.
                    425: +     */
                    426: +    if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
                    427: +        cpu->exit_request = 1;
                    428: +    }
                    429: +
                    430: +    if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
                    431: +        if (nvmm_can_take_nmi(cpu)) {
                    432: +            cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
                    433: +            event->type = NVMM_VCPU_EVENT_INTR;
                    434: +            event->vector = 2;
                    435: +            has_event = true;
                    436: +        }
                    437: +    }
                    438: +
                    439: +    if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
                    440: +        if (nvmm_can_take_int(cpu)) {
                    441: +            cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
                    442: +            event->type = NVMM_VCPU_EVENT_INTR;
                    443: +            event->vector = cpu_get_pic_interrupt(env);
                    444: +            has_event = true;
                    445: +        }
                    446: +    }
                    447: +
                    448: +    /* Don't want SMIs. */
                    449: +    if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
                    450: +        cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
                    451: +    }
                    452: +
                    453: +    if (sync_tpr) {
                    454: +        ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_CRS);
                    455: +        if (ret == -1) {
                    456: +            error_report("NVMM: Failed to get CPU state,"
                    457: +                " error=%d", errno);
                    458: +        }
                    459: +
                    460: +        state->crs[NVMM_X64_CR_CR8] = qcpu->tpr;
                    461: +
                    462: +        ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_CRS);
                    463: +        if (ret == -1) {
                    464: +            error_report("NVMM: Failed to set CPU state,"
                    465: +                " error=%d", errno);
                    466: +        }
                    467: +    }
                    468: +
                    469: +    if (has_event) {
                    470: +        ret = nvmm_vcpu_inject(mach, vcpu);
                    471: +        if (ret == -1) {
                    472: +            error_report("NVMM: Failed to inject event,"
                    473: +                " error=%d", errno);
                    474: +        }
                    475: +    }
                    476: +
                    477: +    qemu_mutex_unlock_iothread();
                    478: +}
                    479: +
                    480: +/*
                    481: + * Called after the VCPU ran. We synchronize the host view of the TPR and
                    482: + * RFLAGS.
                    483: + */
                    484: +static void
                    485: +nvmm_vcpu_post_run(CPUState *cpu, struct nvmm_vcpu_exit *exit)
                    486: +{
                    487: +    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
                    488: +    struct CPUX86State *env = (CPUArchState *)cpu->env_ptr;
                    489: +    X86CPU *x86_cpu = X86_CPU(cpu);
                    490: +    uint64_t tpr;
                    491: +
                    492: +    env->eflags = exit->exitstate.rflags;
                    493: +    qcpu->int_shadow = exit->exitstate.int_shadow;
                    494: +    qcpu->int_window_exit = exit->exitstate.int_window_exiting;
                    495: +    qcpu->nmi_window_exit = exit->exitstate.nmi_window_exiting;
                    496: +
                    497: +    tpr = exit->exitstate.cr8;
                    498: +    if (qcpu->tpr != tpr) {
                    499: +        qcpu->tpr = tpr;
                    500: +        qemu_mutex_lock_iothread();
                    501: +        cpu_set_apic_tpr(x86_cpu->apic_state, qcpu->tpr);
                    502: +        qemu_mutex_unlock_iothread();
                    503: +    }
                    504: +}
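
[The cr8 exit-state field can be handed to cpu_set_apic_tpr() directly because, architecturally, CR8 on x86-64 exposes the priority class, i.e. the upper nibble of the local APIC's 8-bit Task Priority Register. A small illustrative sketch with made-up values; not part of the patch:

#include <assert.h>
#include <stdint.h>

int
main(void)
{
    uint8_t apic_tpr = 0xB0;        /* 8-bit APIC TPR, sub-class nibble 0 */
    uint8_t cr8 = apic_tpr >> 4;    /* CR8 carries only the 4-bit class */

    assert(cr8 == 0xB);
    /* Round-trips exactly here because the low nibble was zero; CR8
     * cannot represent the TPR's priority sub-class bits. */
    assert((uint8_t)(cr8 << 4) == apic_tpr);
    return 0;
}]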
                    505: +
                    506: +/* -------------------------------------------------------------------------- */
                    507: +
                    508: +static void
                    509: +nvmm_io_callback(struct nvmm_io *io)
                    510: +{
                    511: +    MemTxAttrs attrs = { 0 };
                    512: +    int ret;
                    513: +
                    514: +    ret = address_space_rw(&address_space_io, io->port, attrs, io->data,
                    515: +        io->size, !io->in);
                    516: +    if (ret != MEMTX_OK) {
                    517: +        error_report("NVMM: I/O Transaction Failed "
                    518: +            "[%s, port=%u, size=%zu]", (io->in ? "in" : "out"),
                    519: +            io->port, io->size);
                    520: +    }
                    521: +
                    522: +    /* Needed, otherwise infinite loop. */
                    523: +    current_cpu->vcpu_dirty = false;
                    524: +}
                    525: +
                    526: +static void
                    527: +nvmm_mem_callback(struct nvmm_mem *mem)
                    528: +{
                    529: +    cpu_physical_memory_rw(mem->gpa, mem->data, mem->size, mem->write);
                    530: +
                    531: +    /* Needed, otherwise infinite loop. */
                    532: +    current_cpu->vcpu_dirty = false;
                    533: +}
                    534: +
                    535: +static struct nvmm_assist_callbacks nvmm_callbacks = {
                    536: +    .io = nvmm_io_callback,
                    537: +    .mem = nvmm_mem_callback
                    538: +};
                    539: +
                    540: +/* -------------------------------------------------------------------------- */
                    541: +
                    542: +static int
                    543: +nvmm_handle_mem(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
                    544: +{
                    545: +    int ret;
                    546: +
                    547: +    ret = nvmm_assist_mem(mach, vcpu);
                    548: +    if (ret == -1) {
                    549: +        error_report("NVMM: Mem Assist Failed [gpa=%p]",
                    550: +            (void *)vcpu->exit->u.mem.gpa);
                    551: +    }
                    552: +
                    553: +    return ret;
                    554: +}
                    555: +
                    556: +static int
                    557: +nvmm_handle_io(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
                    558: +{
                    559: +    int ret;
                    560: +
                    561: +    ret = nvmm_assist_io(mach, vcpu);
                    562: +    if (ret == -1) {
                    563: +        error_report("NVMM: I/O Assist Failed [port=%d]",
                    564: +            (int)vcpu->exit->u.io.port);
                    565: +    }
                    566: +
                    567: +    return ret;
                    568: +}
                    569: +
                    570: +static int
                    571: +nvmm_handle_rdmsr(struct nvmm_machine *mach, CPUState *cpu,
                    572: +    struct nvmm_vcpu_exit *exit)
                    573: +{
                    574: +    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
                    575: +    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
                    576: +    X86CPU *x86_cpu = X86_CPU(cpu);
                    577: +    struct nvmm_x64_state *state = vcpu->state;
                    578: +    uint64_t val;
                    579: +    int ret;
                    580: +
                    581: +    switch (exit->u.rdmsr.msr) {
                    582: +    case MSR_IA32_APICBASE:
                    583: +        val = cpu_get_apic_base(x86_cpu->apic_state);
                    584: +        break;
                    585: +    case MSR_MTRRcap:
                    586: +    case MSR_MTRRdefType:
                    587: +    case MSR_MCG_CAP:
                    588: +    case MSR_MCG_STATUS:
                    589: +        val = 0;
                    590: +        break;
                    591: +    default: /* More MSRs to add? */
                    592: +        val = 0;
                    593: +        error_report("NVMM: Unexpected RDMSR 0x%x, ignored",
                    594: +            exit->u.rdmsr.msr);
                    595: +        break;
                    596: +    }
                    597: +
                    598: +    ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_GPRS);
                    599: +    if (ret == -1) {
                    600: +        return -1;
                    601: +    }
                    602: +
                    603: +    state->gprs[NVMM_X64_GPR_RAX] = (val & 0xFFFFFFFF);
                    604: +    state->gprs[NVMM_X64_GPR_RDX] = (val >> 32);
                    605: +    state->gprs[NVMM_X64_GPR_RIP] = exit->u.rdmsr.npc;
                    606: +
                    607: +    ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
                    608: +    if (ret == -1) {
                    609: +        return -1;
                    610: +    }
                    611: +
                    612: +    return 0;
                    613: +}
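
[The RAX/RDX writes above follow the RDMSR architecture: the 64-bit MSR value is returned split across EDX:EAX, and RIP is then advanced past the instruction (exit->u.rdmsr.npc). A trivial standalone illustration of the split, with an arbitrary value; not part of the patch:

#include <assert.h>
#include <stdint.h>

int
main(void)
{
    uint64_t val = 0x0000001200345678ULL;   /* arbitrary 64-bit MSR value */
    uint64_t rax = val & 0xFFFFFFFF;        /* low 32 bits -> EAX */
    uint64_t rdx = val >> 32;               /* high 32 bits -> EDX */

    assert(rax == 0x00345678 && rdx == 0x12);
    assert(((rdx << 32) | rax) == val);     /* lossless recombination */
    return 0;
}]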
                    614: +
                    615: +static int
                    616: +nvmm_handle_wrmsr(struct nvmm_machine *mach, CPUState *cpu,
                    617: +    struct nvmm_vcpu_exit *exit)
                    618: +{
                    619: +    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
                    620: +    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
                    621: +    X86CPU *x86_cpu = X86_CPU(cpu);
                    622: +    struct nvmm_x64_state *state = vcpu->state;
                    623: +    uint64_t val;
                    624: +    int ret;
                    625: +
                    626: +    val = exit->u.wrmsr.val;
                    627: +
                    628: +    switch (exit->u.wrmsr.msr) {
                    629: +    case MSR_IA32_APICBASE:
                    630: +        cpu_set_apic_base(x86_cpu->apic_state, val);
                    631: +        break;
                    632: +    case MSR_MTRRdefType:
                    633: +    case MSR_MCG_STATUS:
                    634: +        break;
                    635: +    default: /* More MSRs to add? */
                    636: +        error_report("NVMM: Unexpected WRMSR 0x%x [val=0x%lx], ignored",
                    637: +            exit->u.wrmsr.msr, val);
                    638: +        break;
                    639: +    }
                    640: +
                    641: +    ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_GPRS);
                    642: +    if (ret == -1) {
                    643: +        return -1;
                    644: +    }
                    645: +
                    646: +    state->gprs[NVMM_X64_GPR_RIP] = exit->u.wrmsr.npc;
                    647: +
                    648: +    ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
                    649: +    if (ret == -1) {
                    650: +        return -1;
                    651: +    }
                    652: +
                    653: +    return 0;
                    654: +}
                    655: +
                    656: +static int
                    657: +nvmm_handle_halted(struct nvmm_machine *mach, CPUState *cpu,
                    658: +    struct nvmm_vcpu_exit *exit)
                    659: +{
                    660: +    struct CPUX86State *env = (CPUArchState *)cpu->env_ptr;
                    661: +    int ret = 0;
                    662: +
                    663: +    qemu_mutex_lock_iothread();
                    664: +
                    665: +    if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                    666: +          (env->eflags & IF_MASK)) &&
                    667: +        !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
                    668: +        cpu->exception_index = EXCP_HLT;
                    669: +        cpu->halted = true;
                    670: +        ret = 1;
                    671: +    }
                    672: +
                    673: +    qemu_mutex_unlock_iothread();
                    674: +
                    675: +    return ret;
                    676: +}
                    677: +
                    678: +static int
                    679: +nvmm_inject_ud(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
                    680: +{
                    681: +    struct nvmm_vcpu_event *event = vcpu->event;
                    682: +
                    683: +    event->type = NVMM_VCPU_EVENT_EXCP;
                    684: +    event->vector = 6;
                    685: +    event->u.excp.error = 0;
                    686: +
                    687: +    return nvmm_vcpu_inject(mach, vcpu);
                    688: +}
                    689: +
                    690: +static int
                    691: +nvmm_vcpu_loop(CPUState *cpu)
                    692: +{
                    693: +    struct CPUX86State *env = (CPUArchState *)cpu->env_ptr;
                    694: +    struct nvmm_machine *mach = get_nvmm_mach();
                    695: +    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
                    696: +    struct nvmm_vcpu *vcpu = &qcpu->vcpu;
                    697: +    X86CPU *x86_cpu = X86_CPU(cpu);
                    698: +    struct nvmm_vcpu_exit *exit = vcpu->exit;
                    699: +    int ret;
                    700: +
                    701: +    /*
                    702: +     * Some asynchronous events must be handled outside of the inner
                    703: +     * VCPU loop. They are handled here.
                    704: +     */
                    705: +    if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
                    706: +        nvmm_cpu_synchronize_state(cpu);
                    707: +        do_cpu_init(x86_cpu);
                    708: +        /* set int/nmi windows back to the reset state */
                    709: +    }
                    710: +    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
                    711: +        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
                    712: +        apic_poll_irq(x86_cpu->apic_state);
                    713: +    }
                    714: +    if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                    715: +         (env->eflags & IF_MASK)) ||
                    716: +        (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
                    717: +        cpu->halted = false;
                    718: +    }
                    719: +    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
                    720: +        nvmm_cpu_synchronize_state(cpu);
                    721: +        do_cpu_sipi(x86_cpu);
                    722: +    }
                    723: +    if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
                    724: +        cpu->interrupt_request &= ~CPU_INTERRUPT_TPR;
                    725: +        nvmm_cpu_synchronize_state(cpu);
                    726: +        apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip,
                    727: +            env->tpr_access_type);
                    728: +    }
                    729: +
                    730: +    if (cpu->halted) {
                    731: +        cpu->exception_index = EXCP_HLT;
                    732: +        qatomic_set(&cpu->exit_request, false);
                    733: +        return 0;
                    734: +    }
                    735: +
                    736: +    qemu_mutex_unlock_iothread();
                    737: +    cpu_exec_start(cpu);
                    738: +
                    739: +    /*
                    740: +     * Inner VCPU loop.
                    741: +     */
                    742: +    do {
                    743: +        if (cpu->vcpu_dirty) {
                    744: +            nvmm_set_registers(cpu);
                    745: +            cpu->vcpu_dirty = false;
                    746: +        }
                    747: +
                    748: +        if (qcpu->stop) {
                    749: +            cpu->exception_index = EXCP_INTERRUPT;
                    750: +            qcpu->stop = false;
                    751: +            ret = 1;
                    752: +            break;
                    753: +        }
                    754: +
                    755: +        nvmm_vcpu_pre_run(cpu);
                    756: +
                    757: +        if (qatomic_read(&cpu->exit_request)) {
1.2     ! nia       758: +#if NVMM_USER_VERSION >= 2
1.1       ryoon     759: +            nvmm_vcpu_stop(vcpu);
1.2     ! nia       760: +#else
        !           761: +            qemu_cpu_kick_self();
        !           762: +#endif
1.1       ryoon     763: +        }
                    764: +
                    765: +        /* Read exit_request before the kernel reads the immediate exit flag */
                    766: +        smp_rmb();
                    767: +        ret = nvmm_vcpu_run(mach, vcpu);
                    768: +        if (ret == -1) {
                    769: +            error_report("NVMM: Failed to exec a virtual processor,"
                    770: +                " error=%d", errno);
                    771: +            break;
                    772: +        }
                    773: +
                    774: +        nvmm_vcpu_post_run(cpu, exit);
                    775: +
                    776: +        switch (exit->reason) {
                    777: +        case NVMM_VCPU_EXIT_NONE:
                    778: +            break;
1.2     ! nia       779: +#if NVMM_USER_VERSION >= 2
1.1       ryoon     780: +        case NVMM_VCPU_EXIT_STOPPED:
                    781: +            /*
                    782: +             * The kernel cleared the immediate exit flag; cpu->exit_request
                    783: +             * must be cleared after
                    784: +             */
                    785: +            smp_wmb();
                    786: +            qcpu->stop = true;
                    787: +            break;
1.2     ! nia       788: +#endif
1.1       ryoon     789: +        case NVMM_VCPU_EXIT_MEMORY:
                    790: +            ret = nvmm_handle_mem(mach, vcpu);
                    791: +            break;
                    792: +        case NVMM_VCPU_EXIT_IO:
                    793: +            ret = nvmm_handle_io(mach, vcpu);
                    794: +            break;
                    795: +        case NVMM_VCPU_EXIT_INT_READY:
                    796: +        case NVMM_VCPU_EXIT_NMI_READY:
                    797: +        case NVMM_VCPU_EXIT_TPR_CHANGED:
                    798: +            break;
                    799: +        case NVMM_VCPU_EXIT_HALTED:
                    800: +            ret = nvmm_handle_halted(mach, cpu, exit);
                    801: +            break;
                    802: +        case NVMM_VCPU_EXIT_SHUTDOWN:
                    803: +            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
                    804: +            cpu->exception_index = EXCP_INTERRUPT;
                    805: +            ret = 1;
                    806: +            break;
                    807: +        case NVMM_VCPU_EXIT_RDMSR:
                    808: +            ret = nvmm_handle_rdmsr(mach, cpu, exit);
                    809: +            break;
                    810: +        case NVMM_VCPU_EXIT_WRMSR:
                    811: +            ret = nvmm_handle_wrmsr(mach, cpu, exit);
                    812: +            break;
                    813: +        case NVMM_VCPU_EXIT_MONITOR:
                    814: +        case NVMM_VCPU_EXIT_MWAIT:
                    815: +            ret = nvmm_inject_ud(mach, vcpu);
                    816: +            break;
                    817: +        default:
                    818: +            error_report("NVMM: Unexpected VM exit code 0x%lx [hw=0x%lx]",
                    819: +                exit->reason, exit->u.inv.hwcode);
                    820: +            nvmm_get_registers(cpu);
                    821: +            qemu_mutex_lock_iothread();
                    822: +            qemu_system_guest_panicked(cpu_get_crash_info(cpu));
                    823: +            qemu_mutex_unlock_iothread();
                    824: +            ret = -1;
                    825: +            break;
                    826: +        }
                    827: +    } while (ret == 0);
                    828: +
                    829: +    cpu_exec_end(cpu);
                    830: +    qemu_mutex_lock_iothread();
                    831: +
                    832: +    qatomic_set(&cpu->exit_request, false);
                    833: +
                    834: +    return ret < 0;
                    835: +}
                    836: +
                    837: +/* -------------------------------------------------------------------------- */
                    838: +
                    839: +static void
                    840: +do_nvmm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
                    841: +{
                    842: +    nvmm_get_registers(cpu);
                    843: +    cpu->vcpu_dirty = true;
                    844: +}
                    845: +
                    846: +static void
                    847: +do_nvmm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
                    848: +{
                    849: +    nvmm_set_registers(cpu);
                    850: +    cpu->vcpu_dirty = false;
                    851: +}
                    852: +
                    853: +static void
                    854: +do_nvmm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
                    855: +{
                    856: +    nvmm_set_registers(cpu);
                    857: +    cpu->vcpu_dirty = false;
                    858: +}
                    859: +
                    860: +static void
                    861: +do_nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
                    862: +{
                    863: +    cpu->vcpu_dirty = true;
                    864: +}
                    865: +
                    866: +void nvmm_cpu_synchronize_state(CPUState *cpu)
                    867: +{
                    868: +    if (!cpu->vcpu_dirty) {
                    869: +        run_on_cpu(cpu, do_nvmm_cpu_synchronize_state, RUN_ON_CPU_NULL);
                    870: +    }
                    871: +}
                    872: +
                    873: +void nvmm_cpu_synchronize_post_reset(CPUState *cpu)
                    874: +{
                    875: +    run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
                    876: +}
                    877: +
                    878: +void nvmm_cpu_synchronize_post_init(CPUState *cpu)
                    879: +{
                    880: +    run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
                    881: +}
                    882: +
                    883: +void nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu)
                    884: +{
                    885: +    run_on_cpu(cpu, do_nvmm_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
                    886: +}
                    887: +
                    888: +/* -------------------------------------------------------------------------- */
                    889: +
                    890: +static Error *nvmm_migration_blocker;
                    891: +
                    892: +/*
                    893: + * The nvmm_vcpu_stop() mechanism breaks races between entering the VMM
                    894: + * and another thread signaling the vCPU thread to exit.
                    895: + */
                    896: +
                    897: +static void
                    898: +nvmm_ipi_signal(int sigcpu)
                    899: +{
                    900: +    if (current_cpu) {
                    901: +        struct qemu_vcpu *qcpu = get_qemu_vcpu(current_cpu);
1.2     ! nia       902: +#if NVMM_USER_VERSION >= 2
1.1       ryoon     903: +        struct nvmm_vcpu *vcpu = &qcpu->vcpu;
                    904: +        nvmm_vcpu_stop(vcpu);
1.2     ! nia       905: +#else
        !           906: +        qcpu->stop = true;
        !           907: +#endif
1.1       ryoon     908: +    }
                    909: +}
                    910: +
                    911: +static void
                    912: +nvmm_init_cpu_signals(void)
                    913: +{
                    914: +    struct sigaction sigact;
                    915: +    sigset_t set;
                    916: +
                    917: +    /* Install the IPI handler. */
                    918: +    memset(&sigact, 0, sizeof(sigact));
                    919: +    sigact.sa_handler = nvmm_ipi_signal;
                    920: +    sigaction(SIG_IPI, &sigact, NULL);
                    921: +
                    922: +    /* Allow IPIs on the current thread. */
                    923: +    sigprocmask(SIG_BLOCK, NULL, &set);
                    924: +    sigdelset(&set, SIG_IPI);
                    925: +    pthread_sigmask(SIG_SETMASK, &set, NULL);
                    926: +}
                    927: +
                    928: +int
                    929: +nvmm_init_vcpu(CPUState *cpu)
                    930: +{
                    931: +    struct nvmm_machine *mach = get_nvmm_mach();
                    932: +    struct nvmm_vcpu_conf_cpuid cpuid;
                    933: +    struct nvmm_vcpu_conf_tpr tpr;
                    934: +    Error *local_error = NULL;
                    935: +    struct qemu_vcpu *qcpu;
                    936: +    int ret, err;
                    937: +
                    938: +    nvmm_init_cpu_signals();
                    939: +
                    940: +    if (nvmm_migration_blocker == NULL) {
                    941: +        error_setg(&nvmm_migration_blocker,
                    942: +            "NVMM: Migration not supported");
                    943: +
                    944: +        (void)migrate_add_blocker(nvmm_migration_blocker, &local_error);
                    945: +        if (local_error) {
                    946: +            error_report_err(local_error);
                    947: +            migrate_del_blocker(nvmm_migration_blocker);
                    948: +            error_free(nvmm_migration_blocker);
                    949: +            return -EINVAL;
                    950: +        }
                    951: +    }
                    952: +
                    953: +    qcpu = g_malloc0(sizeof(*qcpu));
                    954: +    if (qcpu == NULL) {
                    955: +        error_report("NVMM: Failed to allocate VCPU context.");
                    956: +        return -ENOMEM;
                    957: +    }
                    958: +
                    959: +    ret = nvmm_vcpu_create(mach, cpu->cpu_index, &qcpu->vcpu);
                    960: +    if (ret == -1) {
                    961: +        err = errno;
                    962: +        error_report("NVMM: Failed to create a virtual processor,"
                    963: +            " error=%d", err);
                    964: +        g_free(qcpu);
                    965: +        return -err;
                    966: +    }
                    967: +
                    968: +    memset(&cpuid, 0, sizeof(cpuid));
                    969: +    cpuid.mask = 1;
                    970: +    cpuid.leaf = 0x00000001;
                    971: +    cpuid.u.mask.set.edx = CPUID_MCE | CPUID_MCA | CPUID_MTRR;
                    972: +    ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_CPUID,
                    973: +        &cpuid);
                    974: +    if (ret == -1) {
                    975: +        err = errno;
                    976: +        error_report("NVMM: Failed to configure a virtual processor,"
                    977: +            " error=%d", err);
                    978: +        g_free(qcpu);
                    979: +        return -err;
                    980: +    }
                    981: +
                    982: +    ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_CALLBACKS,
                    983: +        &nvmm_callbacks);
                    984: +    if (ret == -1) {
                    985: +        err = errno;
                    986: +        error_report("NVMM: Failed to configure a virtual processor,"
                    987: +            " error=%d", err);
                    988: +        g_free(qcpu);
                    989: +        return -err;
                    990: +    }
                    991: +
                    992: +    if (qemu_mach.cap.arch.vcpu_conf_support & NVMM_CAP_ARCH_VCPU_CONF_TPR) {
                    993: +        memset(&tpr, 0, sizeof(tpr));
                    994: +        tpr.exit_changed = 1;
                    995: +        ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_TPR, &tpr);
                    996: +        if (ret == -1) {
                    997: +            err = errno;
                    998: +            error_report("NVMM: Failed to configure a virtual processor,"
                    999: +                " error=%d", err);
                   1000: +            g_free(qcpu);
                   1001: +            return -err;
                   1002: +        }
                   1003: +    }
                   1004: +
                   1005: +    cpu->vcpu_dirty = true;
                   1006: +    cpu->hax_vcpu = (struct hax_vcpu_state *)qcpu;
                   1007: +
                   1008: +    return 0;
                   1009: +}
                   1010: +
                   1011: +int
                   1012: +nvmm_vcpu_exec(CPUState *cpu)
                   1013: +{
                   1014: +    int ret, fatal;
                   1015: +
                   1016: +    while (1) {
                   1017: +        if (cpu->exception_index >= EXCP_INTERRUPT) {
                   1018: +            ret = cpu->exception_index;
                   1019: +            cpu->exception_index = -1;
                   1020: +            break;
                   1021: +        }
                   1022: +
                   1023: +        fatal = nvmm_vcpu_loop(cpu);
                   1024: +
                   1025: +        if (fatal) {
                   1026: +            error_report("NVMM: Failed to execute a VCPU.");
                   1027: +            abort();
                   1028: +        }
                   1029: +    }
                   1030: +
                   1031: +    return ret;
                   1032: +}
                   1033: +
                   1034: +void
                   1035: +nvmm_destroy_vcpu(CPUState *cpu)
                   1036: +{
                   1037: +    struct nvmm_machine *mach = get_nvmm_mach();
                   1038: +    struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
                   1039: +
                   1040: +    nvmm_vcpu_destroy(mach, &qcpu->vcpu);
                   1041: +    g_free(cpu->hax_vcpu);
                   1042: +}
                   1043: +
                   1044: +/* -------------------------------------------------------------------------- */
                   1045: +
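                          +/*
                          + * Map or unmap a guest physical range onto host memory. ROM sections
                          + * are mapped read-only and executable, RAM also writable.
                          + */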
                   1046: +static void
                   1047: +nvmm_update_mapping(hwaddr start_pa, ram_addr_t size, uintptr_t hva,
                   1048: +    bool add, bool rom, const char *name)
                   1049: +{
                   1050: +    struct nvmm_machine *mach = get_nvmm_mach();
                   1051: +    int ret, prot;
                   1052: +
                   1053: +    if (add) {
                   1054: +        prot = PROT_READ | PROT_EXEC;
                   1055: +        if (!rom) {
                   1056: +            prot |= PROT_WRITE;
                   1057: +        }
                   1058: +        ret = nvmm_gpa_map(mach, hva, start_pa, size, prot);
                   1059: +    } else {
                   1060: +        ret = nvmm_gpa_unmap(mach, hva, start_pa, size);
                   1061: +    }
                   1062: +
                   1063: +    if (ret == -1) {
                   1064: +        error_report("NVMM: Failed to %s GPA range '%s' PA:%p, "
                   1065: +            "Size:%p bytes, HostVA:%p, error=%d",
                   1066: +            (add ? "map" : "unmap"), name, (void *)(uintptr_t)start_pa,
                   1067: +            (void *)size, (void *)hva, errno);
                   1068: +    }
                   1069: +}
                   1070: +
                   1071: +static void
                   1072: +nvmm_process_section(MemoryRegionSection *section, int add)
                   1073: +{
                   1074: +    MemoryRegion *mr = section->mr;
                   1075: +    hwaddr start_pa = section->offset_within_address_space;
                   1076: +    ram_addr_t size = int128_get64(section->size);
                   1077: +    unsigned int delta;
                   1078: +    uintptr_t hva;
                   1079: +
                   1080: +    if (!memory_region_is_ram(mr)) {
                   1081: +        return;
                   1082: +    }
                   1083: +
                   1084: +    /* Adjust start_pa and size so that they are page-aligned. */
                   1085: +    delta = qemu_real_host_page_size - (start_pa & ~qemu_real_host_page_mask);
                   1086: +    delta &= ~qemu_real_host_page_mask;
                   1087: +    if (delta > size) {
                   1088: +        return;
                   1089: +    }
                   1090: +    start_pa += delta;
                   1091: +    size -= delta;
                   1092: +    size &= qemu_real_host_page_mask;
                   1093: +    if (!size || (start_pa & ~qemu_real_host_page_mask)) {
                   1094: +        return;
                   1095: +    }
                   1096: +
                   1097: +    hva = (uintptr_t)memory_region_get_ram_ptr(mr) +
                   1098: +        section->offset_within_region + delta;
                   1099: +
                   1100: +    nvmm_update_mapping(start_pa, size, hva, add,
                   1101: +        memory_region_is_rom(mr), mr->name);
                   1102: +}
                   1103: +
                   1104: +static void
                   1105: +nvmm_region_add(MemoryListener *listener, MemoryRegionSection *section)
                   1106: +{
                   1107: +    memory_region_ref(section->mr);
                   1108: +    nvmm_process_section(section, 1);
                   1109: +}
                   1110: +
                   1111: +static void
                   1112: +nvmm_region_del(MemoryListener *listener, MemoryRegionSection *section)
                   1113: +{
                   1114: +    nvmm_process_section(section, 0);
                   1115: +    memory_region_unref(section->mr);
                   1116: +}
                   1117: +
                   1118: +static void
                   1119: +nvmm_transaction_begin(MemoryListener *listener)
                   1120: +{
                   1121: +    /* nothing */
                   1122: +}
                   1123: +
                   1124: +static void
                   1125: +nvmm_transaction_commit(MemoryListener *listener)
                   1126: +{
                   1127: +    /* nothing */
                   1128: +}
                   1129: +
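                          +/*
                          + * There is no fine-grained dirty tracking here: the whole RAM section
                          + * is conservatively marked dirty on each log sync.
                          + */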
                   1130: +static void
                   1131: +nvmm_log_sync(MemoryListener *listener, MemoryRegionSection *section)
                   1132: +{
                   1133: +    MemoryRegion *mr = section->mr;
                   1134: +
                   1135: +    if (!memory_region_is_ram(mr)) {
                   1136: +        return;
                   1137: +    }
                   1138: +
                   1139: +    memory_region_set_dirty(mr, 0, int128_get64(section->size));
                   1140: +}
                   1141: +
                   1142: +static MemoryListener nvmm_memory_listener = {
                   1143: +    .begin = nvmm_transaction_begin,
                   1144: +    .commit = nvmm_transaction_commit,
                   1145: +    .region_add = nvmm_region_add,
                   1146: +    .region_del = nvmm_region_del,
                   1147: +    .log_sync = nvmm_log_sync,
                   1148: +    .priority = 10,
                   1149: +};
                   1150: +
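                          +/*
                          + * RAM blocks must be registered with NVMM as host virtual areas
                          + * (nvmm_hva_map) before guest physical ranges can be mapped onto them.
                          + */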
                   1151: +static void
                   1152: +nvmm_ram_block_added(RAMBlockNotifier *n, void *host, size_t size)
                   1153: +{
                   1154: +    struct nvmm_machine *mach = get_nvmm_mach();
                   1155: +    uintptr_t hva = (uintptr_t)host;
                   1156: +    int ret;
                   1157: +
                   1158: +    ret = nvmm_hva_map(mach, hva, size);
                   1159: +
                   1160: +    if (ret == -1) {
                   1161: +        error_report("NVMM: Failed to map HVA, HostVA:%p "
                   1162: +            "Size:%p bytes, error=%d",
                   1163: +            (void *)hva, (void *)size, errno);
                   1164: +    }
                   1165: +}
                   1166: +
                   1167: +static struct RAMBlockNotifier nvmm_ram_notifier = {
                   1168: +    .ram_block_added = nvmm_ram_block_added
                   1169: +};
                   1170: +
                   1171: +/* -------------------------------------------------------------------------- */
                   1172: +
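                          +/*
                          + * Accelerator initialization: check that the kernel and library agree
                          + * on version and state size, create the machine, and register the
                          + * memory listener and RAM-block notifier.
                          + */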
                   1173: +static int
                   1174: +nvmm_accel_init(MachineState *ms)
                   1175: +{
                   1176: +    int ret, err;
                   1177: +
                   1178: +    ret = nvmm_init();
                   1179: +    if (ret == -1) {
                   1180: +        err = errno;
                   1181: +        error_report("NVMM: Initialization failed, error=%d", errno);
                   1182: +        return -err;
                   1183: +    }
                   1184: +
                   1185: +    ret = nvmm_capability(&qemu_mach.cap);
                   1186: +    if (ret == -1) {
                   1187: +        err = errno;
                   1188: +        error_report("NVMM: Unable to fetch capability, error=%d", errno);
                   1189: +        return -err;
                   1190: +    }
                   1191: +    if (qemu_mach.cap.version < NVMM_KERN_VERSION) {
                   1192: +        error_report("NVMM: Unsupported version %u", qemu_mach.cap.version);
                   1193: +        return -EPROGMISMATCH;
                   1194: +    }
                   1195: +    if (qemu_mach.cap.state_size != sizeof(struct nvmm_x64_state)) {
                   1196: +        error_report("NVMM: Wrong state size %u", qemu_mach.cap.state_size);
                   1197: +        return -EPROGMISMATCH;
                   1198: +    }
                   1199: +
                   1200: +    ret = nvmm_machine_create(&qemu_mach.mach);
                   1201: +    if (ret == -1) {
                   1202: +        err = errno;
                   1203: +        error_report("NVMM: Machine creation failed, error=%d", errno);
                   1204: +        return -err;
                   1205: +    }
                   1206: +
                   1207: +    memory_listener_register(&nvmm_memory_listener, &address_space_memory);
                   1208: +    ram_block_notifier_add(&nvmm_ram_notifier);
                   1209: +
                   1210: +    printf("NetBSD Virtual Machine Monitor accelerator is operational\n");
                   1211: +    return 0;
                   1212: +}
                   1213: +
                   1214: +int
                   1215: +nvmm_enabled(void)
                   1216: +{
                   1217: +    return nvmm_allowed;
                   1218: +}
                   1219: +
                   1220: +static void
                   1221: +nvmm_accel_class_init(ObjectClass *oc, void *data)
                   1222: +{
                   1223: +    AccelClass *ac = ACCEL_CLASS(oc);
                   1224: +    ac->name = "NVMM";
                   1225: +    ac->init_machine = nvmm_accel_init;
                   1226: +    ac->allowed = &nvmm_allowed;
                   1227: +}
                   1228: +
                   1229: +static const TypeInfo nvmm_accel_type = {
                   1230: +    .name = ACCEL_CLASS_NAME("nvmm"),
                   1231: +    .parent = TYPE_ACCEL,
                   1232: +    .class_init = nvmm_accel_class_init,
                   1233: +};
                   1234: +
                   1235: +static void
                   1236: +nvmm_type_init(void)
                   1237: +{
                   1238: +    type_register_static(&nvmm_accel_type);
                   1239: +}
                   1240: +
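                          +/*
                          + * Once built in, the accelerator is selected at runtime, e.g.:
                          + *     qemu-system-x86_64 -accel nvmm ...
                          + */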
                   1241: +type_init(nvmm_type_init);
