Annotation of src/sys/arch/xen/x86/hypervisor_machdep.c, Revision 1.34
1.34 ! cherry 1: /* $NetBSD: hypervisor_machdep.c,v 1.33 2018/11/19 10:05:09 kre Exp $ */
1.2 bouyer 2:
3: /*
4: *
5: * Copyright (c) 2004 Christian Limpach.
6: * All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: *
17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20: * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22: * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26: * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27: */
28:
29: /******************************************************************************
30: * hypervisor.c
31: *
32: * Communication to/from hypervisor.
33: *
34: * Copyright (c) 2002-2004, K A Fraser
35: *
36: * Permission is hereby granted, free of charge, to any person obtaining a copy
37: * of this software and associated documentation files (the "Software"), to
38: * deal in the Software without restriction, including without limitation the
39: * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
40: * sell copies of the Software, and to permit persons to whom the Software is
41: * furnished to do so, subject to the following conditions:
42: *
43: * The above copyright notice and this permission notice shall be included in
44: * all copies or substantial portions of the Software.
45: *
46: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
47: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
48: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
49: * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
50: * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
51: * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
52: * DEALINGS IN THE SOFTWARE.
53: */
54:
55:
56: #include <sys/cdefs.h>
1.34 ! cherry 57: __KERNEL_RCSID(0, "$NetBSD: hypervisor_machdep.c,v 1.33 2018/11/19 10:05:09 kre Exp $");
1.2 bouyer 58:
59: #include <sys/param.h>
60: #include <sys/systm.h>
1.10 bouyer 61: #include <sys/kmem.h>
62:
63: #include <uvm/uvm_extern.h>
64:
65: #include <machine/vmparam.h>
66: #include <machine/pmap.h>
1.2 bouyer 67:
68: #include <xen/xen.h>
69: #include <xen/hypervisor.h>
70: #include <xen/evtchn.h>
1.10 bouyer 71: #include <xen/xenpmap.h>
1.2 bouyer 72:
73: #include "opt_xen.h"
1.29 cherry 74: #include "isa.h"
75: #include "pci.h"
1.2 bouyer 76:
1.10 bouyer 77: /*
78: * arch-dependent p2m frame lists list (L3 and L2)
79: * used by Xen for save/restore mappings
80: */
81: static unsigned long * l3_p2m_page;
82: static unsigned long * l2_p2m_page;
83: static int l2_p2m_page_size; /* size of L2 page, in pages */
84:
85: static void build_p2m_frame_list_list(void);
86: static void update_p2m_frame_list_list(void);
87:
1.2 bouyer 88: // #define PORT_DEBUG 4
89: // #define EARLY_DEBUG_EVENT
90:
1.15 cherry 91: /* callback function type */
1.27 bouyer 92: typedef void (*iterate_func_t)(unsigned int, unsigned int,
93: unsigned int, void *);
1.15 cherry 94:
/*
 * Scan a two-level event bitmap and invoke a callback for each set bit.
 *
 * The level-1 selector word is consumed atomically (exchanged with 0).
 * For each set level-1 bit, the corresponding level-2 word is filtered
 * through the optional mask (already-masked bits are skipped) and
 * through this CPU's ci_evtmask[] so only events routed to this CPU
 * are handled.  The selected level-2 bits are masked (when mask is
 * non-NULL) and then cleared BEFORE the callback runs, so an event
 * cannot be re-delivered while it is being processed.
 *
 * => iterate_pending is called as (port, l1 index, l2 index, args).
 * => pendingl1/pendingl2 normally point into the Xen shared info page.
 */
static inline void
evt_iterate_bits(volatile unsigned long *pendingl1,
    volatile unsigned long *pendingl2,
    volatile unsigned long *mask,
    iterate_func_t iterate_pending, void *iterate_args)
{

	KASSERT(pendingl1 != NULL);
	KASSERT(pendingl2 != NULL);

	unsigned long l1, l2;
	unsigned int l1i, l2i, port;

	/* Grab and clear the whole level-1 selector in one step. */
	l1 = xen_atomic_xchg(pendingl1, 0);
	while ((l1i = xen_ffs(l1)) != 0) {
		l1i--;
		l1 &= ~(1UL << l1i);

		/* Pending, not masked, and targeted at this CPU. */
		l2 = pendingl2[l1i] & (mask != NULL ? ~mask[l1i] : -1UL);
		l2 &= curcpu()->ci_evtmask[l1i];

		/* Mask, then ack, the selected events before handling. */
		if (mask != NULL) xen_atomic_setbits_l(&mask[l1i], l2);
		xen_atomic_clearbits_l(&pendingl2[l1i], l2);

		while ((l2i = xen_ffs(l2)) != 0) {
			l2i--;
			l2 &= ~(1UL << l2i);

			/* Event channel number from (l1, l2) position. */
			port = (l1i << LONG_SHIFT) + l2i;

			iterate_pending(port, l1i, l2i, iterate_args);
		}
	}
}
129:
130: /*
131: * Set per-cpu "pending" information for outstanding events that
132: * cannot be processed now.
133: */
134:
135: static inline void
1.27 bouyer 136: evt_set_pending(unsigned int port, unsigned int l1i,
1.15 cherry 137: unsigned int l2i, void *args)
138: {
139:
140: KASSERT(args != NULL);
141:
142: int *ret = args;
143:
144: if (evtsource[port]) {
1.27 bouyer 145: hypervisor_set_ipending(evtsource[port]->ev_imask, l1i, l2i);
1.15 cherry 146: evtsource[port]->ev_evcnt.ev_count++;
1.27 bouyer 147: if (*ret == 0 && curcpu()->ci_ilevel <
1.15 cherry 148: evtsource[port]->ev_maxlevel)
149: *ret = 1;
150: }
151: #ifdef DOM0OPS
152: else {
153: /* set pending event */
154: xenevt_setipending(l1i, l2i);
155: }
156: #endif
157: }
158:
int stipending(void);
/*
 * Collect events that became pending while interrupts were blocked:
 * drain the vcpu's upcall-pending state and record each event at its
 * handlers' IPLs via evt_set_pending().
 *
 * => returns 1 when at least one handler above the current IPL became
 *    pending (caller must process pending interrupts), 0 otherwise.
 */
int
stipending(void)
{
	volatile shared_info_t *s = HYPERVISOR_shared_info;
	struct cpu_info *ci;
	volatile struct vcpu_info *vci;
	int ret;

	ret = 0;
	ci = curcpu();
	vci = ci->ci_vcpu;

#if 0
	if (HYPERVISOR_shared_info->events)
		printf("stipending events %08lx mask %08lx ilevel %d\n",
		    HYPERVISOR_shared_info->events,
		    HYPERVISOR_shared_info->events_mask, ci->ci_ilevel);
#endif

#ifdef EARLY_DEBUG_EVENT
	if (xen_atomic_test_bit(&s->evtchn_pending[0], debug_port)) {
		xen_debug_handler(NULL);
		xen_atomic_clear_bit(&s->evtchn_pending[0], debug_port);
	}
#endif

	/*
	 * we're only called after STIC, so we know that we'll have to
	 * STI at the end
	 */

	/*
	 * Loop until no more upcalls arrive; new events may be posted
	 * while we are draining the current batch.
	 */
	while (vci->evtchn_upcall_pending) {
		/* Block upcalls while consuming the pending bits. */
		cli();

		vci->evtchn_upcall_pending = 0;

		/* Record each pending event at its handlers' IPLs. */
		evt_iterate_bits(&vci->evtchn_pending_sel,
		    s->evtchn_pending, s->evtchn_mask,
		    evt_set_pending, &ret);

		sti();
	}

#if 0
	if (ci->ci_xpending & 0x1)
		printf("stipending events %08lx mask %08lx ilevel %d ipending %08x\n",
		    HYPERVISOR_shared_info->events,
		    HYPERVISOR_shared_info->events_mask, ci->ci_ilevel,
		    ci->ci_xpending);
#endif

	return (ret);
}
213:
1.15 cherry 214: /* Iterate through pending events and call the event handler */
215:
/*
 * evt_iterate_bits() callback for do_hypervisor_callback(): dispatch a
 * single pending event to its registered handler, or (with DOM0OPS) to
 * the xenevt device for ports with no in-kernel event source.
 *
 * => args is the interrupt frame passed to the upcall.
 */
static inline void
evt_do_hypervisor_callback(unsigned int port, unsigned int l1i,
    unsigned int l2i, void *args)
{
	KASSERT(args != NULL);

	struct cpu_info *ci = curcpu();
	struct intrframe *regs = args;

#ifdef PORT_DEBUG
	if (port == PORT_DEBUG)
		printf("do_hypervisor_callback event %d\n", port);
#endif
	if (evtsource[port]) {
		/* Track interrupt depth across the handler call. */
		ci->ci_idepth++;
		evtchn_do_event(port, regs);
		ci->ci_idepth--;
	}
#ifdef DOM0OPS
	else {
		if (ci->ci_ilevel < IPL_HIGH) {
			/* fast path: raise to IPL_HIGH around xenevt_event */
			int oipl = ci->ci_ilevel;
			ci->ci_ilevel = IPL_HIGH;
			ci->ci_idepth++;
			xenevt_event(port);
			ci->ci_idepth--;
			ci->ci_ilevel = oipl;
		} else {
			/* already at/above IPL_HIGH: set pending event */
			xenevt_setipending(l1i, l2i);
		}
	}
#endif
}
251:
/*
 * Hypervisor event upcall entry point: drain all pending events for
 * this vcpu and dispatch each one via evt_do_hypervisor_callback().
 *
 * => regs is the interrupt frame of the interrupted context; its
 *    cs/ip are saved for the clock handler before dispatching.
 */
void
do_hypervisor_callback(struct intrframe *regs)
{
	volatile shared_info_t *s = HYPERVISOR_shared_info;
	struct cpu_info *ci;
	volatile struct vcpu_info *vci;
	int level __diagused;	/* entry IPL, checked under DIAGNOSTIC */

	ci = curcpu();
	vci = ci->ci_vcpu;
	level = ci->ci_ilevel;

	/* Save trapframe for clock handler */
	KASSERT(regs != NULL);
	ci->ci_xen_clockf_usermode = USERMODE(regs->_INTRFRAME_CS);
	ci->ci_xen_clockf_pc = regs->_INTRFRAME_IP;

	// DDD printf("do_hypervisor_callback\n");

#ifdef EARLY_DEBUG_EVENT
	if (xen_atomic_test_bit(&s->evtchn_pending[0], debug_port)) {
		xen_debug_handler(NULL);
		xen_atomic_clear_bit(&s->evtchn_pending[0], debug_port);
	}
#endif

	/*
	 * Clear the upcall-pending flag before scanning, and re-check
	 * it afterwards: events posted during dispatch set it again.
	 */
	while (vci->evtchn_upcall_pending) {
		vci->evtchn_upcall_pending = 0;

		evt_iterate_bits(&vci->evtchn_pending_sel,
		    s->evtchn_pending, s->evtchn_mask,
		    evt_do_hypervisor_callback, regs);
	}

#ifdef DIAGNOSTIC
	/* Handlers must restore the IPL they were entered at. */
	if (level != ci->ci_ilevel)
		printf("hypervisor done %08x level %d/%d ipending %08x\n",
		    (uint)vci->evtchn_pending_sel,
		    level, ci->ci_ilevel, ci->ci_xpending);
#endif
}
293:
/*
 * Inject event "ev" on cpu "ci": mark it pending in the shared info
 * page, unmask it, and trigger a callback on the target CPU (directly
 * when ci is the current CPU, via an XEN_IPI_HVCB IPI otherwise).
 */
void
hypervisor_send_event(struct cpu_info *ci, unsigned int ev)
{
	KASSERT(ci != NULL);

	volatile shared_info_t *s = HYPERVISOR_shared_info;
	volatile struct vcpu_info *vci = ci->ci_vcpu;

#ifdef PORT_DEBUG
	if (ev == PORT_DEBUG)
		printf("hypervisor_send_event %d\n", ev);
#endif

	/* Mark the event pending before unmasking it below. */
	xen_atomic_set_bit(&s->evtchn_pending[0], ev);

	/*
	 * NOTE(review): this branch hints self-delivery as unlikely
	 * while the branch below hints the same condition as likely;
	 * one of the two __predict annotations is presumably wrong --
	 * confirm which case dominates.
	 */
	if (__predict_false(ci == curcpu())) {
		xen_atomic_set_bit(&vci->evtchn_pending_sel,
		    ev >> LONG_SHIFT);
		xen_atomic_set_bit(&vci->evtchn_upcall_pending, 0);
	}

	xen_atomic_clear_bit(&s->evtchn_mask[0], ev);

	if (__predict_true(ci == curcpu())) {
		hypervisor_force_callback();
	} else {
		if (__predict_false(xen_send_ipi(ci, XEN_IPI_HVCB))) {
			panic("xen_send_ipi(cpu%d, XEN_IPI_HVCB) failed\n",
			    (int) ci->ci_cpuid);
		}
	}
}
326:
327: void
1.2 bouyer 328: hypervisor_unmask_event(unsigned int ev)
329: {
1.30 cherry 330:
331: KASSERT(ev > 0 && ev < NR_EVENT_CHANNELS);
1.8 cegger 332:
1.2 bouyer 333: #ifdef PORT_DEBUG
334: if (ev == PORT_DEBUG)
335: printf("hypervisor_unmask_event %d\n", ev);
336: #endif
337:
1.30 cherry 338: /* Xen unmasks the evtchn_mask[0]:ev bit for us. */
339: evtchn_op_t op;
340: op.cmd = EVTCHNOP_unmask;
341: op.u.unmask.port = ev;
342: if (HYPERVISOR_event_channel_op(&op) != 0)
343: panic("Failed to unmask event %d\n", ev);
1.18 bouyer 344:
1.30 cherry 345: return;
1.2 bouyer 346: }
347:
348: void
349: hypervisor_mask_event(unsigned int ev)
350: {
351: volatile shared_info_t *s = HYPERVISOR_shared_info;
352: #ifdef PORT_DEBUG
353: if (ev == PORT_DEBUG)
354: printf("hypervisor_mask_event %d\n", ev);
355: #endif
356:
357: xen_atomic_set_bit(&s->evtchn_mask[0], ev);
358: }
359:
360: void
361: hypervisor_clear_event(unsigned int ev)
362: {
363: volatile shared_info_t *s = HYPERVISOR_shared_info;
364: #ifdef PORT_DEBUG
365: if (ev == PORT_DEBUG)
366: printf("hypervisor_clear_event %d\n", ev);
367: #endif
368:
369: xen_atomic_clear_bit(&s->evtchn_pending[0], ev);
370: }
371:
/*
 * evt_iterate_bits() callback used by hypervisor_enable_ipl():
 * re-enable (unmask) an event that was masked while its IPL was
 * blocked, then ack the corresponding PIRQ when ISA/PCI interrupt
 * support is configured.
 *
 * => l1i/l2i are unused; args must be NULL (no state to carry).
 */
static inline void
evt_enable_event(unsigned int port, unsigned int l1i,
    unsigned int l2i, void *args)
{
	KASSERT(args == NULL);
	hypervisor_unmask_event(port);
#if NPCI > 0 || NISA > 0
	hypervisor_ack_pirq_event(port);
#endif /* NPCI > 0 || NISA > 0 */
}
382:
1.2 bouyer 383: void
384: hypervisor_enable_ipl(unsigned int ipl)
385: {
386: struct cpu_info *ci = curcpu();
387:
388: /*
389: * enable all events for ipl. As we only set an event in ipl_evt_mask
390: * for its lowest IPL, and pending IPLs are processed high to low,
391: * we know that all callback for this event have been processed.
392: */
393:
1.34 ! cherry 394: evt_iterate_bits(&ci->ci_xsources[ipl]->ipl_evt_mask1,
! 395: ci->ci_xsources[ipl]->ipl_evt_mask2, NULL,
1.15 cherry 396: evt_enable_event, NULL);
1.2 bouyer 397:
398: }
399:
/*
 * Record event (l1, l2) as pending for the IPLs in "iplmask" on the
 * current CPU, so it is replayed when those IPLs are re-enabled.
 *
 * => iplmask must be non-zero; called with interrupts disabled.
 */
void
hypervisor_set_ipending(uint32_t iplmask, int l1, int l2)
{

	/* This function is not re-entrant */
	KASSERT(x86_read_psl() != 0);

	int ipl;
	struct cpu_info *ci = curcpu();

	/* set pending bit for the appropriate IPLs */
	ci->ci_xpending |= iplmask;

	/*
	 * And set event pending bit for the lowest IPL. As IPL are handled
	 * from high to low, this ensure that all callbacks will have been
	 * called when we ack the event
	 */
	ipl = ffs(iplmask);
	KASSERT(ipl > 0);
	ipl--;
	KASSERT(ipl < NIPL);
	KASSERT(ci->ci_xsources[ipl] != NULL);
	ci->ci_xsources[ipl]->ipl_evt_mask1 |= 1UL << l1;
	ci->ci_xsources[ipl]->ipl_evt_mask2[l1] |= 1UL << l2;
	/*
	 * NOTE(review): ci was assigned curcpu() above and interrupts
	 * are off (psl KASSERT), so "ci != curcpu()" looks unreachable;
	 * confirm whether this IPI path is dead code left over from a
	 * previous signature that took a target cpu_info argument.
	 */
	if (__predict_false(ci != curcpu())) {
		if (xen_send_ipi(ci, XEN_IPI_HVCB)) {
			panic("hypervisor_set_ipending: "
			    "xen_send_ipi(cpu%d, XEN_IPI_HVCB) failed\n",
			    (int) ci->ci_cpuid);
		}
	}
}
1.10 bouyer 433:
/*
 * Machine-dependent hypervisor attach: set up the P2M frame list and
 * the suspend sysctl for domU guests.
 */
void
hypervisor_machdep_attach(void)
{
	/* dom0 does not require the arch-dependent P2M translation table */
	if (xendomain_is_dom0())
		return;

	build_p2m_frame_list_list();
	sysctl_xen_suspend_setup();
}
443:
/*
 * Machine-dependent resume hook: refresh the P2M frame list mapping
 * after a domU guest is restored.
 */
void
hypervisor_machdep_resume(void)
{
	/* dom0 does not require the arch-dependent P2M translation table */
	if (xendomain_is_dom0())
		return;

	update_p2m_frame_list_list();
}
451:
1.10 bouyer 452: /*
453: * Generate the p2m_frame_list_list table,
454: * needed for guest save/restore
455: */
456: static void
1.12 cegger 457: build_p2m_frame_list_list(void)
458: {
1.10 bouyer 459: int fpp; /* number of page (frame) pointer per page */
460: unsigned long max_pfn;
461: /*
462: * The p2m list is composed of three levels of indirection,
463: * each layer containing MFNs pointing to lower level pages
464: * The indirection is used to convert a given PFN to its MFN
465: * Each N level page can point to @fpp (N-1) level pages
466: * For example, for x86 32bit, we have:
467: * - PAGE_SIZE: 4096 bytes
468: * - fpp: 1024 (one L3 page can address 1024 L2 pages)
469: * A L1 page contains the list of MFN we are looking for
470: */
471: max_pfn = xen_start_info.nr_pages;
1.14 jym 472: fpp = PAGE_SIZE / sizeof(xen_pfn_t);
1.10 bouyer 473:
474: /* we only need one L3 page */
1.14 jym 475: l3_p2m_page = (vaddr_t *)uvm_km_alloc(kernel_map, PAGE_SIZE,
476: PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_NOWAIT);
1.10 bouyer 477: if (l3_p2m_page == NULL)
478: panic("could not allocate memory for l3_p2m_page");
479:
480: /*
481: * Determine how many L2 pages we need for the mapping
482: * Each L2 can map a total of @fpp L1 pages
483: */
484: l2_p2m_page_size = howmany(max_pfn, fpp);
485:
1.14 jym 486: l2_p2m_page = (vaddr_t *)uvm_km_alloc(kernel_map,
487: l2_p2m_page_size * PAGE_SIZE,
488: PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_NOWAIT);
1.10 bouyer 489: if (l2_p2m_page == NULL)
490: panic("could not allocate memory for l2_p2m_page");
491:
492: /* We now have L3 and L2 pages ready, update L1 mapping */
493: update_p2m_frame_list_list();
494:
495: }
496:
497: /*
498: * Update the L1 p2m_frame_list_list mapping (during guest boot or resume)
499: */
500: static void
1.12 cegger 501: update_p2m_frame_list_list(void)
502: {
1.10 bouyer 503: int i;
504: int fpp; /* number of page (frame) pointer per page */
505: unsigned long max_pfn;
506:
507: max_pfn = xen_start_info.nr_pages;
1.14 jym 508: fpp = PAGE_SIZE / sizeof(xen_pfn_t);
1.10 bouyer 509:
510: for (i = 0; i < l2_p2m_page_size; i++) {
511: /*
512: * Each time we start a new L2 page,
513: * store its MFN in the L3 page
514: */
515: if ((i % fpp) == 0) {
516: l3_p2m_page[i/fpp] = vtomfn(
517: (vaddr_t)&l2_p2m_page[i]);
518: }
519: /*
520: * we use a shortcut
521: * since @xpmap_phys_to_machine_mapping array
522: * already contains PFN to MFN mapping, we just
523: * set the l2_p2m_page MFN pointer to the MFN of the
524: * according frame of @xpmap_phys_to_machine_mapping
525: */
526: l2_p2m_page[i] = vtomfn((vaddr_t)
527: &xpmap_phys_to_machine_mapping[i*fpp]);
528: }
529:
530: HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
531: vtomfn((vaddr_t)l3_p2m_page);
532: HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
533:
534: }
CVSweb <webmaster@jp.NetBSD.org>