Annotation of src/sys/arch/xen/xen/xennetback_xenbus.c, Revision 1.14.2.2
1.14.2.2! ad 1: /* $NetBSD: xennetback_xenbus.c,v 1.14.2.1 2007/03/13 16:50:16 ad Exp $ */
1.1 bouyer 2:
3: /*
4: * Copyright (c) 2006 Manuel Bouyer.
5: *
6: * Redistribution and use in source and binary forms, with or without
7: * modification, are permitted provided that the following conditions
8: * are met:
9: * 1. Redistributions of source code must retain the above copyright
10: * notice, this list of conditions and the following disclaimer.
11: * 2. Redistributions in binary form must reproduce the above copyright
12: * notice, this list of conditions and the following disclaimer in the
13: * documentation and/or other materials provided with the distribution.
14: * 3. All advertising materials mentioning features or use of this software
15: * must display the following acknowledgement:
16: * This product includes software developed by Manuel Bouyer.
17: * 4. The name of the author may not be used to endorse or promote products
18: * derived from this software without specific prior written permission.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23: * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25: * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29: * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30: *
31: */
32:
33: #include "opt_xen.h"
34:
35: #include <sys/types.h>
36: #include <sys/param.h>
37: #include <sys/systm.h>
38: #include <sys/malloc.h>
39: #include <sys/queue.h>
40: #include <sys/kernel.h>
41: #include <sys/mbuf.h>
42: #include <sys/protosw.h>
43: #include <sys/socket.h>
44: #include <sys/ioctl.h>
45: #include <sys/errno.h>
46: #include <sys/device.h>
47:
48: #include <net/if.h>
49: #include <net/if_types.h>
50: #include <net/if_dl.h>
51: #include <net/route.h>
52: #include <net/netisr.h>
53: #include "bpfilter.h"
54: #if NBPFILTER > 0
55: #include <net/bpf.h>
56: #include <net/bpfdesc.h>
57: #endif
58:
59: #include <net/if_ether.h>
60:
61:
62: #include <machine/xen.h>
63: #include <machine/xen_shm.h>
64: #include <machine/evtchn.h>
65: #include <machine/xenbus.h>
1.10 yamt 66: #include <machine/xennet_checksum.h>
1.1 bouyer 67:
68: #include <uvm/uvm.h>
69:
70: #ifdef XENDEBUG_NET
71: #define XENPRINTF(x) printf x
72: #else
73: #define XENPRINTF(x)
74: #endif
75:
76: #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
77: #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
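/*
 * Sizing note (an assumption from the standard netif ABI, not stated in
 * this file): with 4KB pages __RING_SIZE() rounds the per-page slot
 * count down to a power of two, which yields 256 entries for both rings.
 */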
78:
1.8 bouyer 79: /* linux wants at least 16 bytes free in front of the packet */
80: #define LINUX_REQUESTED_OFFSET 16
81:
1.1 bouyer 82: /* TX requests from the frontend are wrapped in xni_pkt: */
83: /* descriptor of a packet being handled by the kernel */
84: struct xni_pkt {
85: int pkt_id; /* packet's ID */
86: grant_handle_t pkt_handle;
87: struct xnetback_instance *pkt_xneti; /* pointer back to our softc */
88: };
89:
90: static inline void xni_pkt_unmap(struct xni_pkt *, vaddr_t);
91:
92:
93: /* pools for xni_pkt */
94: struct pool xni_pkt_pool;
95: /* ratecheck(9) for pool allocation failures */
96: struct timeval xni_pool_errintvl = { 30, 0 }; /* 30s, for each error type */
97: /*
98: * Backend network device driver for Xen
99: */
100:
101: /* state of a xnetback instance */
102: typedef enum {CONNECTED, DISCONNECTING, DISCONNECTED} xnetback_state_t;
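/*
 * Transitions (see xennetback_frontend_changed()): an instance starts
 * DISCONNECTED, becomes CONNECTED when the frontend reaches
 * XenbusStateConnected, and DISCONNECTING on XenbusStateClosing.
 */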
103:
104: /* we keep the xnetback instances in a linked list */
105: struct xnetback_instance {
106: SLIST_ENTRY(xnetback_instance) next;
107: struct xenbus_device *xni_xbusd; /* our xenstore entry */
108: domid_t xni_domid; /* attached to this domain */
109: uint32_t xni_handle; /* domain-specific handle */
110: xnetback_state_t xni_status;
111: void *xni_softintr;
112:
113: /* network interface stuff */
114: struct ethercom xni_ec;
115: struct callout xni_restart;
116: u_int8_t xni_enaddr[ETHER_ADDR_LEN];
117:
118: /* remote domain communication stuff */
119: unsigned int xni_evtchn; /* our event channel */
120: netif_tx_back_ring_t xni_txring;
121: netif_rx_back_ring_t xni_rxring;
122: grant_handle_t xni_tx_ring_handle; /* to unmap the ring */
123: grant_handle_t xni_rx_ring_handle;
124: vaddr_t xni_tx_ring_va; /* to unmap the ring */
125: vaddr_t xni_rx_ring_va;
126: };
127: #define xni_if xni_ec.ec_if
128: #define xni_bpf xni_if.if_bpf
129:
130: void xvifattach(int);
1.13 christos 131: static int xennetback_ifioctl(struct ifnet *, u_long, void *);
1.1 bouyer 132: static void xennetback_ifstart(struct ifnet *);
133: static void xennetback_ifsoftstart(void *);
134: static void xennetback_ifwatchdog(struct ifnet *);
135: static int xennetback_ifinit(struct ifnet *);
136: static void xennetback_ifstop(struct ifnet *, int);
137:
138: static int xennetback_xenbus_create(struct xenbus_device *);
139: static int xennetback_xenbus_destroy(void *);
140: static void xennetback_frontend_changed(void *, XenbusState);
141:
142: static inline void xennetback_tx_response(struct xnetback_instance *,
143: int, int);
1.13 christos 144: static void xennetback_tx_free(struct mbuf * , void *, size_t, void *);
1.1 bouyer 145:
146: SLIST_HEAD(, xnetback_instance) xnetback_instances;
147:
148: static struct xnetback_instance *xnetif_lookup(domid_t, uint32_t);
149: static int xennetback_evthandler(void *);
150:
151: static struct xenbus_backend_driver xvif_backend_driver = {
152: .xbakd_create = xennetback_xenbus_create,
153: .xbakd_type = "vif"
154: };
155:
156: /*
157: * Number of packets to transmit in one hypercall (= number of pages to
158: * transmit at once).
159: */
160: #define NB_XMIT_PAGES_BATCH 64
161: /*
162: * We will transfer a mapped page to the remote domain, and remap another
163: * page in place immediately. For this we keep a list of pages available.
164: * When the list is empty, we ask the hypervisor to give us
165: * NB_XMIT_PAGES_BATCH pages back.
166: */
167: static unsigned long mcl_pages[NB_XMIT_PAGES_BATCH]; /* our physical pages */
168: int mcl_pages_alloc; /* current index in mcl_pages */
169: static int xennetback_get_mcl_page(paddr_t *);
170: static void xennetback_get_new_mcl_pages(void);
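/*
 * Intended usage, as a sketch (the real consumer is
 * xennetback_ifsoftstart() below):
 *
 *	paddr_t newp_ma;
 *	if (xennetback_get_mcl_page(&newp_ma) != 0)
 *		break;	(out of spare pages, stop queueing transfers)
 *	... give the mbuf page away, remap newp_ma over its VA ...
 *	then, once the batch has been pushed to the hypervisor:
 *	if (mcl_pages_alloc < 0)
 *		xennetback_get_new_mcl_pages();
 */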
171: /*
172: * If we can't transfer the mbuf directly, we have to copy it to a page which
173: * will be transferred to the remote domain. We use a pool + pool_cache
174: * for this, or the mbuf cluster pool cache if MCLBYTES == PAGE_SIZE
175: */
176: #if MCLBYTES != PAGE_SIZE
177: struct pool xmit_pages_pool;
178: struct pool_cache xmit_pages_pool_cache;
179: #endif
180: struct pool_cache *xmit_pages_pool_cachep;
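/*
 * Rationale: a page handed to the hypervisor must be a whole page, so an
 * mbuf cluster can only be given away as-is when MCLBYTES == PAGE_SIZE
 * (see the M_CLUSTER test in xennetback_ifsoftstart()); in that case the
 * generic cluster cache doubles as the copy pool.
 */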
181:
182: /* arrays used in xennetback_ifsoftstart(), too large to allocate on stack */
1.8 bouyer 183: static mmu_update_t xstart_mmu[NB_XMIT_PAGES_BATCH];
184: static multicall_entry_t xstart_mcl[NB_XMIT_PAGES_BATCH + 1];
1.1 bouyer 185: static gnttab_transfer_t xstart_gop[NB_XMIT_PAGES_BATCH];
186: struct mbuf *mbufs_sent[NB_XMIT_PAGES_BATCH];
187: struct _pages_pool_free {
188: vaddr_t va;
189: paddr_t pa;
190: } pages_pool_free[NB_XMIT_PAGES_BATCH];
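/*
 * These arrays are shared by all xvif instances. There is no explicit
 * lock; the assumption (suggested by the spl calls below) is that they
 * are only touched from xennetback_ifsoftstart() running at splnet().
 */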
191:
192:
193: static inline void
194: xni_pkt_unmap(struct xni_pkt *pkt, vaddr_t pkt_va)
195: {
1.4 bouyer 196: xen_shm_unmap(pkt_va, 1, &pkt->pkt_handle);
1.1 bouyer 197: pool_put(&xni_pkt_pool, pkt);
198: }
199:
200: void
201: xvifattach(int n)
202: {
203: int i;
204: struct pglist mlist;
205: struct vm_page *pg;
206:
207: XENPRINTF(("xennetback_init\n"));
208:
209: /*
210: * Steal some non-managed pages from the VM system, to replace
211: * mbuf cluster or xmit_pages_pool pages given to foreign domains.
212: */
213: if (uvm_pglistalloc(PAGE_SIZE * NB_XMIT_PAGES_BATCH, 0, 0xffffffff,
214: 0, 0, &mlist, NB_XMIT_PAGES_BATCH, 0) != 0)
215: panic("xennetback_init: uvm_pglistalloc");
216: for (i = 0, pg = mlist.tqh_first; pg != NULL;
217: pg = pg->pageq.tqe_next, i++)
218: mcl_pages[i] = xpmap_ptom(VM_PAGE_TO_PHYS(pg)) >> PAGE_SHIFT;
219: if (i != NB_XMIT_PAGES_BATCH)
220: panic("xennetback_init: %d mcl pages", i);
221: mcl_pages_alloc = NB_XMIT_PAGES_BATCH - 1;
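	/*
	 * mcl_pages_alloc is the index of the last valid entry in
	 * mcl_pages[]; xennetback_get_mcl_page() decrements it, and -1
	 * means the stock of spare pages is exhausted.
	 */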
222:
223: /* initialise pools */
224: pool_init(&xni_pkt_pool, sizeof(struct xni_pkt), 0, 0, 0,
1.14.2.1 ad 225: "xnbpkt", NULL, IPL_VM);
1.1 bouyer 226: #if MCLBYTES != PAGE_SIZE
1.14.2.1 ad 227: pool_init(&xmit_pages_pool, PAGE_SIZE, 0, 0, 0, "xnbxm", NULL,
228: IPL_VM);
1.1 bouyer 229: pool_cache_init(&xmit_pages_pool_cache, &xmit_pages_pool,
230: NULL, NULL, NULL);
231: xmit_pages_pool_cachep = &xmit_pages_pool_cache;
232: #else
233: xmit_pages_pool_cachep = &mclpool_cache;
234: #endif
235:
236: SLIST_INIT(&xnetback_instances);
237: xenbus_backend_register(&xvif_backend_driver);
238: }
239:
240: static int
241: xennetback_xenbus_create(struct xenbus_device *xbusd)
242: {
243: struct xnetback_instance *xneti;
244: long domid, handle;
245: struct ifnet *ifp;
246: extern int ifqmaxlen; /* XXX */
247: char *val, *e, *p;
248: int i, err;
249:
250: if ((err = xenbus_read_ul(NULL, xbusd->xbusd_path,
1.5 bouyer 251: "frontend-id", &domid, 10)) != 0) {
1.1 bouyer 252: aprint_error("xvif: can't read %s/frontend-id: %d\n",
253: xbusd->xbusd_path, err);
254: return err;
255: }
256: if ((err = xenbus_read_ul(NULL, xbusd->xbusd_path,
1.5 bouyer 257: "handle", &handle, 10)) != 0) {
1.1 bouyer 258: aprint_error("xvif: can't read %s/handle: %d\n",
259: xbusd->xbusd_path, err);
260: return err;
261: }
262:
263: if (xnetif_lookup(domid, handle) != NULL) {
264: return EEXIST;
265: }
266: xneti = malloc(sizeof(struct xnetback_instance), M_DEVBUF,
267: M_NOWAIT | M_ZERO);
268: if (xneti == NULL) {
269: return ENOMEM;
270: }
271: xneti->xni_domid = domid;
272: xneti->xni_handle = handle;
273: xneti->xni_status = DISCONNECTED;
274:
275: xbusd->xbusd_u.b.b_cookie = xneti;
276: xbusd->xbusd_u.b.b_detach = xennetback_xenbus_destroy;
277: xneti->xni_xbusd = xbusd;
278:
279: xneti->xni_softintr = softintr_establish(IPL_SOFTNET,
280: xennetback_ifsoftstart, xneti);
281: if (xneti->xni_softintr == NULL) {
282: err = ENOMEM;
283: goto fail;
284: }
285:
286: ifp = &xneti->xni_if;
287: ifp->if_softc = xneti;
288:
289: /* read mac address */
290: if ((err = xenbus_read(NULL, xbusd->xbusd_path, "mac", NULL, &val))) {
291: aprint_error("xvif: can't read %s/mac: %d\n",
292: xbusd->xbusd_path, err);
293: goto fail;
294: }
295: for (i = 0, p = val; i < 6; i++) {
296: xneti->xni_enaddr[i] = strtoul(p, &e, 16);
297: if ((i != 5 && e[0] != ':') || (i == 5 && e[0] != '\0')) {
298: aprint_error("xvif: %s is not a valid mac address\n",
299: val);
300: err = EINVAL;
301: goto fail;
302: }
303: p = &e[1];
304: }
305: free(val, M_DEVBUF);
306:
307: /* we can't use the same MAC addr as our guest */
308: xneti->xni_enaddr[3]++;
309: /* create pseudo-interface */
310: snprintf(xneti->xni_if.if_xname, IFNAMSIZ, "xvif%d.%d",
311: (int)domid, (int)handle);
312: printf("%s: Ethernet address %s\n", ifp->if_xname,
313: ether_sprintf(xneti->xni_enaddr));
314: ifp->if_flags =
315: IFF_BROADCAST|IFF_SIMPLEX|IFF_NOTRAILERS|IFF_MULTICAST;
316: ifp->if_snd.ifq_maxlen =
1.12 bouyer 317: max(ifqmaxlen, NET_TX_RING_SIZE * 2);
1.11 yamt 318: ifp->if_capabilities = IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_UDPv4_Tx;
1.1 bouyer 319: ifp->if_ioctl = xennetback_ifioctl;
320: ifp->if_start = xennetback_ifstart;
321: ifp->if_watchdog = xennetback_ifwatchdog;
322: ifp->if_init = xennetback_ifinit;
323: ifp->if_stop = xennetback_ifstop;
324: ifp->if_timer = 0;
325: IFQ_SET_READY(&ifp->if_snd);
326: if_attach(ifp);
327: ether_ifattach(&xneti->xni_if, xneti->xni_enaddr);
328:
329: SLIST_INSERT_HEAD(&xnetback_instances, xneti, next);
330:
331: xbusd->xbusd_otherend_changed = xennetback_frontend_changed;
332:
333: err = xenbus_switch_state(xbusd, NULL, XenbusStateInitWait);
334: if (err) {
335: printf("failed to switch state on %s: %d\n",
336: xbusd->xbusd_path, err);
337: goto fail;
338: }
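	/*
	 * XXX hedged reconstruction: err is not reassigned before the
	 * hotplug-status check below, which was therefore dead code; a
	 * write such as this one is presumably what was intended.
	 */
	err = xenbus_printf(NULL, xbusd->xbusd_path,
	    "hotplug-status", "connected");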
339: if (err) {
340: printf("failed to write %s/hotplug-status: %d\n",
341: xbusd->xbusd_path, err);
342: goto fail;
343: }
344: return 0;
345: fail:
346: free(xneti, M_DEVBUF);
347: return err;
348: }
349:
350: int
351: xennetback_xenbus_destroy(void *arg)
352: {
353: struct xnetback_instance *xneti = arg;
354: struct gnttab_unmap_grant_ref op;
355: int err;
356:
357: #if 0
358: if (xneti->xni_status == CONNECTED) {
359: return EBUSY;
360: }
361: #endif
362: printf("%s: disconnecting\n", xneti->xni_if.if_xname);
363: hypervisor_mask_event(xneti->xni_evtchn);
364: event_remove_handler(xneti->xni_evtchn, xennetback_evthandler, xneti);
365: softintr_disestablish(xneti->xni_softintr);
366:
367: SLIST_REMOVE(&xnetback_instances,
368: xneti, xnetback_instance, next);
369:
370: ether_ifdetach(&xneti->xni_if);
371: if_detach(&xneti->xni_if);
372:
373: if (xneti->xni_txring.sring) {
374: op.host_addr = xneti->xni_tx_ring_va;
375: op.handle = xneti->xni_tx_ring_handle;
376: op.dev_bus_addr = 0;
377: err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
378: &op, 1);
379: if (err)
380: printf("%s: unmap_grant_ref failed: %d\n",
381: xneti->xni_if.if_xname, err);
382: }
383: if (xneti->xni_rxring.sring) {
384: op.host_addr = xneti->xni_rx_ring_va;
385: op.handle = xneti->xni_rx_ring_handle;
386: op.dev_bus_addr = 0;
387: err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
388: &op, 1);
389: if (err)
390: printf("%s: unmap_grant_ref failed: %d\n",
391: xneti->xni_if.if_xname, err);
392: }
393: uvm_km_free(kernel_map, xneti->xni_tx_ring_va,
394: PAGE_SIZE, UVM_KMF_VAONLY);
395: uvm_km_free(kernel_map, xneti->xni_rx_ring_va,
396: PAGE_SIZE, UVM_KMF_VAONLY);
397: free(xneti, M_DEVBUF);
398: return 0;
399: }
400:
401: static void
402: xennetback_frontend_changed(void *arg, XenbusState new_state)
403: {
404: struct xnetback_instance *xneti = arg;
405: struct xenbus_device *xbusd = xneti->xni_xbusd;
406: int err;
407: netif_tx_sring_t *tx_ring;
408: netif_rx_sring_t *rx_ring;
409: struct gnttab_map_grant_ref op;
410: evtchn_op_t evop;
411: u_long tx_ring_ref, rx_ring_ref;
412: u_long revtchn;
413:
414: XENPRINTF(("%s: new state %d\n", xneti->xni_if.if_xname, new_state));
415: switch(new_state) {
416: case XenbusStateInitialising:
417: case XenbusStateInitialised:
418: break;
419:
420: case XenbusStateConnected:
421: /* read communication information */
422: err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
1.5 bouyer 423: "tx-ring-ref", &tx_ring_ref, 10);
1.1 bouyer 424: if (err) {
425: xenbus_dev_fatal(xbusd, err, "reading %s/tx-ring-ref",
426: xbusd->xbusd_otherend);
427: break;
428: }
429: err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
1.5 bouyer 430: "rx-ring-ref", &rx_ring_ref, 10);
1.1 bouyer 431: if (err) {
432: xenbus_dev_fatal(xbusd, err, "reading %s/rx-ring-ref",
433: xbusd->xbusd_otherend);
434: break;
435: }
436: err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
1.5 bouyer 437: "event-channel", &revtchn, 10);
1.1 bouyer 438: if (err) {
439: xenbus_dev_fatal(xbusd, err, "reading %s/event-channel",
440: xbusd->xbusd_otherend);
441: break;
442: }
443: /* allocate VA space and map rings */
444: xneti->xni_tx_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
445: UVM_KMF_VAONLY);
1.4 bouyer 446: if (xneti->xni_tx_ring_va == 0) {
447: xenbus_dev_fatal(xbusd, ENOMEM,
448: "can't get VA for tx ring", xbusd->xbusd_otherend);
1.1 bouyer 449: break;
1.4 bouyer 450: }
1.1 bouyer 451: tx_ring = (void *)xneti->xni_tx_ring_va;
452: xneti->xni_rx_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
453: UVM_KMF_VAONLY);
1.4 bouyer 454: if (xneti->xni_rx_ring_va == 0) {
455: xenbus_dev_fatal(xbusd, ENOMEM,
456: "can't get VA for rx ring", xbusd->xbusd_otherend);
1.1 bouyer 457: goto err1;
1.4 bouyer 458: }
1.1 bouyer 459: rx_ring = (void *)xneti->xni_rx_ring_va;
460: op.host_addr = xneti->xni_tx_ring_va;
461: op.flags = GNTMAP_host_map;
462: op.ref = tx_ring_ref;
463: op.dom = xneti->xni_domid;
464: err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
465: if (err || op.status) {
466: printf("%s: can't map TX grant ref: %d/%d\n",
467: xneti->xni_if.if_xname, err, op.status);
468: goto err2;
469: }
470: xneti->xni_tx_ring_handle = op.handle;
471:
472: op.host_addr = xneti->xni_rx_ring_va;
473: op.flags = GNTMAP_host_map;
474: op.ref = rx_ring_ref;
475: op.dom = xneti->xni_domid;
476: err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
477: if (err || op.status) {
478: printf("%s: can't map RX grant ref: %d/%d\n",
479: xneti->xni_if.if_xname, err, op.status);
480: goto err2;
481: }
482: xneti->xni_rx_ring_handle = op.handle;
483: BACK_RING_INIT(&xneti->xni_txring, tx_ring, PAGE_SIZE);
484: BACK_RING_INIT(&xneti->xni_rxring, rx_ring, PAGE_SIZE);
485: evop.cmd = EVTCHNOP_bind_interdomain;
486: evop.u.bind_interdomain.remote_dom = xneti->xni_domid;
487: evop.u.bind_interdomain.remote_port = revtchn;
488: err = HYPERVISOR_event_channel_op(&evop);
489: if (err) {
490: printf("%s: can't get event channel: %d\n",
491: xneti->xni_if.if_xname, err);
492: goto err2;
493: }
494: xneti->xni_evtchn = evop.u.bind_interdomain.local_port;
495: x86_sfence();
496: xneti->xni_status = CONNECTED;
497: xenbus_switch_state(xbusd, NULL, XenbusStateConnected);
498: x86_sfence();
499: event_set_handler(xneti->xni_evtchn, xennetback_evthandler,
500: xneti, IPL_NET, xneti->xni_if.if_xname);
501: xennetback_ifinit(&xneti->xni_if);
502: hypervisor_enable_event(xneti->xni_evtchn);
503: hypervisor_notify_via_evtchn(xneti->xni_evtchn);
504: break;
505:
506: case XenbusStateClosing:
507: xneti->xni_status = DISCONNECTING;
508: xneti->xni_if.if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
509: xneti->xni_if.if_timer = 0;
510: xenbus_switch_state(xbusd, NULL, XenbusStateClosing);
511: break;
512:
513: case XenbusStateClosed:
514: /* otherend_changed() should handle it for us */
515: panic("xennetback_frontend_changed: closed\n");
516: case XenbusStateUnknown:
517: case XenbusStateInitWait:
518: default:
519: aprint_error("%s: invalid frontend state %d\n",
520: xneti->xni_if.if_xname, new_state);
521: break;
522: }
523: return;
524: err2:
525: uvm_km_free(kernel_map, xneti->xni_rx_ring_va,
526: PAGE_SIZE, UVM_KMF_VAONLY);
527: err1:
528: uvm_km_free(kernel_map, xneti->xni_tx_ring_va,
529: PAGE_SIZE, UVM_KMF_VAONLY);
530: }
531:
532: /* lookup a xneti based on domain id and interface handle */
533: static struct xnetback_instance *
534: xnetif_lookup(domid_t dom, uint32_t handle)
535: {
536: struct xnetback_instance *xneti;
537:
538: SLIST_FOREACH(xneti, &xnetback_instances, next) {
539: if (xneti->xni_domid == dom && xneti->xni_handle == handle)
540: return xneti;
541: }
542: return NULL;
543: }
544:
545:
546: /* get a page to replace an mbuf cluster page given to a domain */
547: static int
548: xennetback_get_mcl_page(paddr_t *map)
549: {
550: if (mcl_pages_alloc < 0)
551: /*
552: * we exhausted our allocation. We can't allocate new ones yet
553: * because the current pages may not have been loaned to
554: * the remote domain yet. We have to let the caller do this.
555: */
556: return -1;
557:
558: *map = mcl_pages[mcl_pages_alloc] << PAGE_SHIFT;
559: mcl_pages_alloc--;
560: return 0;
561:
562: }
563:
564: static void
565: xennetback_get_new_mcl_pages(void)
566: {
567: int nb_pages;
568: struct xen_memory_reservation res;
569:
570: /* get some new pages. */
571: res.extent_start = mcl_pages;
572: res.nr_extents = NB_XMIT_PAGES_BATCH;
573: res.extent_order = 0;
574: res.address_bits = 0;
575: res.domid = DOMID_SELF;
576:
577: nb_pages = HYPERVISOR_memory_op(XENMEM_increase_reservation, &res);
578: if (nb_pages <= 0) {
579: printf("xennetback: can't get new mcl pages (%d)\n", nb_pages);
580: return;
581: }
582: if (nb_pages != NB_XMIT_PAGES_BATCH)
583: printf("xennetback: got only %d new mcl pages\n", nb_pages);
584:
585: mcl_pages_alloc = nb_pages - 1;
586: }
587:
588: static inline void
589: xennetback_tx_response(struct xnetback_instance *xneti, int id, int status)
590: {
591: RING_IDX resp_prod;
592: netif_tx_response_t *txresp;
593: int do_event;
594:
595: resp_prod = xneti->xni_txring.rsp_prod_pvt;
596: txresp = RING_GET_RESPONSE(&xneti->xni_txring, resp_prod);
597:
598: txresp->id = id;
599: txresp->status = status;
600: xneti->xni_txring.rsp_prod_pvt++;
601: RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xneti->xni_txring, do_event);
602: if (do_event) {
603: XENPRINTF(("%s send event\n", xneti->xni_if.if_xname));
604: hypervisor_notify_via_evtchn(xneti->xni_evtchn);
605: }
606: }
607:
608: static int
609: xennetback_evthandler(void *arg)
610: {
611: struct xnetback_instance *xneti = arg;
612: struct ifnet *ifp = &xneti->xni_if;
613: netif_tx_request_t *txreq;
614: struct xni_pkt *pkt;
615: vaddr_t pkt_va;
616: struct mbuf *m;
617: int receive_pending, err;
618: RING_IDX req_cons;
619:
620: XENPRINTF(("xennetback_evthandler "));
621: req_cons = xneti->xni_txring.req_cons;
622: x86_lfence();
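	/*
	 * Request-consumption loop, following the usual Xen ring protocol:
	 * publish our consumer index first, then let
	 * RING_FINAL_CHECK_FOR_REQUESTS() re-enable notifications and
	 * re-check, so a request posted right after our last check is not
	 * left stranded until the next event.
	 */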
623: while (1) {
624: x86_lfence(); /* be sure to read the request before updating */
625: xneti->xni_txring.req_cons = req_cons;
626: x86_sfence();
627: RING_FINAL_CHECK_FOR_REQUESTS(&xneti->xni_txring,
628: receive_pending);
629: if (receive_pending == 0)
630: break;
631: txreq = RING_GET_REQUEST(&xneti->xni_txring, req_cons);
632: x86_lfence();
633: XENPRINTF(("%s pkt size %d\n", xneti->xni_if.if_xname,
634: txreq->size));
1.6 bouyer 635: req_cons++;
1.1 bouyer 636: if (__predict_false((ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
637: (IFF_UP | IFF_RUNNING))) {
638: /* interface not up, drop */
639: xennetback_tx_response(xneti, txreq->id,
640: NETIF_RSP_DROPPED);
641: continue;
642: }
643: /*
644: * Do some sanity checks, and map the packet's page.
645: */
646: if (__predict_false(txreq->size < ETHER_HDR_LEN ||
647: txreq->size > (ETHER_MAX_LEN - ETHER_CRC_LEN))) {
648: printf("%s: packet size %d too big\n",
649: ifp->if_xname, txreq->size);
650: xennetback_tx_response(xneti, txreq->id,
651: NETIF_RSP_ERROR);
652: ifp->if_ierrors++;
653: continue;
654: }
655: /* don't cross page boundaries */
656: if (__predict_false(
657: txreq->offset + txreq->size > PAGE_SIZE)) {
658: printf("%s: packet cross page boundary\n",
659: ifp->if_xname);
660: xennetback_tx_response(xneti, txreq->id,
661: NETIF_RSP_ERROR);
662: ifp->if_ierrors++;
663: continue;
664: }
665: /* get a mbuf for this packet */
666: MGETHDR(m, M_DONTWAIT, MT_DATA);
667: if (__predict_false(m == NULL)) {
668: static struct timeval lasttime;
669: if (ratecheck(&lasttime, &xni_pool_errintvl))
670: printf("%s: mbuf alloc failed\n",
671: ifp->if_xname);
672: xennetback_tx_response(xneti, txreq->id,
673: NETIF_RSP_DROPPED);
674: ifp->if_ierrors++;
675: continue;
676: }
677:
678: XENPRINTF(("%s pkt offset %d size %d id %d req_cons %d\n",
679: xneti->xni_if.if_xname, txreq->offset,
680: txreq->size, txreq->id, MASK_NETIF_TX_IDX(req_cons)));
681:
682: pkt = pool_get(&xni_pkt_pool, PR_NOWAIT);
683: if (__predict_false(pkt == NULL)) {
684: static struct timeval lasttime;
685: if (ratecheck(&lasttime, &xni_pool_errintvl))
686: printf("%s: xnbpkt alloc failed\n",
687: ifp->if_xname);
688: xennetback_tx_response(xneti, txreq->id,
689: NETIF_RSP_DROPPED);
690: ifp->if_ierrors++;
691: m_freem(m);
692: continue;
693: }
1.4 bouyer 694: err = xen_shm_map(1, xneti->xni_domid, &txreq->gref, &pkt_va,
1.2 bouyer 695: &pkt->pkt_handle, XSHM_RO);
696: if (__predict_false(err == ENOMEM)) {
1.1 bouyer 697: xennetback_tx_response(xneti, txreq->id,
698: NETIF_RSP_DROPPED);
699: ifp->if_ierrors++;
700: pool_put(&xni_pkt_pool, pkt);
701: m_freem(m);
702: continue;
703: }
704:
1.2 bouyer 705: if (__predict_false(err)) {
1.1 bouyer 706: printf("%s: mapping foreing page failed: %d\n",
1.2 bouyer 707: xneti->xni_if.if_xname, err);
1.1 bouyer 708: xennetback_tx_response(xneti, txreq->id,
709: NETIF_RSP_ERROR);
710: ifp->if_ierrors++;
711: pool_put(&xni_pkt_pool, pkt);
712: m_freem(m);
713: continue;
714: }
715:
716: if ((ifp->if_flags & IFF_PROMISC) == 0) {
717: struct ether_header *eh =
718: (void*)(pkt_va + txreq->offset);
719: if (ETHER_IS_MULTICAST(eh->ether_dhost) == 0 &&
1.14.2.2! ad 720: memcmp(CLLADDR(ifp->if_sadl), eh->ether_dhost,
1.1 bouyer 721: ETHER_ADDR_LEN) != 0) {
722: xni_pkt_unmap(pkt, pkt_va);
723: m_freem(m);
724: xennetback_tx_response(xneti, txreq->id,
725: NETIF_RSP_OKAY);
726: continue; /* packet is not for us */
727: }
728: }
1.3 bouyer 729: #ifdef notyet
730: /* a lot of work is needed in the TCP stack to handle read-only
731: * ext storage, so always copy for now. */
1.1 bouyer 732: if (((req_cons + 1) & (NET_TX_RING_SIZE - 1)) ==
1.12 bouyer 733: (xneti->xni_txring.rsp_prod_pvt & (NET_TX_RING_SIZE - 1)))
734: #else
735: if (1)
1.3 bouyer 736: #endif /* notyet */
1.12 bouyer 737: {
1.1 bouyer 738: /*
739: * This is the last TX buffer. Copy the data and
740: * ack it. Delaying it until the mbuf is
741: * freed will stall transmit.
742: */
743: m->m_len = min(MHLEN, txreq->size);
744: m->m_pkthdr.len = 0;
745: m_copyback(m, 0, txreq->size,
1.13 christos 746: (void *)(pkt_va + txreq->offset));
1.1 bouyer 747: xni_pkt_unmap(pkt, pkt_va);
748: if (m->m_pkthdr.len < txreq->size) {
749: ifp->if_ierrors++;
750: m_freem(m);
751: xennetback_tx_response(xneti, txreq->id,
752: NETIF_RSP_DROPPED);
753: continue;
754: }
755: xennetback_tx_response(xneti, txreq->id,
756: NETIF_RSP_OKAY);
757: } else {
1.3 bouyer 758:
1.1 bouyer 759: pkt->pkt_id = txreq->id;
760: pkt->pkt_xneti = xneti;
761:
762: MEXTADD(m, pkt_va + txreq->offset,
763: txreq->size, M_DEVBUF, xennetback_tx_free, pkt);
764: m->m_pkthdr.len = m->m_len = txreq->size;
1.2 bouyer 765: m->m_flags |= M_EXT_ROMAP;
1.1 bouyer 766: }
1.10 yamt 767: if ((txreq->flags & NETTXF_csum_blank) != 0) {
768: xennet_checksum_fill(&m);
769: if (m == NULL) {
770: ifp->if_ierrors++;
771: continue;
772: }
773: }
1.1 bouyer 774: m->m_pkthdr.rcvif = ifp;
775: ifp->if_ipackets++;
776:
777: #if NBPFILTER > 0
778: if (ifp->if_bpf)
779: bpf_mtap(ifp->if_bpf, m);
780: #endif
781: (*ifp->if_input)(ifp, m);
782: }
783: x86_lfence(); /* be sure to read the request before updating pointer */
784: xneti->xni_txring.req_cons = req_cons;
785: x86_sfence();
786: /* check to see if we can transmit more packets */
787: softintr_schedule(xneti->xni_softintr);
788:
789: return 1;
790: }
791:
792: static void
1.13 christos 793: xennetback_tx_free(struct mbuf *m, void *va, size_t size, void *arg)
1.1 bouyer 794: {
795: int s = splnet();
796: struct xni_pkt *pkt = arg;
797: struct xnetback_instance *xneti = pkt->pkt_xneti;
798:
799: XENPRINTF(("xennetback_tx_free\n"));
800:
801: xennetback_tx_response(xneti, pkt->pkt_id, NETIF_RSP_OKAY);
802:
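	/*
	 * va is ext_buf, i.e. the mapped page plus txreq->offset; masking
	 * with ~PAGE_MASK recovers the page-aligned VA that xen_shm_map()
	 * returned, which is what the unmap expects.
	 */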
803: xni_pkt_unmap(pkt, (vaddr_t)va & ~PAGE_MASK);
804:
805: if (m)
806: pool_cache_put(&mbpool_cache, m);
807: splx(s);
808: }
809:
810: static int
1.13 christos 811: xennetback_ifioctl(struct ifnet *ifp, u_long cmd, void *data)
1.1 bouyer 812: {
813: //struct xnetback_instance *xneti = ifp->if_softc;
814: //struct ifreq *ifr = (struct ifreq *)data;
815: int s, error;
816:
817: s = splnet();
818: error = ether_ioctl(ifp, cmd, data);
819: if (error == ENETRESET)
820: error = 0;
821: splx(s);
822: return error;
823: }
824:
825: static void
826: xennetback_ifstart(struct ifnet *ifp)
827: {
828: struct xnetback_instance *xneti = ifp->if_softc;
829:
830: /*
831: * The Xen communication channel is much more efficient if we can
832: * schedule a batch of packets for the domain. To achieve this, we
833: * schedule a soft interrupt, and just return. This way, the network
834: * stack will enqueue all pending mbufs in the interface's send queue
835: * before it is processed by xennetback_ifsoftstart().
836: */
837: softintr_schedule(xneti->xni_softintr);
838: }
839:
840: static void
841: xennetback_ifsoftstart(void *arg)
842: {
843: struct xnetback_instance *xneti = arg;
844: struct ifnet *ifp = &xneti->xni_if;
845: struct mbuf *m;
846: vaddr_t xmit_va;
847: paddr_t xmit_pa;
848: paddr_t xmit_ma;
1.9 bouyer 849: paddr_t newp_ma = 0; /* XXX gcc */
1.1 bouyer 850: int i, j, nppitems;
851: mmu_update_t *mmup;
852: multicall_entry_t *mclp;
853: netif_rx_response_t *rxresp;
854: RING_IDX req_prod, resp_prod;
855: int do_event = 0;
856: gnttab_transfer_t *gop;
857: int id, offset;
858:
859: XENPRINTF(("xennetback_ifsoftstart "));
860: int s = splnet();
861: if (__predict_false(
862: (ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)) {
863: splx(s);
864: return;
865: }
866:
867: while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
868: XENPRINTF(("pkt\n"));
869: req_prod = xneti->xni_rxring.sring->req_prod;
870: resp_prod = xneti->xni_rxring.rsp_prod_pvt;
871: x86_lfence();
872:
873: mmup = xstart_mmu;
874: mclp = xstart_mcl;
875: gop = xstart_gop;
876: for (nppitems = 0, i = 0; !IFQ_IS_EMPTY(&ifp->if_snd);) {
877: XENPRINTF(("have a packet\n"));
878: IFQ_POLL(&ifp->if_snd, m);
879: if (__predict_false(m == NULL))
880: panic("xennetback_ifstart: IFQ_POLL");
881: if (__predict_false(
882: req_prod == xneti->xni_rxring.req_cons ||
883: xneti->xni_rxring.req_cons - resp_prod ==
884: NET_RX_RING_SIZE)) {
885: /* out of ring space */
886: XENPRINTF(("xennetback_ifstart: ring full "
887: "req_prod 0x%x req_cons 0x%x resp_prod "
888: "0x%x\n",
889: req_prod, xneti->xni_rxring.req_cons,
890: resp_prod));
891: ifp->if_timer = 1;
892: break;
893: }
894: if (__predict_false(i == NB_XMIT_PAGES_BATCH))
895: break; /* we filled the array */
896: if (__predict_false(
897: xennetback_get_mcl_page(&newp_ma) != 0))
898: break; /* out of memory */
899: if ((m->m_flags & M_CLUSTER) != 0 &&
900: !M_READONLY(m) && MCLBYTES == PAGE_SIZE) {
901: /* we can give this page away */
902: xmit_pa = m->m_ext.ext_paddr;
903: xmit_ma = xpmap_ptom(xmit_pa);
904: xmit_va = (vaddr_t)m->m_ext.ext_buf;
905: KASSERT(xmit_pa != M_PADDR_INVALID);
906: KASSERT((xmit_va & PAGE_MASK) == 0);
907: offset = m->m_data - m->m_ext.ext_buf;
908: } else {
909: /* we have to copy the packet */
910: xmit_va = (vaddr_t)pool_cache_get_paddr(
911: xmit_pages_pool_cachep,
912: PR_NOWAIT, &xmit_pa);
913: if (__predict_false(xmit_va == 0))
914: break; /* out of memory */
915:
916: KASSERT(xmit_pa != POOL_PADDR_INVALID);
917: xmit_ma = xpmap_ptom(xmit_pa);
918: XENPRINTF(("xennetback_get_xmit_page: got va "
919: "0x%x ma 0x%x\n", (u_int)xmit_va,
920: (u_int)xmit_ma));
921: m_copydata(m, 0, m->m_pkthdr.len,
1.14 dogcow 922: (char *)xmit_va + LINUX_REQUESTED_OFFSET);
1.8 bouyer 923: offset = LINUX_REQUESTED_OFFSET;
1.1 bouyer 924: pages_pool_free[nppitems].va = xmit_va;
925: pages_pool_free[nppitems].pa = xmit_pa;
926: nppitems++;
927: }
928: /* start filling ring */
929: gop->ref = RING_GET_REQUEST(&xneti->xni_rxring,
930: xneti->xni_rxring.req_cons)->gref;
931: id = RING_GET_REQUEST(&xneti->xni_rxring,
932: xneti->xni_rxring.req_cons)->id;
1.8 bouyer 933: x86_lfence();
1.1 bouyer 934: xneti->xni_rxring.req_cons++;
935: rxresp = RING_GET_RESPONSE(&xneti->xni_rxring,
936: resp_prod);
937: rxresp->id = id;
938: rxresp->offset = offset;
939: rxresp->status = m->m_pkthdr.len;
1.11 yamt 940: if ((m->m_pkthdr.csum_flags &
941: (M_CSUM_TCPv4 | M_CSUM_UDPv4)) != 0) {
942: rxresp->flags = NETRXF_csum_blank;
943: } else {
944: rxresp->flags = 0;
945: }
1.1 bouyer 946: /*
947: * Transfer the page containing the packet to the
948: * remote domain, and map newp in place.
949: */
950: xpmap_phys_to_machine_mapping[
951: (xmit_pa - XPMAP_OFFSET) >> PAGE_SHIFT] =
952: newp_ma >> PAGE_SHIFT;
953: MULTI_update_va_mapping(mclp, xmit_va,
954: newp_ma | PG_V | PG_RW | PG_U | PG_M, 0);
955: mclp++;
956: gop->mfn = xmit_ma >> PAGE_SHIFT;
957: gop->domid = xneti->xni_domid;
958: gop++;
959:
960: mmup->ptr = newp_ma | MMU_MACHPHYS_UPDATE;
961: mmup->val = (xmit_pa - XPMAP_OFFSET) >> PAGE_SHIFT;
962: mmup++;
963:
964: /* done with this packet */
965: IFQ_DEQUEUE(&ifp->if_snd, m);
966: mbufs_sent[i] = m;
1.8 bouyer 967: resp_prod++;
1.1 bouyer 968: i++; /* this packet has been queued */
969: ifp->if_opackets++;
970: #if NBPFILTER > 0
971: if (ifp->if_bpf)
972: bpf_mtap(ifp->if_bpf, m);
973: #endif
974: }
975: if (i != 0) {
976: /*
977: * We may have allocated buffers which have entries
978: * outstanding in the page update queue -- make sure
979: * we flush those first!
980: */
981: int svm = splvm();
982: xpq_flush_queue();
983: splx(svm);
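			/*
			 * The last MULTI_update_va_mapping entry queued in
			 * the loop above carries the TLB flush for the whole
			 * batch; one extra multicall entry is appended here
			 * to push the i machphys updates in a single
			 * mmu_update hypercall.
			 */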
984: mclp[-1].args[MULTI_UVMFLAGS_INDEX] =
985: UVMF_TLB_FLUSH|UVMF_ALL;
986: mclp->op = __HYPERVISOR_mmu_update;
987: mclp->args[0] = (unsigned long)xstart_mmu;
988: mclp->args[1] = i;
989: mclp->args[2] = 0;
990: mclp->args[3] = DOMID_SELF;
991: mclp++;
992: /* update the MMU */
993: if (HYPERVISOR_multicall(xstart_mcl, i + 1) != 0) {
994: panic("%s: HYPERVISOR_multicall failed",
995: ifp->if_xname);
996: }
997: for (j = 0; j < i + 1; j++) {
1.8 bouyer 998: if (xstart_mcl[j].result != 0) {
999: printf("%s: xstart_mcl[%d] "
1000: "failed (%lu)\n", ifp->if_xname,
1001: j, xstart_mcl[j].result);
1002: printf("%s: req_prod %u req_cons "
1003: "%u rsp_prod %u rsp_prod_pvt %u "
1004: "i %u\n",
1005: ifp->if_xname,
1006: xneti->xni_rxring.sring->req_prod,
1007: xneti->xni_rxring.req_cons,
1008: xneti->xni_rxring.sring->rsp_prod,
1009: xneti->xni_rxring.rsp_prod_pvt,
1010: i);
1011: }
1.1 bouyer 1012: }
1013: if (HYPERVISOR_grant_table_op(GNTTABOP_transfer,
1014: xstart_gop, i) != 0) {
1015: panic("%s: GNTTABOP_transfer failed",
1016: ifp->if_xname);
1017: }
1018:
1019: for (j = 0; j < i; j++) {
1020: if (xstart_gop[j].status != 0) {
1.8 bouyer 1021: printf("%s GNTTABOP_transfer[%d] %d\n",
1022: ifp->if_xname,
1.1 bouyer 1023: j, xstart_gop[j].status);
1.8 bouyer 1024: printf("%s: req_prod %u req_cons "
1025: "%u rsp_prod %u rsp_prod_pvt %u "
1026: "i %d\n",
1027: ifp->if_xname,
1028: xneti->xni_rxring.sring->req_prod,
1029: xneti->xni_rxring.req_cons,
1030: xneti->xni_rxring.sring->rsp_prod,
1031: xneti->xni_rxring.rsp_prod_pvt,
1032: i);
1033: rxresp = RING_GET_RESPONSE(
1034: &xneti->xni_rxring,
1035: xneti->xni_rxring.rsp_prod_pvt + j);
1.1 bouyer 1036: rxresp->status = NETIF_RSP_ERROR;
1037: }
1038: }
1039:
1040: /* update pointer */
1.8 bouyer 1041: KASSERT(
1042: xneti->xni_rxring.rsp_prod_pvt + i == resp_prod);
1.1 bouyer 1043: xneti->xni_rxring.rsp_prod_pvt = resp_prod;
1044: RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(
1045: &xneti->xni_rxring, j);
1046: if (j)
1047: do_event = 1;
1048: /* now we can free the mbufs */
1049: for (j = 0; j < i; j++) {
1050: m_freem(mbufs_sent[j]);
1051: }
1052: for (j = 0; j < nppitems; j++) {
1053: pool_cache_put_paddr(xmit_pages_pool_cachep,
1054: (void *)pages_pool_free[j].va,
1055: pages_pool_free[j].pa);
1056: }
1057: }
1058: /* send event */
1059: if (do_event) {
1060: x86_lfence();
1061: XENPRINTF(("%s receive event\n",
1062: xneti->xni_if.if_xname));
1063: hypervisor_notify_via_evtchn(xneti->xni_evtchn);
1064: do_event = 0;
1065: }
1066: /* check if we need to get back some pages */
1067: if (mcl_pages_alloc < 0) {
1068: xennetback_get_new_mcl_pages();
1069: if (mcl_pages_alloc < 0) {
1070: /*
1071: * setup the watchdog to try again, because
1072: * xennetback_ifstart() will never be called
1073: * again if queue is full.
1074: */
1075: printf("xennetback_ifstart: no mcl_pages\n");
1076: ifp->if_timer = 1;
1077: break;
1078: }
1079: }
1.8 bouyer 1080: /*
1081: * note that we don't use RING_FINAL_CHECK_FOR_REQUESTS()
1082: * here, as the frontend doesn't notify when adding
1083: * requests anyway
1084: */
1.1 bouyer 1085: if (__predict_false(
1.8 bouyer 1086: !RING_HAS_UNCONSUMED_REQUESTS(&xneti->xni_rxring))) {
1.1 bouyer 1087: /* ring full */
1088: break;
1089: }
1090: }
1091: splx(s);
1092: }
1093:
1094:
1095: static void
1096: xennetback_ifwatchdog(struct ifnet * ifp)
1097: {
1098: /*
1099: * We can get to the following condition:
1100: * transmit stalls because the ring is full when the ifq is full too.
1101: * In this case (as, unfortunately, we don't get an interrupt from xen
1102: * on transmit) nothing will ever call xennetback_ifstart() again.
1103: * Here we abuse the watchdog to get out of this condition.
1104: */
1105: XENPRINTF(("xennetback_ifwatchdog\n"));
1106: xennetback_ifstart(ifp);
1107: }
1108:
1109:
1110: static int
1111: xennetback_ifinit(struct ifnet *ifp)
1112: {
1113: struct xnetback_instance *xneti = ifp->if_softc;
1114: int s = splnet();
1115:
1116: if ((ifp->if_flags & IFF_UP) == 0) {
1117: splx(s);
1118: return 0;
1119: }
1120: if (xneti->xni_status == CONNECTED)
1121: ifp->if_flags |= IFF_RUNNING;
1122: splx(s);
1123: return 0;
1124: }
1125:
1126: static void
1127: xennetback_ifstop(struct ifnet *ifp, int disable)
1128: {
1129: struct xnetback_instance *xneti = ifp->if_softc;
1130: int s = splnet();
1131:
1132: ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1133: ifp->if_timer = 0;
1134: if (xneti->xni_status == CONNECTED) {
1135: XENPRINTF(("%s: req_prod 0x%x resp_prod 0x%x req_cons 0x%x "
1136: "event 0x%x\n", ifp->if_xname, xneti->xni_txring->req_prod,
1137: xneti->xni_txring->resp_prod, xneti->xni_txring->req_cons,
1138: xneti->xni_txring->event));
1139: xennetback_evthandler(ifp->if_softc); /* flush pending RX requests */
1140: }
1141: splx(s);
1142: }