Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/dev/pci/ixgbe/ixgbe.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/dev/pci/ixgbe/ixgbe.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.39 retrieving revision 1.39.2.3 diff -u -p -r1.39 -r1.39.2.3 --- src/sys/dev/pci/ixgbe/ixgbe.c 2016/07/11 06:14:51 1.39 +++ src/sys/dev/pci/ixgbe/ixgbe.c 2017/03/20 06:57:37 1.39.2.3 @@ -1,6 +1,6 @@ /****************************************************************************** - Copyright (c) 2001-2013, Intel Corporation + Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -58,11 +58,14 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ -/*$FreeBSD: head/sys/dev/ixgbe/ixgbe.c 279805 2015-03-09 10:29:15Z araujo $*/ -/*$NetBSD: ixgbe.c,v 1.39 2016/07/11 06:14:51 knakahara Exp $*/ +/*$FreeBSD: head/sys/dev/ixgbe/if_ix.c 302384 2016-07-07 03:39:18Z sbruno $*/ +/*$NetBSD: ixgbe.c,v 1.39.2.3 2017/03/20 06:57:37 pgoyette Exp $*/ +#ifdef _KERNEL_OPT #include "opt_inet.h" #include "opt_inet6.h" +#include "opt_net_mpsafe.h" +#endif #include "ixgbe.h" #include "vlan.h" @@ -70,14 +73,10 @@ #include /********************************************************************* - * Set this to one to display debug statistics - *********************************************************************/ -int ixgbe_display_debug_stats = 0; - -/********************************************************************* * Driver version *********************************************************************/ -char ixgbe_driver_version[] = "2.5.15"; +char ixgbe_driver_version[] = "3.1.13-k"; + /********************************************************************* * PCI Device ID Table @@ 
-114,7 +113,15 @@ static ixgbe_vendor_info_t ixgbe_vendor_ {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0}, + {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_QSFP_SF_QP, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0}, + {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T1, 0, 0, 0}, + {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T, 0, 0, 0}, + {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T1, 0, 0, 0}, + {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KR, 0, 0, 0}, + {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KX4, 0, 0, 0}, + {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_10G_T, 0, 0, 0}, + {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_SFP, 0, 0, 0}, /* required last entry */ {0, 0, 0, 0, 0} }; @@ -136,74 +143,49 @@ static int ixgbe_detach(device_t, i #if 0 static int ixgbe_shutdown(device_t); #endif -#ifdef IXGBE_LEGACY_TX -static void ixgbe_start(struct ifnet *); -static void ixgbe_start_locked(struct tx_ring *, struct ifnet *); -#else /* ! 
IXGBE_LEGACY_TX */ -static int ixgbe_mq_start(struct ifnet *, struct mbuf *); -static int ixgbe_mq_start_locked(struct ifnet *, struct tx_ring *); -static void ixgbe_qflush(struct ifnet *); -static void ixgbe_deferred_mq_start(void *, int); -#endif /* IXGBE_LEGACY_TX */ +static bool ixgbe_suspend(device_t, const pmf_qual_t *); +static bool ixgbe_resume(device_t, const pmf_qual_t *); static int ixgbe_ioctl(struct ifnet *, u_long, void *); static void ixgbe_ifstop(struct ifnet *, int); static int ixgbe_init(struct ifnet *); static void ixgbe_init_locked(struct adapter *); static void ixgbe_stop(void *); +static void ixgbe_add_media_types(struct adapter *); static void ixgbe_media_status(struct ifnet *, struct ifmediareq *); static int ixgbe_media_change(struct ifnet *); static void ixgbe_identify_hardware(struct adapter *); static int ixgbe_allocate_pci_resources(struct adapter *, const struct pci_attach_args *); -static void ixgbe_get_slot_info(struct ixgbe_hw *); +static void ixgbe_get_slot_info(struct adapter *); static int ixgbe_allocate_msix(struct adapter *, const struct pci_attach_args *); static int ixgbe_allocate_legacy(struct adapter *, const struct pci_attach_args *); -static int ixgbe_allocate_queues(struct adapter *); static int ixgbe_setup_msix(struct adapter *); static void ixgbe_free_pci_resources(struct adapter *); static void ixgbe_local_timer(void *); +static void ixgbe_local_timer1(void *); static int ixgbe_setup_interface(device_t, struct adapter *); +static void ixgbe_config_gpie(struct adapter *); +static void ixgbe_config_dmac(struct adapter *); +static void ixgbe_config_delay_values(struct adapter *); static void ixgbe_config_link(struct adapter *); +static void ixgbe_check_wol_support(struct adapter *); +static int ixgbe_setup_low_power_mode(struct adapter *); +static void ixgbe_rearm_queues(struct adapter *, u64); -static int ixgbe_allocate_transmit_buffers(struct tx_ring *); -static int ixgbe_setup_transmit_structures(struct adapter *); 
-static void ixgbe_setup_transmit_ring(struct tx_ring *); static void ixgbe_initialize_transmit_units(struct adapter *); -static void ixgbe_free_transmit_structures(struct adapter *); -static void ixgbe_free_transmit_buffers(struct tx_ring *); - -static int ixgbe_allocate_receive_buffers(struct rx_ring *); -static int ixgbe_setup_receive_structures(struct adapter *); -static int ixgbe_setup_receive_ring(struct rx_ring *); static void ixgbe_initialize_receive_units(struct adapter *); -static void ixgbe_free_receive_structures(struct adapter *); -static void ixgbe_free_receive_buffers(struct rx_ring *); -static void ixgbe_setup_hw_rsc(struct rx_ring *); +static void ixgbe_enable_rx_drop(struct adapter *); +static void ixgbe_disable_rx_drop(struct adapter *); +static void ixgbe_initialize_rss_mapping(struct adapter *); static void ixgbe_enable_intr(struct adapter *); static void ixgbe_disable_intr(struct adapter *); static void ixgbe_update_stats_counters(struct adapter *); -static void ixgbe_txeof(struct tx_ring *); -static bool ixgbe_rxeof(struct ix_queue *); -static void ixgbe_rx_checksum(u32, struct mbuf *, u32, - struct ixgbe_hw_stats *); static void ixgbe_set_promisc(struct adapter *); static void ixgbe_set_multi(struct adapter *); static void ixgbe_update_link_status(struct adapter *); -static void ixgbe_refresh_mbufs(struct rx_ring *, int); -static int ixgbe_xmit(struct tx_ring *, struct mbuf *); -static int ixgbe_set_flowcntl(SYSCTLFN_PROTO); -static int ixgbe_set_advertise(SYSCTLFN_PROTO); -static int ixgbe_set_thermal_test(SYSCTLFN_PROTO); -static int ixgbe_dma_malloc(struct adapter *, bus_size_t, - struct ixgbe_dma_alloc *, int); -static void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *); -static int ixgbe_tx_ctx_setup(struct tx_ring *, - struct mbuf *, u32 *, u32 *); -static int ixgbe_tso_setup(struct tx_ring *, - struct mbuf *, u32 *, u32 *); static void ixgbe_set_ivar(struct adapter *, u8, u8, s8); static void ixgbe_configure_ivars(struct 
adapter *); static u8 * ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *); @@ -214,14 +196,31 @@ static void ixgbe_register_vlan(void *, static void ixgbe_unregister_vlan(void *, struct ifnet *, u16); #endif -static void ixgbe_add_hw_stats(struct adapter *adapter); - -static __inline void ixgbe_rx_discard(struct rx_ring *, int); -static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *, - struct mbuf *, u32); - -static void ixgbe_enable_rx_drop(struct adapter *); -static void ixgbe_disable_rx_drop(struct adapter *); +static void ixgbe_add_device_sysctls(struct adapter *); +static void ixgbe_add_hw_stats(struct adapter *); +static int ixgbe_set_flowcntl(struct adapter *, int); +static int ixgbe_set_advertise(struct adapter *, int); + +/* Sysctl handlers */ +static void ixgbe_set_sysctl_value(struct adapter *, const char *, + const char *, int *, int); +static int ixgbe_sysctl_flowcntl(SYSCTLFN_PROTO); +static int ixgbe_sysctl_advertise(SYSCTLFN_PROTO); +static int ixgbe_sysctl_thermal_test(SYSCTLFN_PROTO); +static int ixgbe_sysctl_dmac(SYSCTLFN_PROTO); +static int ixgbe_sysctl_phy_temp(SYSCTLFN_PROTO); +static int ixgbe_sysctl_phy_overtemp_occurred(SYSCTLFN_PROTO); +#ifdef IXGBE_DEBUG +static int ixgbe_sysctl_power_state(SYSCTLFN_PROTO); +static int ixgbe_sysctl_print_rss_config(SYSCTLFN_PROTO); +#endif +static int ixgbe_sysctl_wol_enable(SYSCTLFN_PROTO); +static int ixgbe_sysctl_wufc(SYSCTLFN_PROTO); +static int ixgbe_sysctl_eee_enable(SYSCTLFN_PROTO); +static int ixgbe_sysctl_eee_negotiated(SYSCTLFN_PROTO); +static int ixgbe_sysctl_eee_rx_lpi_status(SYSCTLFN_PROTO); +static int ixgbe_sysctl_eee_tx_lpi_status(SYSCTLFN_PROTO); +static int ixgbe_sysctl_eee_tx_lpi_delay(SYSCTLFN_PROTO); /* Support for pluggable optic modules */ static bool ixgbe_sfp_probe(struct adapter *); @@ -230,28 +229,35 @@ static void ixgbe_setup_optics(struct ad /* Legacy (single vector interrupt handler */ static int ixgbe_legacy_irq(void *); -#if defined(NETBSD_MSI_OR_MSIX) /* 
The MSI/X Interrupt handlers */ static int ixgbe_msix_que(void *); static int ixgbe_msix_link(void *); -#endif /* Software interrupts for deferred work */ static void ixgbe_handle_que(void *); static void ixgbe_handle_link(void *); static void ixgbe_handle_msf(void *); static void ixgbe_handle_mod(void *); +static void ixgbe_handle_phy(void *); const struct sysctlnode *ixgbe_sysctl_instance(struct adapter *); static ixgbe_vendor_info_t *ixgbe_lookup(const struct pci_attach_args *); #ifdef IXGBE_FDIR -static void ixgbe_atr(struct tx_ring *, struct mbuf *); static void ixgbe_reinit_fdir(void *, int); #endif -/* Missing shared code prototype */ -extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw); +#ifdef PCI_IOV +static void ixgbe_ping_all_vfs(struct adapter *); +static void ixgbe_handle_mbx(void *, int); +static int ixgbe_init_iov(device_t, u16, const nvlist_t *); +static void ixgbe_uninit_iov(device_t); +static int ixgbe_add_vf(device_t, u16, const nvlist_t *); +static void ixgbe_initialize_iov(struct adapter *); +static void ixgbe_recalculate_max_frame(struct adapter *); +static void ixgbe_init_vf(struct adapter *, struct ixgbe_vf *); +#endif /* PCI_IOV */ + /********************************************************************* * FreeBSD Device Interface Entry Points @@ -262,11 +268,11 @@ CFATTACH_DECL3_NEW(ixg, sizeof(struct ad DVF_DETACH_SHUTDOWN); #if 0 -devclass_t ixgbe_devclass; -DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0); +devclass_t ix_devclass; +DRIVER_MODULE(ix, pci, ix_driver, ix_devclass, 0, 0); -MODULE_DEPEND(ixgbe, pci, 1, 1, 1); -MODULE_DEPEND(ixgbe, ether, 1, 1, 1); +MODULE_DEPEND(ix, pci, 1, 1, 1); +MODULE_DEPEND(ix, ether, 1, 1, 1); #endif /* @@ -279,20 +285,38 @@ MODULE_DEPEND(ixgbe, ether, 1, 1, 1); ** is varied over time based on the ** traffic for that interrupt vector */ -static int ixgbe_enable_aim = TRUE; -#define SYSCTL_INT(__x, __y) -SYSCTL_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim); +static bool 
ixgbe_enable_aim = true; +#define SYSCTL_INT(_a1, _a2, _a3, _a4, _a5, _a6, _a7) +SYSCTL_INT(_hw_ix, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &ixgbe_enable_aim, 0, + "Enable adaptive interrupt moderation"); static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY); -SYSCTL_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate); +SYSCTL_INT(_hw_ix, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN, + &ixgbe_max_interrupt_rate, 0, "Maximum interrupts per second"); /* How many packets rxeof tries to clean at a time */ static int ixgbe_rx_process_limit = 256; -SYSCTL_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit); +SYSCTL_INT(_hw_ix, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, + &ixgbe_rx_process_limit, 0, + "Maximum number of received packets to process at a time," + "-1 means unlimited"); /* How many packets txeof tries to clean at a time */ static int ixgbe_tx_process_limit = 256; -SYSCTL_INT("hw.ixgbe.tx_process_limit", &ixgbe_tx_process_limit); +SYSCTL_INT(_hw_ix, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN, + &ixgbe_tx_process_limit, 0, + "Maximum number of sent packets to process at a time," + "-1 means unlimited"); + +/* Flow control setting, default to full */ +static int ixgbe_flow_control = ixgbe_fc_full; +SYSCTL_INT(_hw_ix, OID_AUTO, flow_control, CTLFLAG_RDTUN, + &ixgbe_flow_control, 0, "Default flow control used for all adapters"); + +/* Advertise Speed, default to 0 (auto) */ +static int ixgbe_advertise_speed = 0; +SYSCTL_INT(_hw_ix, OID_AUTO, advertise_speed, CTLFLAG_RDTUN, + &ixgbe_advertise_speed, 0, "Default advertised speed for all adapters"); /* ** Smart speed setting, default to on @@ -308,18 +332,18 @@ static int ixgbe_smart_speed = ixgbe_sma * but this allows it to be forced off for testing. 
*/ static int ixgbe_enable_msix = 1; -SYSCTL_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix); +SYSCTL_INT(_hw_ix, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &ixgbe_enable_msix, 0, + "Enable MSI-X interrupts"); -#if defined(NETBSD_MSI_OR_MSIX) /* * Number of Queues, can be set to 0, * it then autoconfigures based on the * number of cpus with a max of 8. This * can be overriden manually here. */ -static int ixgbe_num_queues = 1; -SYSCTL_INT("hw.ixgbe.num_queues", &ixgbe_num_queues); -#endif +static int ixgbe_num_queues = 0; +SYSCTL_INT(_hw_ix, OID_AUTO, num_queues, CTLFLAG_RDTUN, &ixgbe_num_queues, 0, + "Number of queues to configure, 0 indicates autoconfigure"); /* ** Number of TX descriptors per ring, @@ -327,11 +351,13 @@ SYSCTL_INT("hw.ixgbe.num_queues", &ixgbe ** the better performing choice. */ static int ixgbe_txd = PERFORM_TXD; -SYSCTL_INT("hw.ixgbe.txd", &ixgbe_txd); +SYSCTL_INT(_hw_ix, OID_AUTO, txd, CTLFLAG_RDTUN, &ixgbe_txd, 0, + "Number of transmit descriptors per queue"); /* Number of RX descriptors per ring */ static int ixgbe_rxd = PERFORM_RXD; -SYSCTL_INT("hw.ixgbe.rxd", &ixgbe_rxd); +SYSCTL_INT(_hw_ix, OID_AUTO, rxd, CTLFLAG_RDTUN, &ixgbe_rxd, 0, + "Number of receive descriptors per queue"); /* ** Defining this on will allow the use @@ -339,36 +365,13 @@ SYSCTL_INT("hw.ixgbe.rxd", &ixgbe_rxd); ** doing so you are on your own :) */ static int allow_unsupported_sfp = false; -SYSCTL_INT("hw.ix.unsupported_sfp", &allow_unsupported_sfp); - -/* -** HW RSC control: -** this feature only works with -** IPv4, and only on 82599 and later. -** Also this will cause IP forwarding to -** fail and that can't be controlled by -** the stack as LRO can. For all these -** reasons I've deemed it best to leave -** this off and not bother with a tuneable -** interface, this would need to be compiled -** to enable. 
-*/ -static bool ixgbe_rsc_enable = FALSE; +#define TUNABLE_INT(__x, __y) +TUNABLE_INT("hw.ix.unsupported_sfp", &allow_unsupported_sfp); /* Keep running tab on them for sanity check */ static int ixgbe_total_ports; #ifdef IXGBE_FDIR -/* -** For Flow Director: this is the -** number of TX packets we sample -** for the filter pool, this means -** every 20th packet will be probed. -** -** This feature can be disabled by -** setting this to 0. -*/ -static int atr_sample_rate = 20; /* ** Flow Director actually 'steals' ** part of the packet buffer as its @@ -391,6 +394,15 @@ static int fdir_pballoc = 1; #include #endif /* DEV_NETMAP */ +#ifdef NET_MPSAFE +#define IXGBE_MPSAFE 1 +#define IXGBE_CALLOUT_FLAGS CALLOUT_MPSAFE +#define IXGBE_SOFTINFT_FLAGS SOFTINT_MPSAFE +#else +#define IXGBE_CALLOUT_FLAGS 0 +#define IXGBE_SOFTINFT_FLAGS 0 +#endif + /********************************************************************* * Device identification routine * @@ -414,7 +426,7 @@ ixgbe_lookup(const struct pci_attach_arg pcireg_t subid; ixgbe_vendor_info_t *ent; - INIT_DEBUGOUT("ixgbe_probe: begin"); + INIT_DEBUGOUT("ixgbe_lookup: begin"); if (PCI_VENDOR(pa->pa_id) != IXGBE_INTEL_VENDOR_ID) return NULL; @@ -437,62 +449,6 @@ ixgbe_lookup(const struct pci_attach_arg return NULL; } - -static void -ixgbe_sysctl_attach(struct adapter *adapter) -{ - struct sysctllog **log; - const struct sysctlnode *rnode, *cnode; - device_t dev; - - dev = adapter->dev; - log = &adapter->sysctllog; - - if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) { - aprint_error_dev(dev, "could not create sysctl root\n"); - return; - } - - if (sysctl_createv(log, 0, &rnode, &cnode, - CTLFLAG_READONLY, CTLTYPE_INT, - "num_rx_desc", SYSCTL_DESCR("Number of rx descriptors"), - NULL, 0, &adapter->num_rx_desc, 0, CTL_CREATE, CTL_EOL) != 0) - aprint_error_dev(dev, "could not create sysctl\n"); - - if (sysctl_createv(log, 0, &rnode, &cnode, - CTLFLAG_READONLY, CTLTYPE_INT, - "num_queues", SYSCTL_DESCR("Number of 
queues"), - NULL, 0, &adapter->num_queues, 0, CTL_CREATE, CTL_EOL) != 0) - aprint_error_dev(dev, "could not create sysctl\n"); - - if (sysctl_createv(log, 0, &rnode, &cnode, - CTLFLAG_READWRITE, CTLTYPE_INT, - "fc", SYSCTL_DESCR("Flow Control"), - ixgbe_set_flowcntl, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0) - aprint_error_dev(dev, "could not create sysctl\n"); - - /* XXX This is an *instance* sysctl controlling a *global* variable. - * XXX It's that way in the FreeBSD driver that this derives from. - */ - if (sysctl_createv(log, 0, &rnode, &cnode, - CTLFLAG_READWRITE, CTLTYPE_INT, - "enable_aim", SYSCTL_DESCR("Interrupt Moderation"), - NULL, 0, &ixgbe_enable_aim, 0, CTL_CREATE, CTL_EOL) != 0) - aprint_error_dev(dev, "could not create sysctl\n"); - - if (sysctl_createv(log, 0, &rnode, &cnode, - CTLFLAG_READWRITE, CTLTYPE_INT, - "advertise_speed", SYSCTL_DESCR("Link Speed"), - ixgbe_set_advertise, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0) - aprint_error_dev(dev, "could not create sysctl\n"); - - if (sysctl_createv(log, 0, &rnode, &cnode, - CTLFLAG_READWRITE, CTLTYPE_INT, - "ts", SYSCTL_DESCR("Thermal Test"), - ixgbe_set_thermal_test, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0) - aprint_error_dev(dev, "could not create sysctl\n"); -} - /********************************************************************* * Device initialization routine * @@ -509,20 +465,24 @@ ixgbe_attach(device_t parent, device_t d struct adapter *adapter; struct ixgbe_hw *hw; int error = -1; - u16 csum; + u16 csum, high, low; u32 ctrl_ext; ixgbe_vendor_info_t *ent; - const struct pci_attach_args *pa = aux; + struct pci_attach_args *pa = aux; + const char *str; INIT_DEBUGOUT("ixgbe_attach: begin"); /* Allocate, clear, and link in our adapter structure */ adapter = device_private(dev); - adapter->dev = adapter->osdep.dev = dev; + adapter->dev = dev; hw = &adapter->hw; adapter->osdep.pc = pa->pa_pc; adapter->osdep.tag = pa->pa_tag; - adapter->osdep.dmat = pa->pa_dmat; + if 
(pci_dma64_available(pa)) + adapter->osdep.dmat = pa->pa_dmat64; + else + adapter->osdep.dmat = pa->pa_dmat; adapter->osdep.attached = false; ent = ixgbe_lookup(pa); @@ -532,19 +492,54 @@ ixgbe_attach(device_t parent, device_t d aprint_normal(": %s, Version - %s\n", ixgbe_strings[ent->index], ixgbe_driver_version); +#ifdef DEV_NETMAP + adapter->init_locked = ixgbe_init_locked; + adapter->stop_locked = ixgbe_stop; +#endif + /* Core Lock Init*/ IXGBE_CORE_LOCK_INIT(adapter, device_xname(dev)); - /* SYSCTL APIs */ - - ixgbe_sysctl_attach(adapter); - /* Set up the timer callout */ - callout_init(&adapter->timer, 0); + callout_init(&adapter->timer, IXGBE_CALLOUT_FLAGS); /* Determine hardware revision */ ixgbe_identify_hardware(adapter); + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + str = "82598EB"; + break; + case ixgbe_mac_82599EB: + str = "82599EB"; + break; + case ixgbe_mac_82599_vf: + str = "82599 VF"; + break; + case ixgbe_mac_X540: + str = "X540"; + break; + case ixgbe_mac_X540_vf: + str = "X540 VF"; + break; + case ixgbe_mac_X550: + str = "X550"; + break; + case ixgbe_mac_X550EM_x: + str = "X550EM"; + break; + case ixgbe_mac_X550_vf: + str = "X550 VF"; + break; + case ixgbe_mac_X550EM_x_vf: + str = "X550EM X VF"; + break; + default: + str = "Unknown"; + break; + } + aprint_normal_dev(dev, "device %s\n", str); + /* Do base PCI setup - map BAR0 */ if (ixgbe_allocate_pci_resources(adapter, pa)) { aprint_error_dev(dev, "Allocation of PCI resources failed\n"); @@ -552,6 +547,15 @@ ixgbe_attach(device_t parent, device_t d goto err_out; } + /* Sysctls for limiting the amount of work done in the taskqueues */ + ixgbe_set_sysctl_value(adapter, "rx_processing_limit", + "max number of rx packets to process", + &adapter->rx_process_limit, ixgbe_rx_process_limit); + + ixgbe_set_sysctl_value(adapter, "tx_processing_limit", + "max number of tx packets to process", + &adapter->tx_process_limit, ixgbe_tx_process_limit); + /* Do descriptor calc and sanity checks */ if 
(((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 || ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) { @@ -565,7 +569,7 @@ ixgbe_attach(device_t parent, device_t d ** system mbuf allocation. Tuning nmbclusters ** can alleviate this. */ - if (nmbclusters > 0 ) { + if (nmbclusters > 0) { int s; s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports; if (s > nmbclusters) { @@ -589,7 +593,7 @@ ixgbe_attach(device_t parent, device_t d } /* Allocate multicast array memory. */ - adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS * + adapter->mta = malloc(sizeof(*adapter->mta) * MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); if (adapter->mta == NULL) { aprint_error_dev(dev, "Cannot allocate multicast setup array\n"); @@ -609,45 +613,52 @@ ixgbe_attach(device_t parent, device_t d adapter->sfp_probe = TRUE; error = 0; } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) { - aprint_error_dev(dev,"Unsupported SFP+ module detected!\n"); + aprint_error_dev(dev, "Unsupported SFP+ module detected!\n"); error = EIO; goto err_late; } else if (error) { - aprint_error_dev(dev,"Unable to initialize the shared code\n"); + aprint_error_dev(dev, "Unable to initialize the shared code\n"); error = EIO; goto err_late; } /* Make sure we have a good EEPROM before we read from it */ if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) { - aprint_error_dev(dev,"The EEPROM Checksum Is Not Valid\n"); + aprint_error_dev(dev, "The EEPROM Checksum Is Not Valid\n"); error = EIO; goto err_late; } + /* Print the TrackID */ + hw->eeprom.ops.read(hw, IXGBE_TRACKID_H, &high); + hw->eeprom.ops.read(hw, IXGBE_TRACKID_L, &low); + aprint_normal_dev(dev, "TrackID %08x\n", ((uint32_t)high << 16) | low); + error = ixgbe_init_hw(hw); switch (error) { case IXGBE_ERR_EEPROM_VERSION: aprint_error_dev(dev, "This device is a pre-production adapter/" "LOM. 
Please be aware there may be issues associated " - "with your hardware.\n If you are experiencing problems " + "with your hardware.\nIf you are experiencing problems " "please contact your Intel or hardware representative " "who provided you with this hardware.\n"); break; case IXGBE_ERR_SFP_NOT_SUPPORTED: - aprint_error_dev(dev,"Unsupported SFP+ Module\n"); + aprint_error_dev(dev, "Unsupported SFP+ Module\n"); error = EIO; - aprint_error_dev(dev,"Hardware Initialization Failure\n"); + aprint_error_dev(dev, "Hardware Initialization Failure\n"); goto err_late; case IXGBE_ERR_SFP_NOT_PRESENT: - device_printf(dev,"No SFP+ Module found\n"); + aprint_error_dev(dev, "No SFP+ Module found\n"); /* falls thru */ default: break; } - /* Detect and set physical type */ - ixgbe_setup_optics(adapter); + /* hw.ix defaults init */ + ixgbe_set_advertise(adapter, ixgbe_advertise_speed); + ixgbe_set_flowcntl(adapter, ixgbe_flow_control); + adapter->enable_aim = ixgbe_enable_aim; error = -1; if ((adapter->msix > 1) && (ixgbe_enable_msix)) @@ -657,6 +668,12 @@ ixgbe_attach(device_t parent, device_t d if (error) goto err_late; + /* Enable the optics for 82599 SFP+ fiber */ + ixgbe_enable_tx_laser(hw); + + /* Enable power to the phy. 
*/ + ixgbe_set_phy_power(hw, TRUE); + /* Setup OS specific network interface */ if (ixgbe_setup_interface(dev, adapter) != 0) goto err_late; @@ -664,27 +681,61 @@ ixgbe_attach(device_t parent, device_t d /* Initialize statistics */ ixgbe_update_stats_counters(adapter); - /* - ** Check PCIE slot type/speed/width - */ - ixgbe_get_slot_info(hw); + /* Check PCIE slot type/speed/width */ + ixgbe_get_slot_info(adapter); + + /* Set an initial default flow control & dmac value */ + adapter->fc = ixgbe_fc_full; + adapter->dmac = 0; + adapter->eee_enabled = 0; + +#ifdef PCI_IOV + if ((hw->mac.type != ixgbe_mac_82598EB) && (adapter->msix > 1)) { + nvlist_t *pf_schema, *vf_schema; + + hw->mbx.ops.init_params(hw); + pf_schema = pci_iov_schema_alloc_node(); + vf_schema = pci_iov_schema_alloc_node(); + pci_iov_schema_add_unicast_mac(vf_schema, "mac-addr", 0, NULL); + pci_iov_schema_add_bool(vf_schema, "mac-anti-spoof", + IOV_SCHEMA_HASDEFAULT, TRUE); + pci_iov_schema_add_bool(vf_schema, "allow-set-mac", + IOV_SCHEMA_HASDEFAULT, FALSE); + pci_iov_schema_add_bool(vf_schema, "allow-promisc", + IOV_SCHEMA_HASDEFAULT, FALSE); + error = pci_iov_attach(dev, pf_schema, vf_schema); + if (error != 0) { + device_printf(dev, + "Error %d setting up SR-IOV\n", error); + } + } +#endif /* PCI_IOV */ - /* Set an initial default flow control value */ - adapter->fc = ixgbe_fc_full; + /* Check for certain supported features */ + ixgbe_check_wol_support(adapter); + + /* Add sysctls */ + ixgbe_add_device_sysctls(adapter); + ixgbe_add_hw_stats(adapter); /* let hardware know driver is loaded */ ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD; IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); - ixgbe_add_hw_stats(adapter); - #ifdef DEV_NETMAP ixgbe_netmap_attach(adapter); #endif /* DEV_NETMAP */ + + if (pmf_device_register(dev, ixgbe_suspend, ixgbe_resume)) + pmf_class_network_register(dev, adapter->ifp); + else + aprint_error_dev(dev, "couldn't establish power 
handler\n"); + INIT_DEBUGOUT("ixgbe_attach: end"); adapter->osdep.attached = true; return; + err_late: ixgbe_free_transmit_structures(adapter); ixgbe_free_receive_structures(adapter); @@ -695,7 +746,6 @@ err_out: if (adapter->mta != NULL) free(adapter->mta, M_DEVBUF); return; - } /********************************************************************* @@ -712,10 +762,10 @@ static int ixgbe_detach(device_t dev, int flags) { struct adapter *adapter = device_private(dev); - struct rx_ring *rxr = adapter->rx_rings; - struct ixgbe_hw_stats *stats = &adapter->stats; struct ix_queue *que = adapter->queues; + struct rx_ring *rxr = adapter->rx_rings; struct tx_ring *txr = adapter->tx_rings; + struct ixgbe_hw_stats *stats = &adapter->stats.pf; u32 ctrl_ext; INIT_DEBUGOUT("ixgbe_detach: begin"); @@ -734,13 +784,24 @@ ixgbe_detach(device_t dev, int flags) } #endif +#ifdef PCI_IOV + if (pci_iov_detach(dev) != 0) { + device_printf(dev, "SR-IOV in use; detach first.\n"); + return (EBUSY); + } +#endif /* PCI_IOV */ + + pmf_device_deregister(dev); + + ether_ifdetach(adapter->ifp); + /* Stop the adapter */ IXGBE_CORE_LOCK(adapter); - ixgbe_stop(adapter); + ixgbe_setup_low_power_mode(adapter); IXGBE_CORE_UNLOCK(adapter); for (int i = 0; i < adapter->num_queues; i++, que++, txr++) { #ifndef IXGBE_LEGACY_TX - softint_disestablish(txr->txq_si); + softint_disestablish(txr->txr_si); #endif softint_disestablish(que->que_si); } @@ -749,6 +810,10 @@ ixgbe_detach(device_t dev, int flags) softint_disestablish(adapter->link_si); softint_disestablish(adapter->mod_si); softint_disestablish(adapter->msf_si); +#ifdef PCI_IOV + softint_disestablish(adapter->mbx_si); +#endif + softint_disestablish(adapter->phy_si); #ifdef IXGBE_FDIR softint_disestablish(adapter->fdir_si); #endif @@ -758,7 +823,6 @@ ixgbe_detach(device_t dev, int flags) ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD; IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext); - ether_ifdetach(adapter->ifp); callout_halt(&adapter->timer, NULL); 
#ifdef DEV_NETMAP netmap_detach(adapter->ifp); @@ -768,15 +832,13 @@ ixgbe_detach(device_t dev, int flags) bus_generic_detach(dev); #endif if_detach(adapter->ifp); + if_percpuq_destroy(adapter->ipq); sysctl_teardown(&adapter->sysctllog); evcnt_detach(&adapter->handleq); evcnt_detach(&adapter->req); - evcnt_detach(&adapter->morerx); - evcnt_detach(&adapter->moretx); - evcnt_detach(&adapter->txloops); evcnt_detach(&adapter->efbig_tx_dma_setup); - evcnt_detach(&adapter->m_defrag_failed); + evcnt_detach(&adapter->mbuf_defrag_failed); evcnt_detach(&adapter->efbig2_tx_dma_setup); evcnt_detach(&adapter->einval_tx_dma_setup); evcnt_detach(&adapter->other_tx_dma_setup); @@ -788,26 +850,30 @@ ixgbe_detach(device_t dev, int flags) txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) { + evcnt_detach(&adapter->queues[i].irqs); evcnt_detach(&txr->no_desc_avail); evcnt_detach(&txr->total_packets); evcnt_detach(&txr->tso_tx); +#ifndef IXGBE_LEGACY_TX + evcnt_detach(&txr->pcq_drops); +#endif - if (i < __arraycount(adapter->stats.mpc)) { - evcnt_detach(&adapter->stats.mpc[i]); + if (i < __arraycount(stats->mpc)) { + evcnt_detach(&stats->mpc[i]); } - if (i < __arraycount(adapter->stats.pxontxc)) { - evcnt_detach(&adapter->stats.pxontxc[i]); - evcnt_detach(&adapter->stats.pxonrxc[i]); - evcnt_detach(&adapter->stats.pxofftxc[i]); - evcnt_detach(&adapter->stats.pxoffrxc[i]); - evcnt_detach(&adapter->stats.pxon2offc[i]); - } - if (i < __arraycount(adapter->stats.qprc)) { - evcnt_detach(&adapter->stats.qprc[i]); - evcnt_detach(&adapter->stats.qptc[i]); - evcnt_detach(&adapter->stats.qbrc[i]); - evcnt_detach(&adapter->stats.qbtc[i]); - evcnt_detach(&adapter->stats.qprdc[i]); + if (i < __arraycount(stats->pxontxc)) { + evcnt_detach(&stats->pxontxc[i]); + evcnt_detach(&stats->pxonrxc[i]); + evcnt_detach(&stats->pxofftxc[i]); + evcnt_detach(&stats->pxoffrxc[i]); + evcnt_detach(&stats->pxon2offc[i]); + } + if (i < __arraycount(stats->qprc)) { + 
evcnt_detach(&stats->qprc[i]); + evcnt_detach(&stats->qptc[i]); + evcnt_detach(&stats->qbrc[i]); + evcnt_detach(&stats->qbtc[i]); + evcnt_detach(&stats->qprdc[i]); } evcnt_detach(&rxr->rx_packets); @@ -815,7 +881,6 @@ ixgbe_detach(device_t dev, int flags) evcnt_detach(&rxr->rx_copies); evcnt_detach(&rxr->no_jmbuf); evcnt_detach(&rxr->rx_discarded); - evcnt_detach(&rxr->rx_irq); } evcnt_detach(&stats->ipcs); evcnt_detach(&stats->l4cs); @@ -888,242 +953,72 @@ static int ixgbe_shutdown(device_t dev) { struct adapter *adapter = device_private(dev); + int error = 0; + + INIT_DEBUGOUT("ixgbe_shutdown: begin"); + IXGBE_CORE_LOCK(adapter); - ixgbe_stop(adapter); + error = ixgbe_setup_low_power_mode(adapter); IXGBE_CORE_UNLOCK(adapter); - return (0); + + return (error); } #endif - -#ifdef IXGBE_LEGACY_TX -/********************************************************************* - * Transmit entry point - * - * ixgbe_start is called by the stack to initiate a transmit. - * The driver will remain in this routine as long as there are - * packets to transmit and transmit resources are available. - * In case resources are not available stack is notified and - * the packet is requeued. 
- **********************************************************************/ - -static void -ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp) +/** + * Methods for going from: + * D0 -> D3: ixgbe_suspend + * D3 -> D0: ixgbe_resume + */ +static bool +ixgbe_suspend(device_t dev, const pmf_qual_t *qual) { - int rc; - struct mbuf *m_head; - struct adapter *adapter = txr->adapter; - - IXGBE_TX_LOCK_ASSERT(txr); - - if ((ifp->if_flags & IFF_RUNNING) == 0) - return; - if (!adapter->link_active) - return; - - while (!IFQ_IS_EMPTY(&ifp->if_snd)) { - if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE) - break; - - IFQ_POLL(&ifp->if_snd, m_head); - if (m_head == NULL) - break; + struct adapter *adapter = device_private(dev); + int error = 0; - if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) { - break; - } - IFQ_DEQUEUE(&ifp->if_snd, m_head); - if (rc == EFBIG) { - struct mbuf *mtmp; + INIT_DEBUGOUT("ixgbe_suspend: begin"); - if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) { - m_head = mtmp; - rc = ixgbe_xmit(txr, m_head); - if (rc != 0) - adapter->efbig2_tx_dma_setup.ev_count++; - } else - adapter->m_defrag_failed.ev_count++; - } - if (rc != 0) { - m_freem(m_head); - continue; - } + IXGBE_CORE_LOCK(adapter); - /* Send a copy of the frame to the BPF listener */ - bpf_mtap(ifp, m_head); + error = ixgbe_setup_low_power_mode(adapter); - /* Set watchdog on */ - getmicrotime(&txr->watchdog_time); - txr->queue_status = IXGBE_QUEUE_WORKING; + IXGBE_CORE_UNLOCK(adapter); - } - return; + return (error); } -/* - * Legacy TX start - called by the stack, this - * always uses the first tx ring, and should - * not be used with multiqueue tx enabled. 
- */ -static void -ixgbe_start(struct ifnet *ifp) +static bool +ixgbe_resume(device_t dev, const pmf_qual_t *qual) { - struct adapter *adapter = ifp->if_softc; - struct tx_ring *txr = adapter->tx_rings; + struct adapter *adapter = device_private(dev); + struct ifnet *ifp = adapter->ifp; + struct ixgbe_hw *hw = &adapter->hw; + u32 wus; - if (ifp->if_flags & IFF_RUNNING) { - IXGBE_TX_LOCK(txr); - ixgbe_start_locked(txr, ifp); - IXGBE_TX_UNLOCK(txr); - } - return; -} + INIT_DEBUGOUT("ixgbe_resume: begin"); -#else /* ! IXGBE_LEGACY_TX */ + IXGBE_CORE_LOCK(adapter); -/* -** Multiqueue Transmit driver -** -*/ -static int -ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m) -{ - struct adapter *adapter = ifp->if_softc; - struct ix_queue *que; - struct tx_ring *txr; - int i, err = 0; -#ifdef RSS - uint32_t bucket_id; -#endif + /* Read & clear WUS register */ + wus = IXGBE_READ_REG(hw, IXGBE_WUS); + if (wus) + device_printf(dev, "Woken up by (WUS): %#010x\n", + IXGBE_READ_REG(hw, IXGBE_WUS)); + IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff); + /* And clear WUFC until next low-power transition */ + IXGBE_WRITE_REG(hw, IXGBE_WUFC, 0); - /* Which queue to use */ /* - * When doing RSS, map it to the same outbound queue - * as the incoming flow would be mapped to. - * - * If everything is setup correctly, it should be the - * same bucket that the current CPU we're on is. + * Required after D3->D0 transition; + * will re-advertise all previous advertised speeds */ - if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { -#ifdef RSS - if (rss_hash2bucket(m->m_pkthdr.flowid, - M_HASHTYPE_GET(m), &bucket_id) == 0) { - /* XXX TODO: spit out something if bucket_id > num_queues? 
*/ - i = bucket_id % adapter->num_queues; - } else { -#endif - i = m->m_pkthdr.flowid % adapter->num_queues; -#ifdef RSS - } -#endif - } else { - i = curcpu % adapter->num_queues; - } - - txr = &adapter->tx_rings[i]; - que = &adapter->queues[i]; - - err = drbr_enqueue(ifp, txr->br, m); - if (err) - return (err); - if (IXGBE_TX_TRYLOCK(txr)) { - ixgbe_mq_start_locked(ifp, txr); - IXGBE_TX_UNLOCK(txr); - } else - softint_schedule(txr->txq_si); - - return (0); -} - -static int -ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr) -{ - struct adapter *adapter = txr->adapter; - struct mbuf *next; - int enqueued = 0, err = 0; - - if (((ifp->if_flags & IFF_RUNNING) == 0) || - adapter->link_active == 0) - return (ENETDOWN); - - /* Process the queue */ -#if __FreeBSD_version < 901504 - next = drbr_dequeue(ifp, txr->br); - while (next != NULL) { - if ((err = ixgbe_xmit(txr, &next)) != 0) { - if (next != NULL) - err = drbr_enqueue(ifp, txr->br, next); -#else - while ((next = drbr_peek(ifp, txr->br)) != NULL) { - if ((err = ixgbe_xmit(txr, &next)) != 0) { - if (next == NULL) { - drbr_advance(ifp, txr->br); - } else { - drbr_putback(ifp, txr->br, next); - } -#endif - break; - } -#if __FreeBSD_version >= 901504 - drbr_advance(ifp, txr->br); -#endif - enqueued++; - /* Send a copy of the frame to the BPF listener */ - bpf_mtap(ifp, next); - if ((ifp->if_flags & IFF_RUNNING) == 0) - break; -#if __FreeBSD_version < 901504 - next = drbr_dequeue(ifp, txr->br); -#endif - } - - if (enqueued > 0) { - /* Set watchdog on */ - txr->queue_status = IXGBE_QUEUE_WORKING; - getmicrotime(&txr->watchdog_time); - } - - if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD) - ixgbe_txeof(txr); - - return (err); -} - -/* - * Called from a taskqueue to drain queued transmit packets. 
- */ -static void -ixgbe_deferred_mq_start(void *arg, int pending) -{ - struct tx_ring *txr = arg; - struct adapter *adapter = txr->adapter; - struct ifnet *ifp = adapter->ifp; + if (ifp->if_flags & IFF_UP) + ixgbe_init_locked(adapter); - IXGBE_TX_LOCK(txr); - if (!drbr_empty(ifp, txr->br)) - ixgbe_mq_start_locked(ifp, txr); - IXGBE_TX_UNLOCK(txr); -} - -/* -** Flush all ring buffers -*/ -static void -ixgbe_qflush(struct ifnet *ifp) -{ - struct adapter *adapter = ifp->if_softc; - struct tx_ring *txr = adapter->tx_rings; - struct mbuf *m; + IXGBE_CORE_UNLOCK(adapter); - for (int i = 0; i < adapter->num_queues; i++, txr++) { - IXGBE_TX_LOCK(txr); - while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) - m_freem(m); - IXGBE_TX_UNLOCK(txr); - } - if_qflush(ifp); + return true; } -#endif /* IXGBE_LEGACY_TX */ static int ixgbe_ifflags_cb(struct ethercom *ec) @@ -1163,7 +1058,6 @@ static int ixgbe_ioctl(struct ifnet * ifp, u_long command, void *data) { struct adapter *adapter = ifp->if_softc; - struct ixgbe_hw *hw = &adapter->hw; struct ifcapreq *ifcr = data; struct ifreq *ifr = data; int error = 0; @@ -1189,8 +1083,37 @@ ixgbe_ioctl(struct ifnet * ifp, u_long c case SIOCSIFMTU: IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); break; +#ifdef __NetBSD__ + case SIOCINITIFADDR: + IOCTL_DEBUGOUT("ioctl: SIOCINITIFADDR"); + break; + case SIOCGIFFLAGS: + IOCTL_DEBUGOUT("ioctl: SIOCGIFFLAGS"); + break; + case SIOCGIFAFLAG_IN: + IOCTL_DEBUGOUT("ioctl: SIOCGIFAFLAG_IN"); + break; + case SIOCGIFADDR: + IOCTL_DEBUGOUT("ioctl: SIOCGIFADDR"); + break; + case SIOCGIFMTU: + IOCTL_DEBUGOUT("ioctl: SIOCGIFMTU (Get Interface MTU)"); + break; + case SIOCGIFCAP: + IOCTL_DEBUGOUT("ioctl: SIOCGIFCAP (Get IF cap)"); + break; + case SIOCGETHERCAP: + IOCTL_DEBUGOUT("ioctl: SIOCGETHERCAP (Get ethercap)"); + break; + case SIOCGLIFADDR: + IOCTL_DEBUGOUT("ioctl: SIOCGLIFADDR (Get Interface addr)"); + break; + case SIOCAIFADDR: + IOCTL_DEBUGOUT("ioctl: SIOCAIFADDR (add/chg IF alias)"); + break; 
+#endif default: - IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command); + IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)", (int)command); break; } @@ -1200,6 +1123,7 @@ ixgbe_ioctl(struct ifnet * ifp, u_long c return ifmedia_ioctl(ifp, ifr, &adapter->media, command); case SIOCGI2C: { + struct ixgbe_hw *hw = &adapter->hw; struct ixgbe_i2c_req i2c; IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)"); error = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); @@ -1239,6 +1163,9 @@ ixgbe_ioctl(struct ifnet * ifp, u_long c else if (command == SIOCSIFCAP || command == SIOCSIFMTU) { IXGBE_CORE_LOCK(adapter); ixgbe_init_locked(adapter); +#ifdef PCI_IOV + ixgbe_recalculate_max_frame(adapter); +#endif IXGBE_CORE_UNLOCK(adapter); } else if (command == SIOCADDMULTI || command == SIOCDELMULTI) { /* @@ -1257,10 +1184,23 @@ ixgbe_ioctl(struct ifnet * ifp, u_long c return error; } -/********************************************************************* - * Init entry point +/* + * Set the various hardware offload abilities. * - * This routine is used in two ways. It is used by the stack as + * This takes the ifnet's if_capenable flags (e.g. set by the user using + * ifconfig) and indicates to the OS via the ifnet's if_hwassist field what + * mbuf offload flags the driver will understand. + */ +static void +ixgbe_set_if_hwassist(struct adapter *adapter) +{ + /* XXX */ +} + +/********************************************************************* + * Init entry point + * + * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. 
@@ -1275,13 +1215,20 @@ ixgbe_init_locked(struct adapter *adapte struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; struct ixgbe_hw *hw = &adapter->hw; - u32 k, txdctl, mhadd, gpie; + struct tx_ring *txr; + struct rx_ring *rxr; + u32 txdctl, mhadd; u32 rxdctl, rxctrl; + int err = 0; +#ifdef PCI_IOV + enum ixgbe_iov_mode mode; +#endif /* XXX check IFF_UP and IFF_RUNNING, power-saving state! */ KASSERT(mutex_owned(&adapter->core_mtx)); INIT_DEBUGOUT("ixgbe_init_locked: begin"); + hw->adapter_stopped = FALSE; ixgbe_stop_adapter(hw); callout_stop(&adapter->timer); @@ -1290,44 +1237,52 @@ ixgbe_init_locked(struct adapter *adapte adapter->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; +#ifdef PCI_IOV + mode = ixgbe_get_iov_mode(adapter); + adapter->pool = ixgbe_max_vfs(mode); + /* Queue indices may change with IOV mode */ + for (int i = 0; i < adapter->num_queues; i++) { + adapter->rx_rings[i].me = ixgbe_pf_que_index(mode, i); + adapter->tx_rings[i].me = ixgbe_pf_que_index(mode, i); + } +#endif /* reprogram the RAR[0] in case user changed it. 
*/ - ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV); + ixgbe_set_rar(hw, 0, hw->mac.addr, adapter->pool, IXGBE_RAH_AV); /* Get the latest mac address, User can use a LAA */ - memcpy(hw->mac.addr, CLLADDR(adapter->ifp->if_sadl), + memcpy(hw->mac.addr, CLLADDR(ifp->if_sadl), IXGBE_ETH_LENGTH_OF_ADDRESS); - ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1); + ixgbe_set_rar(hw, 0, hw->mac.addr, adapter->pool, 1); hw->addr_ctrl.rar_used_count = 1; + /* Set hardware offload abilities from ifnet flags */ + ixgbe_set_if_hwassist(adapter); + /* Prepare transmit descriptors and buffers */ if (ixgbe_setup_transmit_structures(adapter)) { - device_printf(dev,"Could not setup transmit structures\n"); + device_printf(dev, "Could not setup transmit structures\n"); ixgbe_stop(adapter); return; } ixgbe_init_hw(hw); +#ifdef PCI_IOV + ixgbe_initialize_iov(adapter); +#endif ixgbe_initialize_transmit_units(adapter); /* Setup Multicast table */ ixgbe_set_multi(adapter); - /* - ** Determine the correct mbuf pool - ** for doing jumbo frames - */ - if (adapter->max_frame_size <= 2048) + /* Determine the correct mbuf pool, based on frame size */ + if (adapter->max_frame_size <= MCLBYTES) adapter->rx_mbuf_sz = MCLBYTES; - else if (adapter->max_frame_size <= 4096) - adapter->rx_mbuf_sz = MJUMPAGESIZE; - else if (adapter->max_frame_size <= 9216) - adapter->rx_mbuf_sz = MJUM9BYTES; else - adapter->rx_mbuf_sz = MJUM16BYTES; + adapter->rx_mbuf_sz = MJUMPAGESIZE; /* Prepare receive descriptors and buffers */ if (ixgbe_setup_receive_structures(adapter)) { - device_printf(dev,"Could not setup receive structures\n"); + device_printf(dev, "Could not setup receive structures\n"); ixgbe_stop(adapter); return; } @@ -1335,29 +1290,12 @@ ixgbe_init_locked(struct adapter *adapte /* Configure RX settings */ ixgbe_initialize_receive_units(adapter); - gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE); - - /* Enable Fan Failure Interrupt */ - gpie |= IXGBE_SDP1_GPIEN; - - /* Add for Module detection */ - if 
(hw->mac.type == ixgbe_mac_82599EB) - gpie |= IXGBE_SDP2_GPIEN; - - /* Thermal Failure Detection */ - if (hw->mac.type == ixgbe_mac_X540) - gpie |= IXGBE_SDP0_GPIEN; - - if (adapter->msix > 1) { - /* Enable Enhanced MSIX mode */ - gpie |= IXGBE_GPIE_MSIX_MODE; - gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT | - IXGBE_GPIE_OCD; - } - IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie); + /* Enable SDP & MSIX interrupts based on adapter */ + ixgbe_config_gpie(adapter); /* Set MTU size */ if (ifp->if_mtu > ETHERMTU) { + /* aka IXGBE_MAXFRS on 82599 and newer */ mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD); mhadd &= ~IXGBE_MHADD_MFS_MASK; mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT; @@ -1365,9 +1303,9 @@ ixgbe_init_locked(struct adapter *adapte } /* Now enable all the queues */ - for (int i = 0; i < adapter->num_queues; i++) { - txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i)); + txr = &adapter->tx_rings[i]; + txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txr->me)); txdctl |= IXGBE_TXDCTL_ENABLE; /* Set WTHRESH to 8, burst writeback */ txdctl |= (8 << 16); @@ -1379,11 +1317,12 @@ ixgbe_init_locked(struct adapter *adapte * Prefetching enables tx line rate even with 1 queue. */ txdctl |= (32 << 0) | (1 << 8); - IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl); + IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txr->me), txdctl); } - for (int i = 0; i < adapter->num_queues; i++) { - rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)); + for (int i = 0, j = 0; i < adapter->num_queues; i++) { + rxr = &adapter->rx_rings[i]; + rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)); if (hw->mac.type == ixgbe_mac_82598EB) { /* ** PTHRESH = 21 @@ -1394,12 +1333,9 @@ ixgbe_init_locked(struct adapter *adapte rxdctl |= 0x080420; } rxdctl |= IXGBE_RXDCTL_ENABLE; - IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl); - /* XXX I don't trust this loop, and I don't trust the - * XXX memory barrier. What is this meant to do? 
--dyoung - */ - for (k = 0; k < 10; k++) { - if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) & + IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxr->me), rxdctl); + for (; j < 10; j++) { + if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)) & IXGBE_RXDCTL_ENABLE) break; else @@ -1428,10 +1364,10 @@ ixgbe_init_locked(struct adapter *adapte struct netmap_kring *kring = &na->rx_rings[i]; int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); - IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t); + IXGBE_WRITE_REG(hw, IXGBE_RDT(rxr->me), t); } else #endif /* DEV_NETMAP */ - IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1); + IXGBE_WRITE_REG(hw, IXGBE_RDT(rxr->me), adapter->num_rx_desc - 1); } /* Enable Receive engine */ @@ -1470,11 +1406,11 @@ ixgbe_init_locked(struct adapter *adapte #endif /* - ** Check on any SFP devices that - ** need to be kick-started - */ + * Check on any SFP devices that + * need to be kick-started + */ if (hw->phy.type == ixgbe_phy_none) { - int err = hw->phy.ops.identify(hw); + err = hw->phy.ops.identify(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Unsupported SFP+ module type was detected.\n"); @@ -1483,46 +1419,45 @@ ixgbe_init_locked(struct adapter *adapte } /* Set moderation on the Link interrupt */ - IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR); + IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->vector), IXGBE_LINK_ITR); + + /* Configure Energy Efficient Ethernet for supported devices */ + if (hw->mac.ops.setup_eee) { + err = hw->mac.ops.setup_eee(hw, adapter->eee_enabled); + if (err) + device_printf(dev, "Error setting up EEE: %d\n", err); + } + + /* Enable power to the phy. 
*/ + ixgbe_set_phy_power(hw, TRUE); /* Config/Enable Link */ ixgbe_config_link(adapter); /* Hardware Packet Buffer & Flow Control setup */ - { - u32 rxpb, frame, size, tmp; - - frame = adapter->max_frame_size; + ixgbe_config_delay_values(adapter); - /* Calculate High Water */ - if (hw->mac.type == ixgbe_mac_X540) - tmp = IXGBE_DV_X540(frame, frame); - else - tmp = IXGBE_DV(frame, frame); - size = IXGBE_BT2KB(tmp); - rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10; - hw->fc.high_water[0] = rxpb - size; - - /* Now calculate Low Water */ - if (hw->mac.type == ixgbe_mac_X540) - tmp = IXGBE_LOW_DV_X540(frame); - else - tmp = IXGBE_LOW_DV(frame); - hw->fc.low_water[0] = IXGBE_BT2KB(tmp); - - hw->fc.requested_mode = adapter->fc; - hw->fc.pause_time = IXGBE_FC_PAUSE; - hw->fc.send_xon = TRUE; - } /* Initialize the FC settings */ ixgbe_start_hw(hw); /* Set up VLAN support and filter */ ixgbe_setup_vlan_hw_support(adapter); + /* Setup DMA Coalescing */ + ixgbe_config_dmac(adapter); + /* And now turn on interrupts */ ixgbe_enable_intr(adapter); +#ifdef PCI_IOV + /* Enable the use of the MBX by the VF's */ + { + u32 reg = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); + reg |= IXGBE_CTRL_EXT_PFRSTD; + IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, reg); + } +#endif + /* Now inform the stack we're ready */ ifp->if_flags |= IFF_RUNNING; @@ -1540,6 +1475,91 @@ ixgbe_init(struct ifnet *ifp) return 0; /* XXX ixgbe_init_locked cannot fail? really? 
*/ } +static void +ixgbe_config_gpie(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 gpie; + + gpie = IXGBE_READ_REG(hw, IXGBE_GPIE); + + /* Fan Failure Interrupt */ + if (hw->device_id == IXGBE_DEV_ID_82598AT) + gpie |= IXGBE_SDP1_GPIEN; + + /* + * Module detection (SDP2) + * Media ready (SDP1) + */ + if (hw->mac.type == ixgbe_mac_82599EB) { + gpie |= IXGBE_SDP2_GPIEN; + if (hw->device_id != IXGBE_DEV_ID_82599_QSFP_SF_QP) + gpie |= IXGBE_SDP1_GPIEN; + } + + /* + * Thermal Failure Detection (X540) + * Link Detection (X552 SFP+, X552/X557-AT) + */ + if (hw->mac.type == ixgbe_mac_X540 || + hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP || + hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) + gpie |= IXGBE_SDP0_GPIEN_X540; + + if (adapter->msix > 1) { + /* Enable Enhanced MSIX mode */ + gpie |= IXGBE_GPIE_MSIX_MODE; + gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT | + IXGBE_GPIE_OCD; + } + + IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie); + return; +} + +/* + * Requires adapter->max_frame_size to be set. 
+ */ +static void +ixgbe_config_delay_values(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 rxpb, frame, size, tmp; + + frame = adapter->max_frame_size; + + /* Calculate High Water */ + switch (hw->mac.type) { + case ixgbe_mac_X540: + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + tmp = IXGBE_DV_X540(frame, frame); + break; + default: + tmp = IXGBE_DV(frame, frame); + break; + } + size = IXGBE_BT2KB(tmp); + rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10; + hw->fc.high_water[0] = rxpb - size; + + /* Now calculate Low Water */ + switch (hw->mac.type) { + case ixgbe_mac_X540: + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + tmp = IXGBE_LOW_DV_X540(frame); + break; + default: + tmp = IXGBE_LOW_DV(frame); + break; + } + hw->fc.low_water[0] = IXGBE_BT2KB(tmp); + + hw->fc.requested_mode = adapter->fc; + hw->fc.pause_time = IXGBE_FC_PAUSE; + hw->fc.send_xon = TRUE; +} /* ** @@ -1602,12 +1622,13 @@ ixgbe_handle_que(void *context) IXGBE_TX_LOCK(txr); ixgbe_txeof(txr); #ifndef IXGBE_LEGACY_TX - if (!drbr_empty(ifp, txr->br)) + if (pcq_peek(txr->txr_interq) != NULL) ixgbe_mq_start_locked(ifp, txr); -#else - if (!IFQ_IS_EMPTY(&ifp->if_snd)) - ixgbe_start_locked(txr, ifp); #endif + /* Only for queue 0 */ + if ((&adapter->queues[0] == que) + && (!IFQ_IS_EMPTY(&ifp->if_snd))) + ixgbe_start_locked(txr, ifp); IXGBE_TX_UNLOCK(txr); } @@ -1640,10 +1661,10 @@ ixgbe_legacy_irq(void *arg) reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR); - adapter->stats.legint.ev_count++; - ++que->irqs; + adapter->stats.pf.legint.ev_count++; + ++que->irqs.ev_count; if (reg_eicr == 0) { - adapter->stats.intzero.ev_count++; + adapter->stats.pf.intzero.ev_count++; if ((ifp->if_flags & IFF_UP) != 0) ixgbe_enable_intr(adapter); return 0; @@ -1659,41 +1680,34 @@ ixgbe_legacy_irq(void *arg) IXGBE_TX_LOCK(txr); ixgbe_txeof(txr); -#ifdef IXGBE_LEGACY_TX - if (!IFQ_IS_EMPTY(&ifp->if_snd)) - ixgbe_start_locked(txr, ifp); -#else - if (!drbr_empty(ifp, txr->br)) - 
ixgbe_mq_start_locked(ifp, txr); -#endif IXGBE_TX_UNLOCK(txr); } /* Check for fan failure */ - if ((hw->phy.media_type == ixgbe_media_type_copper) && + if ((hw->device_id == IXGBE_DEV_ID_82598AT) && (reg_eicr & IXGBE_EICR_GPI_SDP1)) { device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! " "REPLACE IMMEDIATELY!!\n"); - IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1); + IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); } /* Link status change */ if (reg_eicr & IXGBE_EICR_LSC) softint_schedule(adapter->link_si); + /* External PHY interrupt */ + if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T && + (reg_eicr & IXGBE_EICR_GPI_SDP0_X540)) + softint_schedule(adapter->phy_si); + if (more) -#ifndef IXGBE_LEGACY_TX - softint_schedule(txr->txq_si); -#else softint_schedule(que->que_si); -#endif else ixgbe_enable_intr(adapter); return 1; } -#if defined(NETBSD_MSI_OR_MSIX) /********************************************************************* * * MSIX Queue Interrupt Service routine @@ -1715,7 +1729,7 @@ ixgbe_msix_que(void *arg) return 0; ixgbe_disable_queue(adapter, que->msix); - ++que->irqs; + ++que->irqs.ev_count; #ifdef __NetBSD__ /* Don't run ixgbe_rxeof in interrupt context */ @@ -1726,18 +1740,11 @@ ixgbe_msix_que(void *arg) IXGBE_TX_LOCK(txr); ixgbe_txeof(txr); -#ifdef IXGBE_LEGACY_TX - if (!IFQ_IS_EMPTY(&adapter->ifp->if_snd)) - ixgbe_start_locked(txr, ifp); -#else - if (!drbr_empty(ifp, txr->br)) - ixgbe_mq_start_locked(ifp, txr); -#endif IXGBE_TX_UNLOCK(txr); /* Do AIM now? 
*/ - if (ixgbe_enable_aim == FALSE) + if (adapter->enable_aim == false) goto no_calc; /* ** Do Adaptive Interrupt Moderation: @@ -1799,10 +1806,13 @@ ixgbe_msix_link(void *arg) { struct adapter *adapter = arg; struct ixgbe_hw *hw = &adapter->hw; - u32 reg_eicr; + u32 reg_eicr, mod_mask; ++adapter->link_irq.ev_count; + /* Pause other interrupts */ + IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_OTHER); + /* First get the cause */ reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS); /* Be sure the queue bits are not cleared */ @@ -1811,8 +1821,10 @@ ixgbe_msix_link(void *arg) IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr); /* Link status change */ - if (reg_eicr & IXGBE_EICR_LSC) + if (reg_eicr & IXGBE_EICR_LSC) { + IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_LSC); softint_schedule(adapter->link_si); + } if (adapter->hw.mac.type != ixgbe_mac_82598EB) { #ifdef IXGBE_FDIR @@ -1826,43 +1838,59 @@ ixgbe_msix_link(void *arg) } else #endif if (reg_eicr & IXGBE_EICR_ECC) { - device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! " + device_printf(adapter->dev, "CRITICAL: ECC ERROR!! " "Please Reboot!!\n"); IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC); - } else + } - if (reg_eicr & IXGBE_EICR_GPI_SDP1) { - /* Clear the interrupt */ - IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1); - softint_schedule(adapter->msf_si); - } else if (reg_eicr & IXGBE_EICR_GPI_SDP2) { - /* Clear the interrupt */ - IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2); + /* Check for over temp condition */ + if (reg_eicr & IXGBE_EICR_TS) { + device_printf(adapter->dev, "CRITICAL: OVER TEMP!! 
" + "PHY IS SHUT DOWN!!\n"); + device_printf(adapter->dev, "System shutdown required!\n"); + IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS); + } +#ifdef PCI_IOV + if (reg_eicr & IXGBE_EICR_MAILBOX) + taskqueue_enqueue(adapter->tq, &adapter->mbx_task); +#endif + } + + /* Pluggable optics-related interrupt */ + if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP) + mod_mask = IXGBE_EICR_GPI_SDP0_X540; + else + mod_mask = IXGBE_EICR_GPI_SDP2_BY_MAC(hw); + + if (ixgbe_is_sfp(hw)) { + if (reg_eicr & mod_mask) { + IXGBE_WRITE_REG(hw, IXGBE_EICR, mod_mask); softint_schedule(adapter->mod_si); + } else if (reg_eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw)) { + IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); + softint_schedule(adapter->msf_si); } - } + } /* Check for fan failure */ if ((hw->device_id == IXGBE_DEV_ID_82598AT) && (reg_eicr & IXGBE_EICR_GPI_SDP1)) { + IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1); device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! " "REPLACE IMMEDIATELY!!\n"); - IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1); } - /* Check for over temp condition */ - if ((hw->mac.type == ixgbe_mac_X540) && - (reg_eicr & IXGBE_EICR_TS)) { - device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! 
" - "PHY IS SHUT DOWN!!\n"); - device_printf(adapter->dev, "System shutdown required\n"); - IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS); - } + /* External PHY interrupt */ + if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T && + (reg_eicr & IXGBE_EICR_GPI_SDP0_X540)) { + IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP0_X540); + softint_schedule(adapter->phy_si); + } + /* Re-enable other interrupts */ IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER); return 1; } -#endif /********************************************************************* * @@ -1877,6 +1905,7 @@ ixgbe_media_status(struct ifnet * ifp, s { struct adapter *adapter = ifp->if_softc; struct ixgbe_hw *hw = &adapter->hw; + int layer; INIT_DEBUGOUT("ixgbe_media_status: begin"); IXGBE_CORE_LOCK(adapter); @@ -1886,34 +1915,140 @@ ixgbe_media_status(struct ifnet * ifp, s ifmr->ifm_active = IFM_ETHER; if (!adapter->link_active) { + ifmr->ifm_active |= IFM_NONE; IXGBE_CORE_UNLOCK(adapter); return; } ifmr->ifm_status |= IFM_ACTIVE; + layer = adapter->phy_layer; - /* - * Not all NIC are 1000baseSX as an example X540T. - * We must set properly the media based on NIC model. 
- */ - switch (hw->device_id) { - case IXGBE_DEV_ID_X540T: - if (adapter->link_speed == IXGBE_LINK_SPEED_100_FULL) - ifmr->ifm_active |= IFM_100_TX | IFM_FDX; - else if (adapter->link_speed == IXGBE_LINK_SPEED_1GB_FULL) + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T || + layer & IXGBE_PHYSICAL_LAYER_1000BASE_T || + layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_T | IFM_FDX; + break; + case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_T | IFM_FDX; - else if (adapter->link_speed == IXGBE_LINK_SPEED_10GB_FULL) - ifmr->ifm_active |= adapter->optics | IFM_FDX; - break; - default: - if (adapter->link_speed == IXGBE_LINK_SPEED_100_FULL) + break; + case IXGBE_LINK_SPEED_100_FULL: ifmr->ifm_active |= IFM_100_TX | IFM_FDX; - else if (adapter->link_speed == IXGBE_LINK_SPEED_1GB_FULL) + break; + } + if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU || + layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_TWINAX | IFM_FDX; + break; + } + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_LR | IFM_FDX; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + ifmr->ifm_active |= IFM_1000_LX | IFM_FDX; + break; + } + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_LRM | IFM_FDX; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + ifmr->ifm_active |= IFM_1000_LX | IFM_FDX; + break; + } + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR || + layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_SR | IFM_FDX; + break; + case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_SX | IFM_FDX; - else if (adapter->link_speed == IXGBE_LINK_SPEED_10GB_FULL) - 
ifmr->ifm_active |= adapter->optics | IFM_FDX; - break; - } + break; + } + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX; + break; + } + /* + ** XXX: These need to use the proper media types once + ** they're added. + */ +#ifndef IFM_ETH_XTYPE + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_SR | IFM_FDX; + break; + case IXGBE_LINK_SPEED_2_5GB_FULL: + ifmr->ifm_active |= IFM_2500_SX | IFM_FDX; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + ifmr->ifm_active |= IFM_1000_CX | IFM_FDX; + break; + } + else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4 + || layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX; + break; + case IXGBE_LINK_SPEED_2_5GB_FULL: + ifmr->ifm_active |= IFM_2500_SX | IFM_FDX; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + ifmr->ifm_active |= IFM_1000_CX | IFM_FDX; + break; + } +#else + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_KR | IFM_FDX; + break; + case IXGBE_LINK_SPEED_2_5GB_FULL: + ifmr->ifm_active |= IFM_2500_KX | IFM_FDX; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + ifmr->ifm_active |= IFM_1000_KX | IFM_FDX; + break; + } + else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4 + || layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_KX4 | IFM_FDX; + break; + case IXGBE_LINK_SPEED_2_5GB_FULL: + ifmr->ifm_active |= IFM_2500_KX | IFM_FDX; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + ifmr->ifm_active |= IFM_1000_KX | IFM_FDX; + break; + } +#endif + + /* If nothing is recognized... 
*/ +#if 0 + if (IFM_SUBTYPE(ifmr->ifm_active) == 0) + ifmr->ifm_active |= IFM_UNKNOWN; +#endif + + /* Display current flow control setting used on link */ + if (hw->fc.current_mode == ixgbe_fc_rx_pause || + hw->fc.current_mode == ixgbe_fc_full) + ifmr->ifm_active |= IFM_ETH_RXPAUSE; + if (hw->fc.current_mode == ixgbe_fc_tx_pause || + hw->fc.current_mode == ixgbe_fc_full) + ifmr->ifm_active |= IFM_ETH_TXPAUSE; IXGBE_CORE_UNLOCK(adapter); @@ -1933,173 +2068,94 @@ ixgbe_media_change(struct ifnet * ifp) { struct adapter *adapter = ifp->if_softc; struct ifmedia *ifm = &adapter->media; + struct ixgbe_hw *hw = &adapter->hw; + ixgbe_link_speed speed = 0; INIT_DEBUGOUT("ixgbe_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); - switch (IFM_SUBTYPE(ifm->ifm_media)) { - case IFM_10G_T: - case IFM_AUTO: - adapter->hw.phy.autoneg_advertised = - IXGBE_LINK_SPEED_100_FULL | - IXGBE_LINK_SPEED_1GB_FULL | - IXGBE_LINK_SPEED_10GB_FULL; - break; - default: - device_printf(adapter->dev, "Only auto media type\n"); - return (EINVAL); - } - - return (0); -} - -/********************************************************************* - * - * This routine maps the mbufs to tx descriptors, allowing the - * TX engine to transmit the packets. 
- * - return 0 on success, positive on failure - * - **********************************************************************/ - -static int -ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head) -{ - struct m_tag *mtag; - struct adapter *adapter = txr->adapter; - struct ethercom *ec = &adapter->osdep.ec; - u32 olinfo_status = 0, cmd_type_len; - int i, j, error; - int first; - bus_dmamap_t map; - struct ixgbe_tx_buf *txbuf; - union ixgbe_adv_tx_desc *txd = NULL; - - /* Basic descriptor defines */ - cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA | - IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT); - - if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL) - cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE; - - /* - * Important to capture the first descriptor - * used because it will contain the index of - * the one we tell the hardware to report back - */ - first = txr->next_avail_desc; - txbuf = &txr->tx_buffers[first]; - map = txbuf->map; - - /* - * Map the packet for DMA. - */ - error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, - m_head, BUS_DMA_NOWAIT); - - if (__predict_false(error)) { - - switch (error) { - case EAGAIN: - adapter->eagain_tx_dma_setup.ev_count++; - return EAGAIN; - case ENOMEM: - adapter->enomem_tx_dma_setup.ev_count++; - return EAGAIN; - case EFBIG: - /* - * XXX Try it again? - * do m_defrag() and retry bus_dmamap_load_mbuf(). 
- */ - adapter->efbig_tx_dma_setup.ev_count++; - return error; - case EINVAL: - adapter->einval_tx_dma_setup.ev_count++; - return error; - default: - adapter->other_tx_dma_setup.ev_count++; - return error; - } - } - - /* Make certain there are enough descriptors */ - if (map->dm_nsegs > txr->tx_avail - 2) { - txr->no_desc_avail.ev_count++; - ixgbe_dmamap_unload(txr->txtag, txbuf->map); - return EAGAIN; - } + if (hw->phy.media_type == ixgbe_media_type_backplane) + return (ENODEV); /* - ** Set up the appropriate offload context - ** this will consume the first descriptor + ** We don't actually need to check against the supported + ** media types of the adapter; ifmedia will take care of + ** that for us. */ - error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status); - if (__predict_false(error)) { - return (error); +#ifndef IFM_ETH_XTYPE + switch (IFM_SUBTYPE(ifm->ifm_media)) { + case IFM_AUTO: + case IFM_10G_T: + speed |= IXGBE_LINK_SPEED_100_FULL; + case IFM_10G_LRM: + case IFM_10G_SR: /* KR, too */ + case IFM_10G_LR: + case IFM_10G_CX4: /* KX4 */ + speed |= IXGBE_LINK_SPEED_1GB_FULL; + case IFM_10G_TWINAX: + speed |= IXGBE_LINK_SPEED_10GB_FULL; + break; + case IFM_1000_T: + speed |= IXGBE_LINK_SPEED_100_FULL; + case IFM_1000_LX: + case IFM_1000_SX: + case IFM_1000_CX: /* KX */ + speed |= IXGBE_LINK_SPEED_1GB_FULL; + break; + case IFM_100_TX: + speed |= IXGBE_LINK_SPEED_100_FULL; + break; + default: + goto invalid; } - -#ifdef IXGBE_FDIR - /* Do the flow director magic */ - if ((txr->atr_sample) && (!adapter->fdir_reinit)) { - ++txr->atr_count; - if (txr->atr_count >= atr_sample_rate) { - ixgbe_atr(txr, m_head); - txr->atr_count = 0; - } +#else + switch (IFM_SUBTYPE(ifm->ifm_media)) { + case IFM_AUTO: + case IFM_10G_T: + speed |= IXGBE_LINK_SPEED_100_FULL; + case IFM_10G_LRM: + case IFM_10G_KR: + case IFM_10G_LR: + case IFM_10G_KX4: + speed |= IXGBE_LINK_SPEED_1GB_FULL; + case IFM_10G_TWINAX: + speed |= IXGBE_LINK_SPEED_10GB_FULL; + break; + case 
IFM_1000_T: + speed |= IXGBE_LINK_SPEED_100_FULL; + case IFM_1000_LX: + case IFM_1000_SX: + case IFM_1000_KX: + speed |= IXGBE_LINK_SPEED_1GB_FULL; + break; + case IFM_100_TX: + speed |= IXGBE_LINK_SPEED_100_FULL; + break; + default: + goto invalid; } #endif - i = txr->next_avail_desc; - for (j = 0; j < map->dm_nsegs; j++) { - bus_size_t seglen; - bus_addr_t segaddr; - - txbuf = &txr->tx_buffers[i]; - txd = &txr->tx_base[i]; - seglen = map->dm_segs[j].ds_len; - segaddr = htole64(map->dm_segs[j].ds_addr); - - txd->read.buffer_addr = segaddr; - txd->read.cmd_type_len = htole32(txr->txd_cmd | - cmd_type_len |seglen); - txd->read.olinfo_status = htole32(olinfo_status); - - if (++i == txr->num_desc) - i = 0; + hw->mac.autotry_restart = TRUE; + hw->mac.ops.setup_link(hw, speed, TRUE); + if (IFM_SUBTYPE(ifm->ifm_media) == IFM_AUTO) { + adapter->advertise = 0; + } else { + if ((speed & IXGBE_LINK_SPEED_10GB_FULL) != 0) + adapter->advertise |= 1 << 2; + if ((speed & IXGBE_LINK_SPEED_1GB_FULL) != 0) + adapter->advertise |= 1 << 1; + if ((speed & IXGBE_LINK_SPEED_100_FULL) != 0) + adapter->advertise |= 1 << 0; } - txd->read.cmd_type_len |= - htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS); - txr->tx_avail -= map->dm_nsegs; - txr->next_avail_desc = i; - - txbuf->m_head = m_head; - /* - ** Here we swap the map so the last descriptor, - ** which gets the completion interrupt has the - ** real map, and the first descriptor gets the - ** unused map from this descriptor. - */ - txr->tx_buffers[first].map = txbuf->map; - txbuf->map = map; - bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len, - BUS_DMASYNC_PREWRITE); - - /* Set the EOP descriptor that will be marked done */ - txbuf = &txr->tx_buffers[first]; - txbuf->eop = txd; - - ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - /* - * Advance the Transmit Descriptor Tail (Tdt), this tells the - * hardware that this frame is available to transmit. 
- */ - ++txr->total_packets.ev_count; - IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i); + return (0); - return 0; +invalid: + device_printf(adapter->dev, "Invalid media type!\n"); + return (EINVAL); } static void @@ -2152,20 +2208,19 @@ ixgbe_set_promisc(struct adapter *adapte static void ixgbe_set_multi(struct adapter *adapter) { - struct ether_multi *enm; - struct ether_multistep step; - u32 fctrl; - u8 *mta; - u8 *update_ptr; - int mcnt = 0; - struct ethercom *ec = &adapter->osdep.ec; - struct ifnet *ifp = adapter->ifp; + u32 fctrl; + u8 *update_ptr; + struct ixgbe_mc_addr *mta; + int mcnt = 0; + struct ifnet *ifp = adapter->ifp; + struct ethercom *ec = &adapter->osdep.ec; + struct ether_multi *enm; + struct ether_multistep step; IOCTL_DEBUGOUT("ixgbe_set_multi: begin"); mta = adapter->mta; - bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS * - MAX_NUM_MULTICAST_ADDRESSES); + bzero(mta, sizeof(*mta) * MAX_NUM_MULTICAST_ADDRESSES); ifp->if_flags &= ~IFF_ALLMULTI; ETHER_FIRST_MULTI(step, ec, enm); @@ -2177,8 +2232,8 @@ ixgbe_set_multi(struct adapter *adapter) break; } bcopy(enm->enm_addrlo, - &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS], - IXGBE_ETH_LENGTH_OF_ADDRESS); + mta[mcnt].addr, IXGBE_ETH_LENGTH_OF_ADDRESS); + mta[mcnt].vmdq = adapter->pool; mcnt++; ETHER_NEXT_MULTI(step, enm); } @@ -2194,7 +2249,7 @@ ixgbe_set_multi(struct adapter *adapter) IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl); if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) { - update_ptr = mta; + update_ptr = (u8 *)mta; ixgbe_update_mc_addr_list(&adapter->hw, update_ptr, mcnt, ixgbe_mc_array_itr, TRUE); } @@ -2210,13 +2265,13 @@ ixgbe_set_multi(struct adapter *adapter) static u8 * ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq) { - u8 *addr = *update_ptr; - u8 *newptr; - *vmdq = 0; - - newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS; - *update_ptr = newptr; - return addr; + struct ixgbe_mc_addr *mta; + + mta = (struct ixgbe_mc_addr *)*update_ptr; + *vmdq = mta->vmdq; + + 
*update_ptr = (u8*)(mta + 1); + return (mta->addr); } @@ -2229,13 +2284,23 @@ ixgbe_mc_array_itr(struct ixgbe_hw *hw, **********************************************************************/ static void +ixgbe_local_timer(void *arg) +{ + struct adapter *adapter = arg; + + IXGBE_CORE_LOCK(adapter); + ixgbe_local_timer1(adapter); + IXGBE_CORE_UNLOCK(adapter); +} + +static void ixgbe_local_timer1(void *arg) { struct adapter *adapter = arg; device_t dev = adapter->dev; struct ix_queue *que = adapter->queues; - struct tx_ring *txr = adapter->tx_rings; - int hung = 0, paused = 0; + u64 queues = 0; + int hung = 0; KASSERT(mutex_owned(&adapter->core_mtx)); @@ -2248,30 +2313,44 @@ ixgbe_local_timer1(void *arg) ixgbe_update_stats_counters(adapter); /* - * If the interface has been paused - * then don't do the watchdog check - */ - if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF) - paused = 1; - - /* ** Check the TX queues status + ** - mark hung queues so we don't schedule on them ** - watchdog only if all queues show hung */ - for (int i = 0; i < adapter->num_queues; i++, que++, txr++) { - if ((txr->queue_status == IXGBE_QUEUE_HUNG) && - (paused == 0)) + for (int i = 0; i < adapter->num_queues; i++, que++) { + /* Keep track of queues with work for soft irq */ + if (que->txr->busy) + queues |= ((u64)1 << que->me); + /* + ** Each time txeof runs without cleaning, but there + ** are uncleaned descriptors it increments busy. If + ** we get to the MAX we declare it hung. 
+ */ + if (que->busy == IXGBE_QUEUE_HUNG) { ++hung; - else if (txr->queue_status == IXGBE_QUEUE_WORKING) -#ifndef IXGBE_LEGACY_TX - softint_schedule(txr->txq_si); -#else - softint_schedule(que->que_si); -#endif + /* Mark the queue as inactive */ + adapter->active_queues &= ~((u64)1 << que->me); + continue; + } else { + /* Check if we've come back from hung */ + if ((adapter->active_queues & ((u64)1 << que->me)) == 0) + adapter->active_queues |= ((u64)1 << que->me); + } + if (que->busy >= IXGBE_MAX_TX_BUSY) { + device_printf(dev,"Warning queue %d " + "appears to be hung!\n", i); + que->txr->busy = IXGBE_QUEUE_HUNG; + ++hung; + } + } + /* Only truely watchdog if all queues show hung */ if (hung == adapter->num_queues) goto watchdog; + else if (queues != 0) { /* Force an IRQ on queues with work */ + ixgbe_rearm_queues(adapter, queues); + } out: callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter); @@ -2279,26 +2358,11 @@ out: watchdog: device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); - device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me, - IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)), - IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me))); - device_printf(dev,"TX(%d) desc avail = %d," - "Next TX to Clean = %d\n", - txr->me, txr->tx_avail, txr->next_to_clean); adapter->ifp->if_flags &= ~IFF_RUNNING; adapter->watchdog_events.ev_count++; ixgbe_init_locked(adapter); } -static void -ixgbe_local_timer(void *arg) -{ - struct adapter *adapter = arg; - - IXGBE_CORE_LOCK(adapter); - ixgbe_local_timer1(adapter); - IXGBE_CORE_UNLOCK(adapter); -} /* ** Note: this routine updates the OS on the link state @@ -2311,7 +2375,6 @@ ixgbe_update_link_status(struct adapter struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; - if (adapter->link_up){ if (adapter->link_active == FALSE) { if (bootverbose) @@ -2321,7 +2384,12 @@ ixgbe_update_link_status(struct adapter adapter->link_active = TRUE; /* Update any Flow Control changes */ 
ixgbe_fc_enable(&adapter->hw); + /* Update DMA coalescing config */ + ixgbe_config_dmac(adapter); if_link_state_change(ifp, LINK_STATE_UP); +#ifdef PCI_IOV + ixgbe_ping_all_vfs(adapter); +#endif } } else { /* Link down */ if (adapter->link_active == TRUE) { @@ -2329,6 +2397,9 @@ ixgbe_update_link_status(struct adapter device_printf(dev,"Link is Down\n"); if_link_state_change(ifp, LINK_STATE_DOWN); adapter->link_active = FALSE; +#ifdef PCI_IOV + ixgbe_ping_all_vfs(adapter); +#endif } } @@ -2416,10 +2487,15 @@ ixgbe_identify_hardware(struct adapter * hw->subsystem_vendor_id = PCI_SUBSYS_VENDOR(subid); hw->subsystem_device_id = PCI_SUBSYS_ID(subid); + /* + ** Make sure BUSMASTER is set + */ + ixgbe_pci_enable_busmaster(pc, tag); + /* We need this here to set the num_segs below */ ixgbe_set_mac_type(hw); - /* Pick up the 82599 and VF settings */ + /* Pick up the 82599 settings */ if (hw->mac.type != ixgbe_mac_82598EB) { hw->phy.smart_speed = ixgbe_smart_speed; adapter->num_segs = IXGBE_82599_SCATTER; @@ -2440,7 +2516,7 @@ ixgbe_setup_optics(struct adapter *adapt struct ixgbe_hw *hw = &adapter->hw; int layer; - layer = ixgbe_get_supported_physical_layer(hw); + layer = adapter->phy_layer = ixgbe_get_supported_physical_layer(hw); if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) { adapter->optics = IFM_10G_T; @@ -2498,27 +2574,11 @@ ixgbe_allocate_legacy(struct adapter *ad #ifndef IXGBE_LEGACY_TX struct tx_ring *txr = adapter->tx_rings; #endif -#ifndef NETBSD_MSI_OR_MSIX - pci_intr_handle_t ih; -#else int counts[PCI_INTR_TYPE_SIZE]; pci_intr_type_t intr_type, max_type; -#endif char intrbuf[PCI_INTRSTR_LEN]; const char *intrstr = NULL; -#ifndef NETBSD_MSI_OR_MSIX - /* We allocate a single interrupt resource */ - if (pci_intr_map(pa, &ih) != 0) { - aprint_error_dev(dev, "unable to map interrupt\n"); - return ENXIO; - } else { - intrstr = pci_intr_string(adapter->osdep.pc, ih, intrbuf, - sizeof(intrbuf)); - } - adapter->osdep.ihs[0] = pci_intr_establish(adapter->osdep.pc, ih, - 
IPL_NET, ixgbe_legacy_irq, que); -#else /* Allocation settings */ max_type = PCI_INTR_TYPE_MSI; counts[PCI_INTR_TYPE_MSIX] = 0; @@ -2533,8 +2593,9 @@ alloc_retry: adapter->osdep.nintrs = 1; intrstr = pci_intr_string(adapter->osdep.pc, adapter->osdep.intrs[0], intrbuf, sizeof(intrbuf)); - adapter->osdep.ihs[0] = pci_intr_establish(adapter->osdep.pc, - adapter->osdep.intrs[0], IPL_NET, ixgbe_legacy_irq, que); + adapter->osdep.ihs[0] = pci_intr_establish_xname(adapter->osdep.pc, + adapter->osdep.intrs[0], IPL_NET, ixgbe_legacy_irq, que, + device_xname(dev)); if (adapter->osdep.ihs[0] == NULL) { intr_type = pci_intr_type(adapter->osdep.pc, adapter->osdep.intrs[0]); @@ -2553,14 +2614,11 @@ alloc_retry: break; } } -#endif if (adapter->osdep.ihs[0] == NULL) { aprint_error_dev(dev, "couldn't establish interrupt%s%s\n", intrstr ? " at " : "", intrstr ? intrstr : ""); -#ifdef NETBSD_MSI_OR_MSIX pci_intr_release(adapter->osdep.pc, adapter->osdep.intrs, 1); -#endif return ENXIO; } aprint_normal_dev(dev, "interrupting at %s\n", intrstr); @@ -2569,22 +2627,26 @@ alloc_retry: * processing contexts. 
*/ #ifndef IXGBE_LEGACY_TX - txr->txq_si = softint_establish(SOFTINT_NET, ixgbe_deferred_mq_start, - txr); + txr->txr_si = softint_establish(SOFTINT_NET | IXGBE_SOFTINFT_FLAGS, + ixgbe_deferred_mq_start, txr); #endif - que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que, que); + que->que_si = softint_establish(SOFTINT_NET | IXGBE_SOFTINFT_FLAGS, + ixgbe_handle_que, que); /* Tasklets for Link, SFP and Multispeed Fiber */ - adapter->link_si = - softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter); - adapter->mod_si = - softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter); - adapter->msf_si = - softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter); + adapter->link_si = softint_establish(SOFTINT_NET |IXGBE_SOFTINFT_FLAGS, + ixgbe_handle_link, adapter); + adapter->mod_si = softint_establish(SOFTINT_NET | IXGBE_SOFTINFT_FLAGS, + ixgbe_handle_mod, adapter); + adapter->msf_si = softint_establish(SOFTINT_NET | IXGBE_SOFTINFT_FLAGS, + ixgbe_handle_msf, adapter); + adapter->phy_si = softint_establish(SOFTINT_NET | IXGBE_SOFTINFT_FLAGS, + ixgbe_handle_phy, adapter); #ifdef IXGBE_FDIR adapter->fdir_si = - softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter); + softint_establish(SOFTINT_NET | IXGBE_SOFTINFT_FLAGS, + ixgbe_reinit_fdir, adapter); #endif if (que->que_si == NULL || adapter->link_si == NULL || @@ -2599,7 +2661,7 @@ alloc_retry: } /* For simplicity in the handlers */ - adapter->que_mask = IXGBE_EIMS_ENABLE_MASK; + adapter->active_queues = IXGBE_EIMS_ENABLE_MASK; return (0); } @@ -2613,22 +2675,22 @@ alloc_retry: static int ixgbe_allocate_msix(struct adapter *adapter, const struct pci_attach_args *pa) { -#if !defined(NETBSD_MSI_OR_MSIX) - return 0; -#else device_t dev = adapter->dev; struct ix_queue *que = adapter->queues; struct tx_ring *txr = adapter->tx_rings; pci_chipset_tag_t pc; char intrbuf[PCI_INTRSTR_LEN]; + char intr_xname[32]; const char *intrstr = NULL; int error, vector = 0; int cpu_id = 0; kcpuset_t *affinity; +#ifdef RSS + 
cpuset_t cpu_mask; +#endif pc = adapter->osdep.pc; #ifdef RSS - cpuset_t cpu_mask; /* * If we're doing RSS, the number of queues needs to * match the number of RSS buckets that are configured. @@ -2660,15 +2722,18 @@ ixgbe_allocate_msix(struct adapter *adap kcpuset_create(&affinity, false); for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) { + snprintf(intr_xname, sizeof(intr_xname), "%s TXRX%d", + device_xname(dev), i); intrstr = pci_intr_string(pc, adapter->osdep.intrs[i], intrbuf, sizeof(intrbuf)); -#ifdef IXG_MPSAFE - pci_intr_setattr(pc, adapter->osdep.intrs[i], PCI_INTR_MPSAFE, +#ifdef IXGBE_MPSAFE + pci_intr_setattr(pc, &adapter->osdep.intrs[i], PCI_INTR_MPSAFE, true); #endif /* Set the handler function */ - que->res = adapter->osdep.ihs[i] = pci_intr_establish(pc, - adapter->osdep.intrs[i], IPL_NET, ixgbe_msix_que, que); + que->res = adapter->osdep.ihs[i] = pci_intr_establish_xname(pc, + adapter->osdep.intrs[i], IPL_NET, ixgbe_msix_que, que, + intr_xname); if (que->res == NULL) { pci_intr_release(pc, adapter->osdep.intrs, adapter->osdep.nintrs); @@ -2678,7 +2743,7 @@ ixgbe_allocate_msix(struct adapter *adap return ENXIO; } que->msix = vector; - adapter->que_mask |= (u64)(1 << que->msix); + adapter->active_queues |= (u64)(1 << que->msix); #ifdef RSS /* * The queue ID is used as the RSS layer bucket ID. 
@@ -2705,22 +2770,27 @@ ixgbe_allocate_msix(struct adapter *adap aprint_normal_dev(dev, "for TX/RX, interrupting at %s", intrstr); if (error == 0) { +#if 1 /* def IXGBE_DEBUG */ #ifdef RSS - aprintf_normal(", bound RSS bucket %d to CPU %d\n", - i, cpu_id); + aprintf_normal( + ", bound RSS bucket %d to CPU %d", + i, cpu_id % ncpu); #else - aprint_normal(", bound queue %d to cpu %d\n", - i, cpu_id); + aprint_normal( + ", bound queue %d to cpu %d", + i, cpu_id % ncpu); #endif - } else - aprint_normal("\n"); - +#endif /* IXGBE_DEBUG */ + } + aprint_normal("\n"); #ifndef IXGBE_LEGACY_TX - txr->txq_si = softint_establish(SOFTINT_NET, + txr->txr_si + = softint_establish(SOFTINT_NET | IXGBE_SOFTINFT_FLAGS, ixgbe_deferred_mq_start, txr); #endif - que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que, - que); + que->que_si + = softint_establish(SOFTINT_NET | IXGBE_SOFTINFT_FLAGS, + ixgbe_handle_que, que); if (que->que_si == NULL) { aprint_error_dev(dev, "could not establish software interrupt\n"); @@ -2729,15 +2799,17 @@ ixgbe_allocate_msix(struct adapter *adap /* and Link */ cpu_id++; + snprintf(intr_xname, sizeof(intr_xname), "%s link", device_xname(dev)); intrstr = pci_intr_string(pc, adapter->osdep.intrs[vector], intrbuf, sizeof(intrbuf)); -#ifdef IXG_MPSAFE +#ifdef IXGBE_MPSAFE pci_intr_setattr(pc, &adapter->osdep.intrs[vector], PCI_INTR_MPSAFE, true); #endif /* Set the link handler function */ - adapter->osdep.ihs[vector] = pci_intr_establish(pc, - adapter->osdep.intrs[vector], IPL_NET, ixgbe_msix_link, adapter); + adapter->osdep.ihs[vector] = pci_intr_establish_xname(pc, + adapter->osdep.intrs[vector], IPL_NET, ixgbe_msix_link, adapter, + intr_xname); if (adapter->osdep.ihs[vector] == NULL) { adapter->res = NULL; aprint_error_dev(dev, "Failed to register LINK handler\n"); @@ -2752,26 +2824,30 @@ ixgbe_allocate_msix(struct adapter *adap aprint_normal_dev(dev, "for link, interrupting at %s", intrstr); if (error == 0) - aprint_normal(", affinity to cpu %d\n", 
cpu_id); + aprint_normal(", affinity to cpu %d\n", cpu_id % ncpu); else aprint_normal("\n"); - adapter->linkvec = vector; + adapter->vector = vector; /* Tasklets for Link, SFP and Multispeed Fiber */ - adapter->link_si = - softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter); - adapter->mod_si = - softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter); - adapter->msf_si = - softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter); + adapter->link_si = softint_establish(SOFTINT_NET |IXGBE_SOFTINFT_FLAGS, + ixgbe_handle_link, adapter); + adapter->mod_si = softint_establish(SOFTINT_NET | IXGBE_SOFTINFT_FLAGS, + ixgbe_handle_mod, adapter); + adapter->msf_si = softint_establish(SOFTINT_NET | IXGBE_SOFTINFT_FLAGS, + ixgbe_handle_msf, adapter); +#ifdef PCI_IOV + TASK_INIT(&adapter->mbx_task, 0, ixgbe_handle_mbx, adapter); +#endif + adapter->phy_si = softint_establish(SOFTINT_NET | IXGBE_SOFTINFT_FLAGS, + ixgbe_handle_phy, adapter); #ifdef IXGBE_FDIR - adapter->fdir_si = - softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter); + adapter->fdir_si = softint_establish(SOFTINT_NET | XGBE_SOFTINFT_FLAGS, + ixgbe_reinit_fdir, adapter); #endif kcpuset_destroy(affinity); return (0); -#endif } /* @@ -2780,9 +2856,6 @@ ixgbe_allocate_msix(struct adapter *adap static int ixgbe_setup_msix(struct adapter *adapter) { -#if !defined(NETBSD_MSI_OR_MSIX) - return 0; -#else device_t dev = adapter->dev; int want, queues, msgs; @@ -2792,17 +2865,14 @@ ixgbe_setup_msix(struct adapter *adapter /* First try MSI/X */ msgs = pci_msix_count(adapter->osdep.pc, adapter->osdep.tag); - if (msgs < IXG_MSIX_NINTR) + msgs = MIN(msgs, IXG_MAX_NINTR); + if (msgs < 2) goto msi; adapter->msix_mem = (void *)1; /* XXX */ /* Figure out a reasonable auto config value */ - queues = (ncpu > (msgs-1)) ? (msgs-1) : ncpu; - - /* Override based on tuneable */ - if (ixgbe_num_queues != 0) - queues = ixgbe_num_queues; + queues = (ncpu > (msgs - 1)) ? 
(msgs - 1) : ncpu; #ifdef RSS /* If we're doing RSS, clamp at the number of RSS buckets */ @@ -2810,6 +2880,12 @@ ixgbe_setup_msix(struct adapter *adapter queues = rss_getnumbuckets(); #endif + if (ixgbe_num_queues != 0) + queues = ixgbe_num_queues; + /* Set max queues to 8 when autoconfiguring */ + else if ((ixgbe_num_queues == 0) && (queues > 8)) + queues = 8; + /* reflect correct sysctl value */ ixgbe_num_queues = queues; @@ -2840,9 +2916,8 @@ msi: msgs = pci_msi_count(adapter->osdep.pc, adapter->osdep.tag); adapter->msix_mem = NULL; /* XXX */ msgs = 1; - aprint_normal_dev(dev,"Using an MSI interrupt\n"); + aprint_normal_dev(dev, "Using an MSI interrupt\n"); return (msgs); -#endif } @@ -2880,13 +2955,13 @@ map_err: aprint_error_dev(dev, "unexpected type on BAR0\n"); return ENXIO; } + adapter->hw.back = adapter; - /* Legacy defaults */ + /* Default to 1 queue if MSI-X setup fails */ adapter->num_queues = 1; - adapter->hw.back = &adapter->osdep; /* - ** Now setup MSI or MSI/X, should + ** Now setup MSI or MSI-X, should ** return us the number of supported ** vectors. 
(Will be 1 for MSI) */ @@ -2897,12 +2972,9 @@ map_err: static void ixgbe_free_pci_resources(struct adapter * adapter) { -#if defined(NETBSD_MSI_OR_MSIX) struct ix_queue *que = adapter->queues; -#endif int rid; -#if defined(NETBSD_MSI_OR_MSIX) /* ** Release all msix queue resources: */ @@ -2911,11 +2983,11 @@ ixgbe_free_pci_resources(struct adapter pci_intr_disestablish(adapter->osdep.pc, adapter->osdep.ihs[i]); } -#endif + /* Clean the Legacy or Link interrupt last */ - if (adapter->linkvec) /* we are doing MSIX */ - rid = adapter->linkvec; + if (adapter->vector) /* we are doing MSIX */ + rid = adapter->vector; else rid = 0; @@ -2925,10 +2997,8 @@ ixgbe_free_pci_resources(struct adapter adapter->osdep.ihs[rid] = NULL; } -#if defined(NETBSD_MSI_OR_MSIX) pci_intr_release(adapter->osdep.pc, adapter->osdep.intrs, adapter->osdep.nintrs); -#endif if (adapter->osdep.mem_size != 0) { bus_space_unmap(adapter->osdep.mem_bus_space_tag, @@ -2948,7 +3018,6 @@ static int ixgbe_setup_interface(device_t dev, struct adapter *adapter) { struct ethercom *ec = &adapter->osdep.ec; - struct ixgbe_hw *hw = &adapter->hw; struct ifnet *ifp; INIT_DEBUGOUT("ixgbe_setup_interface: begin"); @@ -2960,21 +3029,30 @@ ixgbe_setup_interface(device_t dev, stru ifp->if_stop = ixgbe_ifstop; ifp->if_softc = adapter; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; +#ifdef IXGBE_MPSAFE + ifp->if_extflags = IFEF_START_MPSAFE; +#endif ifp->if_ioctl = ixgbe_ioctl; +#if __FreeBSD_version >= 1100045 + /* TSO parameters */ + ifp->if_hw_tsomax = 65518; + ifp->if_hw_tsomaxsegcount = IXGBE_82599_SCATTER; + ifp->if_hw_tsomaxsegsize = 2048; +#endif #ifndef IXGBE_LEGACY_TX ifp->if_transmit = ixgbe_mq_start; - ifp->if_qflush = ixgbe_qflush; -#else +#endif ifp->if_start = ixgbe_start; IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2); -#if 0 - ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 2; -#endif IFQ_SET_READY(&ifp->if_snd); -#endif if_initialize(ifp); + adapter->ipq = 
if_percpuq_create(&adapter->osdep.ec.ec_if); ether_ifattach(ifp, adapter->hw.mac.addr); + /* + * We use per TX queue softint, so if_deferred_start_init() isn't + * used. + */ if_register(ifp); ether_set_ifflags_cb(ec, ixgbe_ifflags_cb); @@ -2986,14 +3064,20 @@ ixgbe_setup_interface(device_t dev, stru */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); - ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSOv4 | IFCAP_TSOv6; + /* Set capability flags */ + ifp->if_capabilities |= IFCAP_RXCSUM + | IFCAP_TXCSUM + | IFCAP_TSOv4 + | IFCAP_TSOv6 + | IFCAP_LRO; ifp->if_capenable = 0; - ec->ec_capabilities |= ETHERCAP_VLAN_HWCSUM; - ec->ec_capabilities |= ETHERCAP_JUMBO_MTU; - ifp->if_capabilities |= IFCAP_LRO; ec->ec_capabilities |= ETHERCAP_VLAN_HWTAGGING - | ETHERCAP_VLAN_MTU; + | ETHERCAP_VLAN_HWCSUM + | ETHERCAP_JUMBO_MTU + | ETHERCAP_VLAN_MTU; + + /* Enable the above capabilities by default */ ec->ec_capenable = ec->ec_capabilities; /* @@ -3011,22 +3095,121 @@ ixgbe_setup_interface(device_t dev, stru * callbacks to update media and link information */ ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change, - ixgbe_media_status); - ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL); - ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics); - if (hw->device_id == IXGBE_DEV_ID_82598AT) { - ifmedia_add(&adapter->media, - IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); - ifmedia_add(&adapter->media, - IFM_ETHER | IFM_1000_T, 0, NULL); - } - ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); + ixgbe_media_status); + + adapter->phy_layer = ixgbe_get_supported_physical_layer(&adapter->hw); + ixgbe_add_media_types(adapter); + + /* Set autoselect media by default */ ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); return (0); } static void +ixgbe_add_media_types(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + device_t dev = adapter->dev; + int layer; + + layer = adapter->phy_layer; + +#define ADD(mm, dd) \ + 
ifmedia_add(&adapter->media, IFM_ETHER | (mm), (dd), NULL); + + /* Media types with matching NetBSD media defines */ + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) { + ADD(IFM_10G_T, 0); + ADD(IFM_10G_T | IFM_FDX, 0); + } + if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) { + ADD(IFM_1000_T, 0); + ADD(IFM_1000_T | IFM_FDX, 0); + } + if (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) { + ADD(IFM_100_TX, 0); + ADD(IFM_100_TX | IFM_FDX, 0); + } + + if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU || + layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA) { + ADD(IFM_10G_TWINAX, 0); + ADD(IFM_10G_TWINAX | IFM_FDX, 0); + } + + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) { + ADD(IFM_10G_LR, 0); + ADD(IFM_10G_LR | IFM_FDX, 0); + if (hw->phy.multispeed_fiber) { + ADD(IFM_1000_LX, 0); + ADD(IFM_1000_LX | IFM_FDX, 0); + } + } + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) { + ADD(IFM_10G_SR, 0); + ADD(IFM_10G_SR | IFM_FDX, 0); + if (hw->phy.multispeed_fiber) { + ADD(IFM_1000_SX, 0); + ADD(IFM_1000_SX | IFM_FDX, 0); + } + } else if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) { + ADD(IFM_1000_SX, 0); + ADD(IFM_1000_SX | IFM_FDX, 0); + } + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) { + ADD(IFM_10G_CX4, 0); + ADD(IFM_10G_CX4 | IFM_FDX, 0); + } + +#ifdef IFM_ETH_XTYPE + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) { + ADD(IFM_10G_KR, 0); + ADD(IFM_10G_KR | IFM_FDX, 0); + } + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) { + ADD(AIFM_10G_KX4, 0); + ADD(AIFM_10G_KX4 | IFM_FDX, 0); + } + if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) { + ADD(IFM_1000_KX, 0); + ADD(IFM_1000_KX | IFM_FDX, 0); + } +#else + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) { + device_printf(dev, "Media supported: 10GbaseKR\n"); + device_printf(dev, "10GbaseKR mapped to 10GbaseSR\n"); + ADD(IFM_10G_SR, 0); + ADD(IFM_10G_SR | IFM_FDX, 0); + } + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) { + device_printf(dev, "Media supported: 10GbaseKX4\n"); + device_printf(dev, "10GbaseKX4 mapped to 10GbaseCX4\n"); + ADD(IFM_10G_CX4, 0); + 
ADD(IFM_10G_CX4 | IFM_FDX, 0); + } + if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) { + device_printf(dev, "Media supported: 1000baseKX\n"); + device_printf(dev, "1000baseKX mapped to 1000baseCX\n"); + ADD(IFM_1000_CX, 0); + ADD(IFM_1000_CX | IFM_FDX, 0); + } +#endif + if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_BX) + device_printf(dev, "Media supported: 1000baseBX\n"); + /* XXX no ifmedia_set? */ + + if (hw->device_id == IXGBE_DEV_ID_82598AT) { + ADD(IFM_1000_T | IFM_FDX, 0); + ADD(IFM_1000_T, 0); + } + + ADD(IFM_AUTO, 0); + +#undef ADD +} + +static void ixgbe_config_link(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; @@ -3036,18 +3219,8 @@ ixgbe_config_link(struct adapter *adapte sfp = ixgbe_is_sfp(hw); if (sfp) { - void *ip; - - if (hw->phy.multispeed_fiber) { - hw->mac.ops.setup_sfp(hw); - ixgbe_enable_tx_laser(hw); - ip = adapter->msf_si; - } else { - ip = adapter->mod_si; - } - kpreempt_disable(); - softint_schedule(ip); + softint_schedule(adapter->mod_si); kpreempt_enable(); } else { if (hw->mac.ops.check_link) @@ -3071,3413 +3244,3140 @@ out: return; } -/******************************************************************** - * Manage DMA'able memory. 
- *******************************************************************/ - -static int -ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size, - struct ixgbe_dma_alloc *dma, const int mapflags) -{ - device_t dev = adapter->dev; - int r, rsegs; - - r = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */ - DBA_ALIGN, 0, /* alignment, bounds */ - size, /* maxsize */ - 1, /* nsegments */ - size, /* maxsegsize */ - BUS_DMA_ALLOCNOW, /* flags */ - &dma->dma_tag); - if (r != 0) { - aprint_error_dev(dev, - "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r); - goto fail_0; - } - - r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, - size, - dma->dma_tag->dt_alignment, - dma->dma_tag->dt_boundary, - &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT); - if (r != 0) { - aprint_error_dev(dev, - "%s: bus_dmamem_alloc failed; error %d\n", __func__, r); - goto fail_1; - } - - r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs, - size, &dma->dma_vaddr, BUS_DMA_NOWAIT); - if (r != 0) { - aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n", - __func__, r); - goto fail_2; - } - - r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map); - if (r != 0) { - aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n", - __func__, r); - goto fail_3; - } - - r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr, - size, - NULL, - mapflags | BUS_DMA_NOWAIT); - if (r != 0) { - aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n", - __func__, r); - goto fail_4; - } - dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr; - dma->dma_size = size; - return 0; -fail_4: - ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map); -fail_3: - bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size); -fail_2: - bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs); -fail_1: - ixgbe_dma_tag_destroy(dma->dma_tag); -fail_0: - return r; -} - -static void -ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma) -{ - 
bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map); - bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1); - ixgbe_dma_tag_destroy(dma->dma_tag); -} - /********************************************************************* * - * Allocate memory for the transmit and receive rings, and then - * the descriptors associated with each, called only once at attach. + * Enable transmit units. * **********************************************************************/ -static int -ixgbe_allocate_queues(struct adapter *adapter) +static void +ixgbe_initialize_transmit_units(struct adapter *adapter) { - device_t dev = adapter->dev; - struct ix_queue *que; - struct tx_ring *txr; - struct rx_ring *rxr; - int rsize, tsize, error = IXGBE_SUCCESS; - int txconf = 0, rxconf = 0; - - /* First allocate the top level queue structs */ - if (!(adapter->queues = - (struct ix_queue *) malloc(sizeof(struct ix_queue) * - adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { - aprint_error_dev(dev, "Unable to allocate queue memory\n"); - error = ENOMEM; - goto fail; - } - - /* First allocate the TX ring struct memory */ - if (!(adapter->tx_rings = - (struct tx_ring *) malloc(sizeof(struct tx_ring) * - adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { - aprint_error_dev(dev, "Unable to allocate TX ring memory\n"); - error = ENOMEM; - goto tx_fail; - } - - /* Next allocate the RX */ - if (!(adapter->rx_rings = - (struct rx_ring *) malloc(sizeof(struct rx_ring) * - adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { - aprint_error_dev(dev, "Unable to allocate RX ring memory\n"); - error = ENOMEM; - goto rx_fail; - } - - /* For the ring itself */ - tsize = roundup2(adapter->num_tx_desc * - sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN); + struct tx_ring *txr = adapter->tx_rings; + struct ixgbe_hw *hw = &adapter->hw; - /* - * Now set up the TX queues, txconf is needed to handle 
the - * possibility that things fail midcourse and we need to - * undo memory gracefully - */ - for (int i = 0; i < adapter->num_queues; i++, txconf++) { - /* Set up some basics */ - txr = &adapter->tx_rings[i]; - txr->adapter = adapter; - txr->me = i; - txr->num_desc = adapter->num_tx_desc; - - /* Initialize the TX side lock */ - snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", - device_xname(dev), txr->me); - mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET); + /* Setup the Base and Length of the Tx Descriptor Ring */ + for (int i = 0; i < adapter->num_queues; i++, txr++) { + u64 tdba = txr->txdma.dma_paddr; + u32 txctrl = 0; + int j = txr->me; - if (ixgbe_dma_malloc(adapter, tsize, - &txr->txdma, BUS_DMA_NOWAIT)) { - aprint_error_dev(dev, - "Unable to allocate TX Descriptor memory\n"); - error = ENOMEM; - goto err_tx_desc; - } - txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr; - bzero((void *)txr->tx_base, tsize); + IXGBE_WRITE_REG(hw, IXGBE_TDBAL(j), + (tdba & 0x00000000ffffffffULL)); + IXGBE_WRITE_REG(hw, IXGBE_TDBAH(j), (tdba >> 32)); + IXGBE_WRITE_REG(hw, IXGBE_TDLEN(j), + adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc)); - /* Now allocate transmit buffers for the ring */ - if (ixgbe_allocate_transmit_buffers(txr)) { - aprint_error_dev(dev, - "Critical Failure setting up transmit buffers\n"); - error = ENOMEM; - goto err_tx_desc; - } -#ifndef IXGBE_LEGACY_TX - /* Allocate a buf ring */ - txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF, - M_WAITOK, &txr->tx_mtx); - if (txr->br == NULL) { - aprint_error_dev(dev, - "Critical Failure setting up buf ring\n"); - error = ENOMEM; - goto err_tx_desc; - } -#endif - } + /* Setup the HW Tx Head and Tail descriptor pointers */ + IXGBE_WRITE_REG(hw, IXGBE_TDH(j), 0); + IXGBE_WRITE_REG(hw, IXGBE_TDT(j), 0); - /* - * Next the RX queues... 
- */ - rsize = roundup2(adapter->num_rx_desc * - sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN); - for (int i = 0; i < adapter->num_queues; i++, rxconf++) { - rxr = &adapter->rx_rings[i]; - /* Set up some basics */ - rxr->adapter = adapter; - rxr->me = i; - rxr->num_desc = adapter->num_rx_desc; - - /* Initialize the RX side lock */ - snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", - device_xname(dev), rxr->me); - mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET); + /* Cache the tail address */ + txr->tail = IXGBE_TDT(j); - if (ixgbe_dma_malloc(adapter, rsize, - &rxr->rxdma, BUS_DMA_NOWAIT)) { - aprint_error_dev(dev, - "Unable to allocate RxDescriptor memory\n"); - error = ENOMEM; - goto err_rx_desc; + /* Disable Head Writeback */ + /* + * Note: for X550 series devices, these registers are actually + * prefixed with TPH_ isntead of DCA_, but the addresses and + * fields remain the same. + */ + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(j)); + break; + default: + txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(j)); + break; + } + txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN; + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(j), txctrl); + break; + default: + IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(j), txctrl); + break; } - rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr; - bzero((void *)rxr->rx_base, rsize); - /* Allocate receive buffers for the ring*/ - if (ixgbe_allocate_receive_buffers(rxr)) { - aprint_error_dev(dev, - "Critical Failure setting up receive buffers\n"); - error = ENOMEM; - goto err_rx_desc; - } } - /* - ** Finally set up the queue holding structs - */ - for (int i = 0; i < adapter->num_queues; i++) { - que = &adapter->queues[i]; - que->adapter = adapter; - que->txr = &adapter->tx_rings[i]; - que->rxr = &adapter->rx_rings[i]; + if (hw->mac.type != ixgbe_mac_82598EB) { + u32 dmatxctl, rttdcs; +#ifdef PCI_IOV + enum ixgbe_iov_mode mode 
= ixgbe_get_iov_mode(adapter); +#endif + dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL); + dmatxctl |= IXGBE_DMATXCTL_TE; + IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl); + /* Disable arbiter to set MTQC */ + rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS); + rttdcs |= IXGBE_RTTDCS_ARBDIS; + IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs); +#ifdef PCI_IOV + IXGBE_WRITE_REG(hw, IXGBE_MTQC, ixgbe_get_mtqc(mode)); +#else + IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB); +#endif + rttdcs &= ~IXGBE_RTTDCS_ARBDIS; + IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs); } - return (0); - -err_rx_desc: - for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--) - ixgbe_dma_free(adapter, &rxr->rxdma); -err_tx_desc: - for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) - ixgbe_dma_free(adapter, &txr->txdma); - free(adapter->rx_rings, M_DEVBUF); -rx_fail: - free(adapter->tx_rings, M_DEVBUF); -tx_fail: - free(adapter->queues, M_DEVBUF); -fail: - return (error); + return; } -/********************************************************************* - * - * Allocate memory for tx_buffer structures. The tx_buffer stores all - * the information needed to transmit a packet on the wire. This is - * called only once at attach, setup is done every reset. - * - **********************************************************************/ -static int -ixgbe_allocate_transmit_buffers(struct tx_ring *txr) +static void +ixgbe_initialize_rss_mapping(struct adapter *adapter) { - struct adapter *adapter = txr->adapter; - device_t dev = adapter->dev; - struct ixgbe_tx_buf *txbuf; - int error, i; - - /* - * Setup DMA descriptor areas. 
- */ - if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */ - 1, 0, /* alignment, bounds */ - IXGBE_TSO_SIZE, /* maxsize */ - adapter->num_segs, /* nsegments */ - PAGE_SIZE, /* maxsegsize */ - 0, /* flags */ - &txr->txtag))) { - aprint_error_dev(dev,"Unable to allocate TX DMA tag\n"); - goto fail; - } + struct ixgbe_hw *hw = &adapter->hw; + u32 reta = 0, mrqc, rss_key[10]; + int queue_id, table_size, index_mult; +#ifdef RSS + u32 rss_hash_config; +#endif +#ifdef PCI_IOV + enum ixgbe_iov_mode mode; +#endif - if (!(txr->tx_buffers = - (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) * - adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { - aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n"); - error = ENOMEM; - goto fail; - } +#ifdef RSS + /* Fetch the configured RSS key */ + rss_getkey((uint8_t *) &rss_key); +#else + /* set up random bits */ + cprng_fast(&rss_key, sizeof(rss_key)); +#endif - /* Create the descriptor buffer dma maps */ - txbuf = txr->tx_buffers; - for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { - error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map); - if (error != 0) { - aprint_error_dev(dev, - "Unable to create TX DMA map (%d)\n", error); - goto fail; - } + /* Set multiplier for RETA setup and table size based on MAC */ + index_mult = 0x1; + table_size = 128; + switch (adapter->hw.mac.type) { + case ixgbe_mac_82598EB: + index_mult = 0x11; + break; + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + table_size = 512; + break; + default: + break; } - return 0; -fail: - /* We free all, it handles case where we are in the middle */ - ixgbe_free_transmit_structures(adapter); - return (error); -} - -/********************************************************************* - * - * Initialize a transmit ring. 
- * - **********************************************************************/ -static void -ixgbe_setup_transmit_ring(struct tx_ring *txr) -{ - struct adapter *adapter = txr->adapter; - struct ixgbe_tx_buf *txbuf; - int i; -#ifdef DEV_NETMAP - struct netmap_adapter *na = NA(adapter->ifp); - struct netmap_slot *slot; -#endif /* DEV_NETMAP */ - - /* Clear the old ring contents */ - IXGBE_TX_LOCK(txr); -#ifdef DEV_NETMAP - /* - * (under lock): if in netmap mode, do some consistency - * checks and set slot to entry 0 of the netmap ring. - */ - slot = netmap_reset(na, NR_TX, txr->me, 0); -#endif /* DEV_NETMAP */ - bzero((void *)txr->tx_base, - (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc); - /* Reset indices */ - txr->next_avail_desc = 0; - txr->next_to_clean = 0; - - /* Free any existing tx buffers. */ - txbuf = txr->tx_buffers; - for (i = 0; i < txr->num_desc; i++, txbuf++) { - if (txbuf->m_head != NULL) { - bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map, - 0, txbuf->m_head->m_pkthdr.len, - BUS_DMASYNC_POSTWRITE); - ixgbe_dmamap_unload(txr->txtag, txbuf->map); - m_freem(txbuf->m_head); - txbuf->m_head = NULL; - } -#ifdef DEV_NETMAP + /* Set up the redirection table */ + for (int i = 0, j = 0; i < table_size; i++, j++) { + if (j == adapter->num_queues) j = 0; +#ifdef RSS /* - * In netmap mode, set the map for the packet buffer. - * NOTE: Some drivers (not this one) also need to set - * the physical buffer address in the NIC ring. - * Slots in the netmap ring (indexed by "si") are - * kring->nkr_hwofs positions "ahead" wrt the - * corresponding slot in the NIC ring. In some drivers - * (not here) nkr_hwofs can be negative. Function - * netmap_idx_n2k() handles wraparounds properly. + * Fetch the RSS bucket id for the given indirection entry. + * Cap it at the number of configured buckets (which is + * num_queues.) 
*/ - if (slot) { - int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); - netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si)); - } -#endif /* DEV_NETMAP */ - /* Clear the EOP descriptor pointer */ - txbuf->eop = NULL; - } - -#ifdef IXGBE_FDIR - /* Set the rate at which we sample packets */ - if (adapter->hw.mac.type != ixgbe_mac_82598EB) - txr->atr_sample = atr_sample_rate; + queue_id = rss_get_indirection_to_bucket(i); + queue_id = queue_id % adapter->num_queues; +#else + queue_id = (j * index_mult); #endif + /* + * The low 8 bits are for hash value (n+0); + * The next 8 bits are for hash value (n+1), etc. + */ + reta = reta >> 8; + reta = reta | ( ((uint32_t) queue_id) << 24); + if ((i & 3) == 3) { + if (i < 128) + IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta); + else + IXGBE_WRITE_REG(hw, IXGBE_ERETA((i >> 2) - 32), reta); + reta = 0; + } + } - /* Set number of descriptors available */ - txr->tx_avail = adapter->num_tx_desc; + /* Now fill our hash function seeds */ + for (int i = 0; i < 10; i++) + IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rss_key[i]); - ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - IXGBE_TX_UNLOCK(txr); + /* Perform hash on these packet types */ +#ifdef RSS + mrqc = IXGBE_MRQC_RSSEN; + rss_hash_config = rss_gethashconfig(); + if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) + mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4; + if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) + mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP; + if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6) + mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6; + if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6) + mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP; + if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX) + mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX; + if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6_EX) + mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP; + if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) + mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP; + if (rss_hash_config & 
RSS_HASHTYPE_RSS_UDP_IPV4_EX) + device_printf(adapter->dev, + "%s: RSS_HASHTYPE_RSS_UDP_IPV4_EX defined, " + "but not supported\n", __func__); + if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6) + mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP; + if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6_EX) + mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP; +#else + /* + * Disable UDP - IP fragments aren't currently being handled + * and so we end up with a mix of 2-tuple and 4-tuple + * traffic. + */ + mrqc = IXGBE_MRQC_RSSEN + | IXGBE_MRQC_RSS_FIELD_IPV4 + | IXGBE_MRQC_RSS_FIELD_IPV4_TCP + | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP + | IXGBE_MRQC_RSS_FIELD_IPV6_EX + | IXGBE_MRQC_RSS_FIELD_IPV6 + | IXGBE_MRQC_RSS_FIELD_IPV6_TCP + ; +#endif /* RSS */ +#ifdef PCI_IOV + mode = ixgbe_get_iov_mode(adapter); + mrqc |= ixgbe_get_mrqc(mode); +#endif + IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc); } -/********************************************************************* - * - * Initialize all transmit rings. - * - **********************************************************************/ -static int -ixgbe_setup_transmit_structures(struct adapter *adapter) -{ - struct tx_ring *txr = adapter->tx_rings; - - for (int i = 0; i < adapter->num_queues; i++, txr++) - ixgbe_setup_transmit_ring(txr); - - return (0); -} /********************************************************************* * - * Enable transmit unit. + * Setup receive registers and features. 
* **********************************************************************/ +#define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2 + +#define BSIZEPKT_ROUNDUP ((1<tx_rings; + int i; + struct rx_ring *rxr = adapter->rx_rings; struct ixgbe_hw *hw = &adapter->hw; + struct ifnet *ifp = adapter->ifp; + u32 bufsz, fctrl, srrctl, rxcsum; + u32 hlreg; - /* Setup the Base and Length of the Tx Descriptor Ring */ + /* + * Make sure receives are disabled while + * setting up the descriptor ring + */ + ixgbe_disable_rx(hw); - for (int i = 0; i < adapter->num_queues; i++, txr++) { - u64 tdba = txr->txdma.dma_paddr; - u32 txctrl; + /* Enable broadcasts */ + fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); + fctrl |= IXGBE_FCTRL_BAM; + if (adapter->hw.mac.type == ixgbe_mac_82598EB) { + fctrl |= IXGBE_FCTRL_DPF; + fctrl |= IXGBE_FCTRL_PMCF; + } + IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); - IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i), - (tdba & 0x00000000ffffffffULL)); - IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32)); - IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i), - adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc)); + /* Set for Jumbo Frames? */ + hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0); + if (ifp->if_mtu > ETHERMTU) + hlreg |= IXGBE_HLREG0_JUMBOEN; + else + hlreg &= ~IXGBE_HLREG0_JUMBOEN; +#ifdef DEV_NETMAP + /* crcstrip is conditional in netmap (in RDRXCTL too ?) 
*/ + if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip) + hlreg &= ~IXGBE_HLREG0_RXCRCSTRP; + else + hlreg |= IXGBE_HLREG0_RXCRCSTRP; +#endif /* DEV_NETMAP */ + IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg); - /* Setup the HW Tx Head and Tail descriptor pointers */ - IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0); - IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0); + bufsz = (adapter->rx_mbuf_sz + + BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; - /* Setup Transmit Descriptor Cmd Settings */ - txr->txd_cmd = IXGBE_TXD_CMD_IFCS; - txr->queue_status = IXGBE_QUEUE_IDLE; + for (i = 0; i < adapter->num_queues; i++, rxr++) { + u64 rdba = rxr->rxdma.dma_paddr; + int j = rxr->me; - /* Set the processing limit */ - txr->process_limit = ixgbe_tx_process_limit; + /* Setup the Base and Length of the Rx Descriptor Ring */ + IXGBE_WRITE_REG(hw, IXGBE_RDBAL(j), + (rdba & 0x00000000ffffffffULL)); + IXGBE_WRITE_REG(hw, IXGBE_RDBAH(j), (rdba >> 32)); + IXGBE_WRITE_REG(hw, IXGBE_RDLEN(j), + adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc)); - /* Disable Head Writeback */ - switch (hw->mac.type) { - case ixgbe_mac_82598EB: - txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i)); - break; - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - default: - txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i)); - break; - } - txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN; - switch (hw->mac.type) { - case ixgbe_mac_82598EB: - IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl); - break; - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - default: - IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl); - break; + /* Set up the SRRCTL register */ + srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(j)); + srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK; + srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK; + srrctl |= bufsz; + srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; + + /* + * Set DROP_EN iff we have no flow control and >1 queue. 
+ * Note that srrctl was cleared shortly before during reset, + * so we do not need to clear the bit, but do it just in case + * this code is moved elsewhere. + */ + if (adapter->num_queues > 1 && + adapter->hw.fc.requested_mode == ixgbe_fc_none) { + srrctl |= IXGBE_SRRCTL_DROP_EN; + } else { + srrctl &= ~IXGBE_SRRCTL_DROP_EN; } + IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(j), srrctl); + + /* Setup the HW Rx Head and Tail Descriptor Pointers */ + IXGBE_WRITE_REG(hw, IXGBE_RDH(j), 0); + IXGBE_WRITE_REG(hw, IXGBE_RDT(j), 0); + + /* Set the driver rx tail address */ + rxr->tail = IXGBE_RDT(rxr->me); } - if (hw->mac.type != ixgbe_mac_82598EB) { - u32 dmatxctl, rttdcs; - dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL); - dmatxctl |= IXGBE_DMATXCTL_TE; - IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl); - /* Disable arbiter to set MTQC */ - rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS); - rttdcs |= IXGBE_RTTDCS_ARBDIS; - IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs); - IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB); - rttdcs &= ~IXGBE_RTTDCS_ARBDIS; - IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs); + if (adapter->hw.mac.type != ixgbe_mac_82598EB) { + u32 psrtype = IXGBE_PSRTYPE_TCPHDR | + IXGBE_PSRTYPE_UDPHDR | + IXGBE_PSRTYPE_IPV4HDR | + IXGBE_PSRTYPE_IPV6HDR; + IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype); } - return; -} + rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM); -/********************************************************************* - * - * Free all transmit rings. 
- * - **********************************************************************/ -static void -ixgbe_free_transmit_structures(struct adapter *adapter) -{ - struct tx_ring *txr = adapter->tx_rings; + ixgbe_initialize_rss_mapping(adapter); - for (int i = 0; i < adapter->num_queues; i++, txr++) { - ixgbe_free_transmit_buffers(txr); - ixgbe_dma_free(adapter, &txr->txdma); - IXGBE_TX_LOCK_DESTROY(txr); + if (adapter->num_queues > 1) { + /* RSS and RX IPP Checksum are mutually exclusive */ + rxcsum |= IXGBE_RXCSUM_PCSD; } - free(adapter->tx_rings, M_DEVBUF); -} -/********************************************************************* - * - * Free transmit ring related data structures. - * - **********************************************************************/ -static void -ixgbe_free_transmit_buffers(struct tx_ring *txr) -{ - struct adapter *adapter = txr->adapter; - struct ixgbe_tx_buf *tx_buffer; - int i; + if (ifp->if_capenable & IFCAP_RXCSUM) + rxcsum |= IXGBE_RXCSUM_PCSD; - INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin"); + /* This is useful for calculating UDP/IP fragment checksums */ + if (!(rxcsum & IXGBE_RXCSUM_PCSD)) + rxcsum |= IXGBE_RXCSUM_IPPCSE; - if (txr->tx_buffers == NULL) - return; + IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum); - tx_buffer = txr->tx_buffers; - for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { - if (tx_buffer->m_head != NULL) { - bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map, - 0, tx_buffer->m_head->m_pkthdr.len, - BUS_DMASYNC_POSTWRITE); - ixgbe_dmamap_unload(txr->txtag, tx_buffer->map); - m_freem(tx_buffer->m_head); - tx_buffer->m_head = NULL; - if (tx_buffer->map != NULL) { - ixgbe_dmamap_destroy(txr->txtag, - tx_buffer->map); - tx_buffer->map = NULL; - } - } else if (tx_buffer->map != NULL) { - ixgbe_dmamap_unload(txr->txtag, tx_buffer->map); - ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map); - tx_buffer->map = NULL; - } - } -#ifndef IXGBE_LEGACY_TX - if (txr->br != NULL) - buf_ring_free(txr->br, M_DEVBUF); -#endif - if 
(txr->tx_buffers != NULL) { - free(txr->tx_buffers, M_DEVBUF); - txr->tx_buffers = NULL; - } - if (txr->txtag != NULL) { - ixgbe_dma_tag_destroy(txr->txtag); - txr->txtag = NULL; - } return; } -/********************************************************************* - * - * Advanced Context Descriptor setup for VLAN, CSUM or TSO - * - **********************************************************************/ -static int -ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp, - u32 *cmd_type_len, u32 *olinfo_status) +#if 0 /* XXX Badly need to overhaul vlan(4) on NetBSD. */ +/* +** This routine is run via an vlan config EVENT, +** it enables us to use the HW Filter table since +** we can get the vlan id. This just creates the +** entry in the soft version of the VFTA, init will +** repopulate the real table. +*/ +static void +ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) { - struct m_tag *mtag; - struct adapter *adapter = txr->adapter; - struct ethercom *ec = &adapter->osdep.ec; - struct ixgbe_adv_tx_context_desc *TXD; - struct ether_vlan_header *eh; - struct ip ip; - struct ip6_hdr ip6; - u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; - int ehdrlen, ip_hlen = 0; - u16 etype; - u8 ipproto __diagused = 0; - int offload = TRUE; - int ctxd = txr->next_avail_desc; - u16 vtag = 0; - - /* First check if TSO is to be used */ - if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6)) - return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status)); + struct adapter *adapter = ifp->if_softc; + u16 index, bit; - if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0) - offload = FALSE; + if (ifp->if_softc != arg) /* Not our event */ + return; - /* Indicate the whole packet as payload when not doing TSO */ - *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT; + if ((vtag == 0) || (vtag > 4095)) /* Invalid */ + return; - /* Now ready a context descriptor */ - TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd]; + IXGBE_CORE_LOCK(adapter); + 
index = (vtag >> 5) & 0x7F; + bit = vtag & 0x1F; + adapter->shadow_vfta[index] |= (1 << bit); + ixgbe_setup_vlan_hw_support(adapter); + IXGBE_CORE_UNLOCK(adapter); +} - /* - ** In advanced descriptors the vlan tag must - ** be placed into the context descriptor. Hence - ** we need to make one even if not doing offloads. - */ - if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) { - vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff); - vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT); - } else if (offload == FALSE) /* ... no offload to do */ - return 0; - - /* - * Determine where frame payload starts. - * Jump over vlan headers if already present, - * helpful for QinQ too. - */ - KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag)); - eh = mtod(mp, struct ether_vlan_header *); - if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { - KASSERT(mp->m_len >= sizeof(struct ether_vlan_header)); - etype = ntohs(eh->evl_proto); - ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; - } else { - etype = ntohs(eh->evl_encap_proto); - ehdrlen = ETHER_HDR_LEN; - } - - /* Set the ether header length */ - vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; - - switch (etype) { - case ETHERTYPE_IP: - m_copydata(mp, ehdrlen, sizeof(ip), &ip); - ip_hlen = ip.ip_hl << 2; - ipproto = ip.ip_p; -#if 0 - ip.ip_sum = 0; - m_copyback(mp, ehdrlen, sizeof(ip), &ip); -#else - KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 || - ip.ip_sum == 0); -#endif - type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; - break; - case ETHERTYPE_IPV6: - m_copydata(mp, ehdrlen, sizeof(ip6), &ip6); - ip_hlen = sizeof(ip6); - /* XXX-BZ this will go badly in case of ext hdrs. */ - ipproto = ip6.ip6_nxt; - type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; - break; - default: - break; - } - - if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0) - *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; +/* +** This routine is run via an vlan +** unconfig EVENT, remove our entry +** in the soft vfta. 
+*/ +static void +ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) +{ + struct adapter *adapter = ifp->if_softc; + u16 index, bit; - vlan_macip_lens |= ip_hlen; - type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; + if (ifp->if_softc != arg) + return; - if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6)) { - type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; - *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; - KASSERT(ipproto == IPPROTO_TCP); - } else if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6)) { - type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP; - *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; - KASSERT(ipproto == IPPROTO_UDP); - } - - /* Now copy bits into descriptor */ - TXD->vlan_macip_lens = htole32(vlan_macip_lens); - TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); - TXD->seqnum_seed = htole32(0); - TXD->mss_l4len_idx = htole32(0); - - /* We've consumed the first desc, adjust counters */ - if (++ctxd == txr->num_desc) - ctxd = 0; - txr->next_avail_desc = ctxd; - --txr->tx_avail; + if ((vtag == 0) || (vtag > 4095)) /* Invalid */ + return; - return 0; + IXGBE_CORE_LOCK(adapter); + index = (vtag >> 5) & 0x7F; + bit = vtag & 0x1F; + adapter->shadow_vfta[index] &= ~(1 << bit); + /* Re-init to load the changes */ + ixgbe_setup_vlan_hw_support(adapter); + IXGBE_CORE_UNLOCK(adapter); } +#endif -/********************************************************************** - * - * Setup work for hardware segmentation offload (TSO) on - * adapters using advanced tx descriptors - * - **********************************************************************/ -static int -ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, - u32 *cmd_type_len, u32 *olinfo_status) +static void +ixgbe_setup_vlan_hw_support(struct adapter *adapter) { - struct m_tag *mtag; - struct adapter *adapter = txr->adapter; struct ethercom *ec = &adapter->osdep.ec; - struct ixgbe_adv_tx_context_desc *TXD; - u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; - u32 
mss_l4len_idx = 0, paylen; - u16 vtag = 0, eh_type; - int ctxd, ehdrlen, ip_hlen, tcp_hlen; - struct ether_vlan_header *eh; -#ifdef INET6 - struct ip6_hdr *ip6; -#endif -#ifdef INET - struct ip *ip; -#endif - struct tcphdr *th; + struct ixgbe_hw *hw = &adapter->hw; + struct rx_ring *rxr; + u32 ctrl; /* - * Determine where frame payload starts. - * Jump over vlan headers if already present - */ - eh = mtod(mp, struct ether_vlan_header *); - if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { - ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; - eh_type = eh->evl_proto; - } else { - ehdrlen = ETHER_HDR_LEN; - eh_type = eh->evl_encap_proto; - } - - switch (ntohs(eh_type)) { -#ifdef INET6 - case ETHERTYPE_IPV6: - ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); - /* XXX-BZ For now we do not pretend to support ext. hdrs. */ - if (ip6->ip6_nxt != IPPROTO_TCP) - return (ENXIO); - ip_hlen = sizeof(struct ip6_hdr); - ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); - th = (struct tcphdr *)((char *)ip6 + ip_hlen); - th->th_sum = in6_cksum_phdr(&ip6->ip6_src, - &ip6->ip6_dst, 0, htonl(IPPROTO_TCP)); - type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; - break; -#endif -#ifdef INET - case ETHERTYPE_IP: - ip = (struct ip *)(mp->m_data + ehdrlen); - if (ip->ip_p != IPPROTO_TCP) - return (ENXIO); - ip->ip_sum = 0; - ip_hlen = ip->ip_hl << 2; - th = (struct tcphdr *)((char *)ip + ip_hlen); - th->th_sum = in_cksum_phdr(ip->ip_src.s_addr, - ip->ip_dst.s_addr, htons(IPPROTO_TCP)); - type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; - /* Tell transmit desc to also do IPv4 checksum. 
*/ - *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; - break; -#endif - default: - panic("%s: CSUM_TSO but no supported IP version (0x%04x)", - __func__, ntohs(eh_type)); - break; - } - - ctxd = txr->next_avail_desc; - TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd]; - - tcp_hlen = th->th_off << 2; - - /* This is used in the transmit desc in encap */ - paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen; - - /* VLAN MACLEN IPLEN */ - if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) { - vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff); - vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT); - } - - vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; - vlan_macip_lens |= ip_hlen; - TXD->vlan_macip_lens = htole32(vlan_macip_lens); - - /* ADV DTYPE TUCMD */ - type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; - type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; - TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); - - /* MSS L4LEN IDX */ - mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT); - mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT); - TXD->mss_l4len_idx = htole32(mss_l4len_idx); - - TXD->seqnum_seed = htole32(0); - - if (++ctxd == txr->num_desc) - ctxd = 0; - - txr->tx_avail--; - txr->next_avail_desc = ctxd; - *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE; - *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; - *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT; - ++txr->tso_tx.ev_count; - return (0); -} - -#ifdef IXGBE_FDIR -/* -** This routine parses packet headers so that Flow -** Director can make a hashed filter table entry -** allowing traffic flows to be identified and kept -** on the same cpu. This would be a performance -** hit, but we only do it at IXGBE_FDIR_RATE of -** packets. 
-*/ -static void -ixgbe_atr(struct tx_ring *txr, struct mbuf *mp) -{ - struct adapter *adapter = txr->adapter; - struct ix_queue *que; - struct ip *ip; - struct tcphdr *th; - struct udphdr *uh; - struct ether_vlan_header *eh; - union ixgbe_atr_hash_dword input = {.dword = 0}; - union ixgbe_atr_hash_dword common = {.dword = 0}; - int ehdrlen, ip_hlen; - u16 etype; - - eh = mtod(mp, struct ether_vlan_header *); - if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { - ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; - etype = eh->evl_proto; - } else { - ehdrlen = ETHER_HDR_LEN; - etype = eh->evl_encap_proto; - } - - /* Only handling IPv4 */ - if (etype != htons(ETHERTYPE_IP)) + ** We get here thru init_locked, meaning + ** a soft reset, this has already cleared + ** the VFTA and other state, so if there + ** have been no vlan's registered do nothing. + */ + if (!VLAN_ATTACHED(&adapter->osdep.ec)) return; - ip = (struct ip *)(mp->m_data + ehdrlen); - ip_hlen = ip->ip_hl << 2; - - /* check if we're UDP or TCP */ - switch (ip->ip_p) { - case IPPROTO_TCP: - th = (struct tcphdr *)((char *)ip + ip_hlen); - /* src and dst are inverted */ - common.port.dst ^= th->th_sport; - common.port.src ^= th->th_dport; - input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4; - break; - case IPPROTO_UDP: - uh = (struct udphdr *)((char *)ip + ip_hlen); - /* src and dst are inverted */ - common.port.dst ^= uh->uh_sport; - common.port.src ^= uh->uh_dport; - input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4; - break; - default: - return; + /* Setup the queues for vlans */ + for (int i = 0; i < adapter->num_queues; i++) { + rxr = &adapter->rx_rings[i]; + /* On 82599 the VLAN enable is per/queue in RXDCTL */ + if (hw->mac.type != ixgbe_mac_82598EB) { + ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)); + ctrl |= IXGBE_RXDCTL_VME; + IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxr->me), ctrl); + } + rxr->vtag_strip = TRUE; } - input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag); - if 
(mp->m_pkthdr.ether_vtag) - common.flex_bytes ^= htons(ETHERTYPE_VLAN); - else - common.flex_bytes ^= etype; - common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr; - - que = &adapter->queues[txr->me]; + if ((ec->ec_capenable & ETHERCAP_VLAN_HWFILTER) == 0) + return; /* - ** This assumes the Rx queue and Tx - ** queue are bound to the same CPU + ** A soft reset zero's out the VFTA, so + ** we need to repopulate it now. */ - ixgbe_fdir_add_signature_filter_82599(&adapter->hw, - input, common, que->msix); + for (int i = 0; i < IXGBE_VFTA_SIZE; i++) + if (adapter->shadow_vfta[i] != 0) + IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), + adapter->shadow_vfta[i]); + + ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); + /* Enable the Filter Table if enabled */ + if (ec->ec_capenable & ETHERCAP_VLAN_HWFILTER) { + ctrl &= ~IXGBE_VLNCTRL_CFIEN; + ctrl |= IXGBE_VLNCTRL_VFE; + } + if (hw->mac.type == ixgbe_mac_82598EB) + ctrl |= IXGBE_VLNCTRL_VME; + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl); } -#endif /* IXGBE_FDIR */ -/********************************************************************** - * - * Examine each tx_buffer in the used queue. If the hardware is done - * processing the packet then free associated resources. The - * tx_buffer is put back on the free queue. 
- * - **********************************************************************/ static void -ixgbe_txeof(struct tx_ring *txr) +ixgbe_enable_intr(struct adapter *adapter) { - struct adapter *adapter = txr->adapter; - struct ifnet *ifp = adapter->ifp; - u32 work, processed = 0; - u16 limit = txr->process_limit; - struct ixgbe_tx_buf *buf; - union ixgbe_adv_tx_desc *txd; - struct timeval now, elapsed; - - KASSERT(mutex_owned(&txr->tx_mtx)); + struct ixgbe_hw *hw = &adapter->hw; + struct ix_queue *que = adapter->queues; + u32 mask, fwsm; -#ifdef DEV_NETMAP - if (ifp->if_capenable & IFCAP_NETMAP) { - struct netmap_adapter *na = NA(ifp); - struct netmap_kring *kring = &na->tx_rings[txr->me]; - txd = txr->tx_base; - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_POSTREAD); - /* - * In netmap mode, all the work is done in the context - * of the client thread. Interrupt handlers only wake up - * clients, which may be sleeping on individual rings - * or on a global resource for all rings. - * To implement tx interrupt mitigation, we wake up the client - * thread roughly every half ring, even if the NIC interrupts - * more frequently. This is implemented as follows: - * - ixgbe_txsync() sets kring->nr_kflags with the index of - * the slot that should wake up the thread (nkr_num_slots - * means the user thread should not be woken up); - * - the driver ignores tx interrupts unless netmap_mitigate=0 - * or the slot has the DD bit set. 
- */ - if (!netmap_mitigate || - (kring->nr_kflags < kring->nkr_num_slots && - txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) { - netmap_tx_irq(ifp, txr->me); - } - return; - } -#endif /* DEV_NETMAP */ + mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE); + /* Enable Fan Failure detection */ + if (hw->device_id == IXGBE_DEV_ID_82598AT) + mask |= IXGBE_EIMS_GPI_SDP1; - if (txr->tx_avail == txr->num_desc) { - txr->queue_status = IXGBE_QUEUE_IDLE; - return; + switch (adapter->hw.mac.type) { + case ixgbe_mac_82599EB: + mask |= IXGBE_EIMS_ECC; + /* Temperature sensor on some adapters */ + mask |= IXGBE_EIMS_GPI_SDP0; + /* SFP+ (RX_LOS_N & MOD_ABS_N) */ + mask |= IXGBE_EIMS_GPI_SDP1; + mask |= IXGBE_EIMS_GPI_SDP2; +#ifdef IXGBE_FDIR + mask |= IXGBE_EIMS_FLOW_DIR; +#endif +#ifdef PCI_IOV + mask |= IXGBE_EIMS_MAILBOX; +#endif + break; + case ixgbe_mac_X540: + /* Detect if Thermal Sensor is enabled */ + fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM); + if (fwsm & IXGBE_FWSM_TS_ENABLED) + mask |= IXGBE_EIMS_TS; + mask |= IXGBE_EIMS_ECC; +#ifdef IXGBE_FDIR + mask |= IXGBE_EIMS_FLOW_DIR; +#endif + break; + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + /* MAC thermal sensor is automatically enabled */ + mask |= IXGBE_EIMS_TS; + /* Some devices use SDP0 for important information */ + if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP || + hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) + mask |= IXGBE_EIMS_GPI_SDP0_BY_MAC(hw); + mask |= IXGBE_EIMS_ECC; +#ifdef IXGBE_FDIR + mask |= IXGBE_EIMS_FLOW_DIR; +#endif +#ifdef PCI_IOV + mask |= IXGBE_EIMS_MAILBOX; +#endif + /* falls through */ + default: + break; } - /* Get work starting point */ - work = txr->next_to_clean; - buf = &txr->tx_buffers[work]; - txd = &txr->tx_base[work]; - work -= txr->num_desc; /* The distance to ring end */ - ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_POSTREAD); - do { - union ixgbe_adv_tx_desc *eop= buf->eop; - if (eop == NULL) /* No work */ - break; - - if ((eop->wb.status & 
IXGBE_TXD_STAT_DD) == 0) - break; /* I/O not complete */ - - if (buf->m_head) { - txr->bytes += - buf->m_head->m_pkthdr.len; - bus_dmamap_sync(txr->txtag->dt_dmat, - buf->map, - 0, buf->m_head->m_pkthdr.len, - BUS_DMASYNC_POSTWRITE); - ixgbe_dmamap_unload(txr->txtag, - buf->map); - m_freem(buf->m_head); - buf->m_head = NULL; - /* - * NetBSD: Don't override buf->map with NULL here. - * It'll panic when a ring runs one lap around. - */ - } - buf->eop = NULL; - ++txr->tx_avail; - - /* We clean the range if multi segment */ - while (txd != eop) { - ++txd; - ++buf; - ++work; - /* wrap the ring? */ - if (__predict_false(!work)) { - work -= txr->num_desc; - buf = txr->tx_buffers; - txd = txr->tx_base; - } - if (buf->m_head) { - txr->bytes += - buf->m_head->m_pkthdr.len; - bus_dmamap_sync(txr->txtag->dt_dmat, - buf->map, - 0, buf->m_head->m_pkthdr.len, - BUS_DMASYNC_POSTWRITE); - ixgbe_dmamap_unload(txr->txtag, - buf->map); - m_freem(buf->m_head); - buf->m_head = NULL; - /* - * NetBSD: Don't override buf->map with NULL - * here. It'll panic when a ring runs one lap - * around. 
- */ - } - ++txr->tx_avail; - buf->eop = NULL; - - } - ++txr->packets; - ++processed; - ++ifp->if_opackets; - getmicrotime(&txr->watchdog_time); - - /* Try the next packet */ - ++txd; - ++buf; - ++work; - /* reset with a wrap */ - if (__predict_false(!work)) { - work -= txr->num_desc; - buf = txr->tx_buffers; - txd = txr->tx_base; - } - prefetch(txd); - } while (__predict_true(--limit)); - - ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask); - work += txr->num_desc; - txr->next_to_clean = work; + /* With MSI-X we use auto clear */ + if (adapter->msix_mem) { + mask = IXGBE_EIMS_ENABLE_MASK; + /* Don't autoclear Link */ + mask &= ~IXGBE_EIMS_OTHER; + mask &= ~IXGBE_EIMS_LSC; +#ifdef PCI_IOV + mask &= ~IXGBE_EIMS_MAILBOX; +#endif + IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask); + } /* - ** Watchdog calculation, we know there's - ** work outstanding or the first return - ** would have been taken, so none processed - ** for too long indicates a hang. + ** Now enable all queues, this is done separately to + ** allow for handling the extended (beyond 32) MSIX + ** vectors that can be used by 82599 */ - getmicrotime(&now); - timersub(&now, &txr->watchdog_time, &elapsed); - if (!processed && tvtohz(&elapsed) > IXGBE_WATCHDOG) - txr->queue_status = IXGBE_QUEUE_HUNG; + for (int i = 0; i < adapter->num_queues; i++, que++) + ixgbe_enable_queue(adapter, que->msix); - if (txr->tx_avail == txr->num_desc) - txr->queue_status = IXGBE_QUEUE_IDLE; + IXGBE_WRITE_FLUSH(hw); return; } -/********************************************************************* - * - * Refresh mbuf buffers for RX descriptor rings - * - now keeps its own state so discards due to resource - * exhaustion are unnecessary, if an mbuf cannot be obtained - * it just returns, keeping its placeholder, thus it can simply - * be recalled to try again. 
- * - **********************************************************************/ static void -ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit) -{ - struct adapter *adapter = rxr->adapter; - struct ixgbe_rx_buf *rxbuf; - struct mbuf *mp; - int i, j, error; - bool refreshed = false; - - i = j = rxr->next_to_refresh; - /* Control the loop with one beyond */ - if (++j == rxr->num_desc) - j = 0; - - while (j != limit) { - rxbuf = &rxr->rx_buffers[i]; - if (rxbuf->buf == NULL) { - mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT, - MT_DATA, M_PKTHDR, rxr->mbuf_sz); - if (mp == NULL) { - rxr->no_jmbuf.ev_count++; - goto update; - } - if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN)) - m_adj(mp, ETHER_ALIGN); - } else - mp = rxbuf->buf; - - mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; - - /* If we're dealing with an mbuf that was copied rather - * than replaced, there's no need to go through busdma. - */ - if ((rxbuf->flags & IXGBE_RX_COPY) == 0) { - /* Get the memory mapping */ - error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, - rxbuf->pmap, mp, BUS_DMA_NOWAIT); - if (error != 0) { - printf("Refresh mbufs: payload dmamap load" - " failure - %d\n", error); - m_free(mp); - rxbuf->buf = NULL; - goto update; - } - rxbuf->buf = mp; - bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap, - 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD); - rxbuf->addr = rxr->rx_base[i].read.pkt_addr = - htole64(rxbuf->pmap->dm_segs[0].ds_addr); - } else { - rxr->rx_base[i].read.pkt_addr = rxbuf->addr; - rxbuf->flags &= ~IXGBE_RX_COPY; - } - - refreshed = true; - /* Next is precalculated */ - i = j; - rxr->next_to_refresh = i; - if (++j == rxr->num_desc) - j = 0; - } -update: - if (refreshed) /* Update hardware tail index */ - IXGBE_WRITE_REG(&adapter->hw, - IXGBE_RDT(rxr->me), rxr->next_to_refresh); - return; -} - -/********************************************************************* - * - * Allocate memory for rx_buffer structures. 
Since we use one - * rx_buffer per received packet, the maximum number of rx_buffer's - * that we'll need is equal to the number of receive descriptors - * that we've allocated. - * - **********************************************************************/ -static int -ixgbe_allocate_receive_buffers(struct rx_ring *rxr) +ixgbe_disable_intr(struct adapter *adapter) { - struct adapter *adapter = rxr->adapter; - device_t dev = adapter->dev; - struct ixgbe_rx_buf *rxbuf; - int i, bsize, error; - - bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc; - if (!(rxr->rx_buffers = - (struct ixgbe_rx_buf *) malloc(bsize, - M_DEVBUF, M_NOWAIT | M_ZERO))) { - aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n"); - error = ENOMEM; - goto fail; - } - - if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */ - 1, 0, /* alignment, bounds */ - MJUM16BYTES, /* maxsize */ - 1, /* nsegments */ - MJUM16BYTES, /* maxsegsize */ - 0, /* flags */ - &rxr->ptag))) { - aprint_error_dev(dev, "Unable to create RX DMA tag\n"); - goto fail; - } - - for (i = 0; i < rxr->num_desc; i++, rxbuf++) { - rxbuf = &rxr->rx_buffers[i]; - error = ixgbe_dmamap_create(rxr->ptag, - BUS_DMA_NOWAIT, &rxbuf->pmap); - if (error) { - aprint_error_dev(dev, "Unable to create RX dma map\n"); - goto fail; - } + if (adapter->msix_mem) + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0); + if (adapter->hw.mac.type == ixgbe_mac_82598EB) { + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0); + } else { + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0); } - - return (0); - -fail: - /* Frees all, but can handle partial completion */ - ixgbe_free_receive_structures(adapter); - return (error); + IXGBE_WRITE_FLUSH(&adapter->hw); + return; } /* -** Used to detect a descriptor that has -** been merged by Hardware RSC. +** Get the width and transaction speed of +** the slot this adapter is plugged into. 
*/ -static inline u32 -ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx) -{ - return (le32toh(rx->wb.lower.lo_dword.data) & - IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT; -} - -/********************************************************************* - * - * Initialize Hardware RSC (LRO) feature on 82599 - * for an RX ring, this is toggled by the LRO capability - * even though it is transparent to the stack. - * - * NOTE: since this HW feature only works with IPV4 and - * our testing has shown soft LRO to be as effective - * I have decided to disable this by default. - * - **********************************************************************/ static void -ixgbe_setup_hw_rsc(struct rx_ring *rxr) +ixgbe_get_slot_info(struct adapter *adapter) { - struct adapter *adapter = rxr->adapter; - struct ixgbe_hw *hw = &adapter->hw; - u32 rscctrl, rdrxctl; - - /* If turning LRO/RSC off we need to disable it */ - if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) { - rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me)); - rscctrl &= ~IXGBE_RSCCTL_RSCEN; - return; - } + device_t dev = adapter->dev; + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_mac_info *mac = &hw->mac; + u16 link; - rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); - rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE; -#ifdef DEV_NETMAP /* crcstrip is optional in netmap */ - if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip) -#endif /* DEV_NETMAP */ - rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP; - rdrxctl |= IXGBE_RDRXCTL_RSCACKC; - IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl); + /* For most devices simply call the shared code routine */ + if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) { + ixgbe_get_bus_info(hw); + /* These devices don't use PCI-E */ + switch (hw->mac.type) { + case ixgbe_mac_X550EM_x: + return; + default: + goto display; + } + } - rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me)); - rscctrl |= IXGBE_RSCCTL_RSCEN; /* - ** Limit the total number of descriptors that - ** can be combined, so it does 
not exceed 64K - */ - if (rxr->mbuf_sz == MCLBYTES) - rscctrl |= IXGBE_RSCCTL_MAXDESC_16; - else if (rxr->mbuf_sz == MJUMPAGESIZE) - rscctrl |= IXGBE_RSCCTL_MAXDESC_8; - else if (rxr->mbuf_sz == MJUM9BYTES) - rscctrl |= IXGBE_RSCCTL_MAXDESC_4; - else /* Using 16K cluster */ - rscctrl |= IXGBE_RSCCTL_MAXDESC_1; - - IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl); - - /* Enable TCP header recognition */ - IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), - (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | - IXGBE_PSRTYPE_TCPHDR)); - - /* Disable RSC for ACK packets */ - IXGBE_WRITE_REG(hw, IXGBE_RSCDBU, - (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU))); - - rxr->hw_rsc = TRUE; -} + ** For the Quad port adapter we need to parse back + ** up the PCI tree to find the speed of the expansion + ** slot into which this adapter is plugged. A bit more work. + */ + dev = device_parent(device_parent(dev)); +#ifdef IXGBE_DEBUG + device_printf(dev, "parent pcib = %x,%x,%x\n", + pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); +#endif + dev = device_parent(device_parent(dev)); +#ifdef IXGBE_DEBUG + device_printf(dev, "slot pcib = %x,%x,%x\n", + pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); +#endif + /* Now get the PCI Express Capabilities offset */ + /* ...and read the Link Status Register */ + link = IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_LINK_STATUS); + switch (link & IXGBE_PCI_LINK_WIDTH) { + case IXGBE_PCI_LINK_WIDTH_1: + hw->bus.width = ixgbe_bus_width_pcie_x1; + break; + case IXGBE_PCI_LINK_WIDTH_2: + hw->bus.width = ixgbe_bus_width_pcie_x2; + break; + case IXGBE_PCI_LINK_WIDTH_4: + hw->bus.width = ixgbe_bus_width_pcie_x4; + break; + case IXGBE_PCI_LINK_WIDTH_8: + hw->bus.width = ixgbe_bus_width_pcie_x8; + break; + default: + hw->bus.width = ixgbe_bus_width_unknown; + break; + } + + switch (link & IXGBE_PCI_LINK_SPEED) { + case IXGBE_PCI_LINK_SPEED_2500: + hw->bus.speed = ixgbe_bus_speed_2500; + break; + case IXGBE_PCI_LINK_SPEED_5000: + hw->bus.speed = 
ixgbe_bus_speed_5000; + break; + case IXGBE_PCI_LINK_SPEED_8000: + hw->bus.speed = ixgbe_bus_speed_8000; + break; + default: + hw->bus.speed = ixgbe_bus_speed_unknown; + break; + } + mac->ops.set_lan_id(hw); -static void -ixgbe_free_receive_ring(struct rx_ring *rxr) -{ - struct ixgbe_rx_buf *rxbuf; - int i; +display: + device_printf(dev,"PCI Express Bus: Speed %s Width %s\n", + ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s": + (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s": + (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s":"Unknown"), + (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "x8" : + (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "x4" : + (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "x1" : + ("Unknown")); - for (i = 0; i < rxr->num_desc; i++) { - rxbuf = &rxr->rx_buffers[i]; - if (rxbuf->buf != NULL) { - bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap, - 0, rxbuf->buf->m_pkthdr.len, - BUS_DMASYNC_POSTREAD); - ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap); - rxbuf->buf->m_flags |= M_PKTHDR; - m_freem(rxbuf->buf); - rxbuf->buf = NULL; - rxbuf->flags = 0; - } - } + if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) && + ((hw->bus.width <= ixgbe_bus_width_pcie_x4) && + (hw->bus.speed == ixgbe_bus_speed_2500))) { + device_printf(dev, "PCI-Express bandwidth available" + " for this card\n is not sufficient for" + " optimal performance.\n"); + device_printf(dev, "For optimal performance a x8 " + "PCIE, or x4 PCIE Gen2 slot is required.\n"); + } + if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) && + ((hw->bus.width <= ixgbe_bus_width_pcie_x8) && + (hw->bus.speed < ixgbe_bus_speed_8000))) { + device_printf(dev, "PCI-Express bandwidth available" + " for this card\n is not sufficient for" + " optimal performance.\n"); + device_printf(dev, "For optimal performance a x8 " + "PCIE Gen3 slot is required.\n"); + } + + return; } -/********************************************************************* - * - * Initialize a receive ring and its buffers. 
- * - **********************************************************************/ -static int -ixgbe_setup_receive_ring(struct rx_ring *rxr) +/* +** Setup the correct IVAR register for a particular MSIX interrupt +** (yes this is all very magic and confusing :) +** - entry is the register array entry +** - vector is the MSIX vector for this queue +** - type is RX/TX/MISC +*/ +static void +ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type) { - struct adapter *adapter; - struct ixgbe_rx_buf *rxbuf; -#ifdef LRO - struct ifnet *ifp; - struct lro_ctrl *lro = &rxr->lro; -#endif /* LRO */ - int rsize, error = 0; -#ifdef DEV_NETMAP - struct netmap_adapter *na = NA(rxr->adapter->ifp); - struct netmap_slot *slot; -#endif /* DEV_NETMAP */ - - adapter = rxr->adapter; -#ifdef LRO - ifp = adapter->ifp; -#endif /* LRO */ + struct ixgbe_hw *hw = &adapter->hw; + u32 ivar, index; - /* Clear the ring contents */ - IXGBE_RX_LOCK(rxr); -#ifdef DEV_NETMAP - /* same as in ixgbe_setup_transmit_ring() */ - slot = netmap_reset(na, NR_RX, rxr->me, 0); -#endif /* DEV_NETMAP */ - rsize = roundup2(adapter->num_rx_desc * - sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN); - bzero((void *)rxr->rx_base, rsize); - /* Cache the size */ - rxr->mbuf_sz = adapter->rx_mbuf_sz; + vector |= IXGBE_IVAR_ALLOC_VAL; - /* Free current RX buffer structs and their mbufs */ - ixgbe_free_receive_ring(rxr); + switch (hw->mac.type) { - IXGBE_RX_UNLOCK(rxr); + case ixgbe_mac_82598EB: + if (type == -1) + entry = IXGBE_IVAR_OTHER_CAUSES_INDEX; + else + entry += (type * 64); + index = (entry >> 2) & 0x1F; + ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index)); + ivar &= ~(0xFF << (8 * (entry & 0x3))); + ivar |= (vector << (8 * (entry & 0x3))); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar); + break; - /* Now reinitialize our supply of jumbo mbufs. The number - * or size of jumbo mbufs may have changed. 
- */ - ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat, - 2 * adapter->num_rx_desc, adapter->rx_mbuf_sz); + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + if (type == -1) { /* MISC IVAR */ + index = (entry & 1) * 8; + ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC); + ivar &= ~(0xFF << index); + ivar |= (vector << index); + IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar); + } else { /* RX/TX IVARS */ + index = (16 * (entry & 1)) + (8 * type); + ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1)); + ivar &= ~(0xFF << index); + ivar |= (vector << index); + IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar); + } - IXGBE_RX_LOCK(rxr); + default: + break; + } +} - /* Now replenish the mbufs */ - for (int j = 0; j != rxr->num_desc; ++j) { - struct mbuf *mp; +static void +ixgbe_configure_ivars(struct adapter *adapter) +{ + struct ix_queue *que = adapter->queues; + u32 newitr; - rxbuf = &rxr->rx_buffers[j]; -#ifdef DEV_NETMAP + if (ixgbe_max_interrupt_rate > 0) + newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8; + else { /* - * In netmap mode, fill the map and set the buffer - * address in the NIC ring, considering the offset - * between the netmap and NIC rings (see comment in - * ixgbe_setup_transmit_ring() ). 
No need to allocate - * an mbuf, so end the block with a continue; - */ - if (slot) { - int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j); - uint64_t paddr; - void *addr; - - addr = PNMB(na, slot + sj, &paddr); - netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr); - /* Update descriptor and the cached value */ - rxr->rx_base[j].read.pkt_addr = htole64(paddr); - rxbuf->addr = htole64(paddr); - continue; - } -#endif /* DEV_NETMAP */ - rxbuf->flags = 0; - rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT, - MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); - if (rxbuf->buf == NULL) { - error = ENOBUFS; - goto fail; - } - mp = rxbuf->buf; - mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; - /* Get the memory mapping */ - error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, - rxbuf->pmap, mp, BUS_DMA_NOWAIT); - if (error != 0) - goto fail; - bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap, - 0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD); - /* Update the descriptor and the cached value */ - rxr->rx_base[j].read.pkt_addr = - htole64(rxbuf->pmap->dm_segs[0].ds_addr); - rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr); + ** Disable DMA coalescing if interrupt moderation is + ** disabled. + */ + adapter->dmac = 0; + newitr = 0; } + for (int i = 0; i < adapter->num_queues; i++, que++) { + struct rx_ring *rxr = &adapter->rx_rings[i]; + struct tx_ring *txr = &adapter->tx_rings[i]; + /* First the RX queue entry */ + ixgbe_set_ivar(adapter, rxr->me, que->msix, 0); + /* ... 
and the TX */ + ixgbe_set_ivar(adapter, txr->me, que->msix, 1); + /* Set an Initial EITR value */ + IXGBE_WRITE_REG(&adapter->hw, + IXGBE_EITR(que->msix), newitr); + } - /* Setup our descriptor indices */ - rxr->next_to_check = 0; - rxr->next_to_refresh = 0; - rxr->lro_enabled = FALSE; - rxr->rx_copies.ev_count = 0; - rxr->rx_bytes.ev_count = 0; - rxr->vtag_strip = FALSE; + /* For the Link interrupt */ + ixgbe_set_ivar(adapter, 1, adapter->vector, -1); +} - ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); +/* +** ixgbe_sfp_probe - called in the local timer to +** determine if a port had optics inserted. +*/ +static bool +ixgbe_sfp_probe(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + device_t dev = adapter->dev; + bool result = FALSE; - /* - ** Now set up the LRO interface: - */ - if (ixgbe_rsc_enable) - ixgbe_setup_hw_rsc(rxr); -#ifdef LRO - else if (ifp->if_capenable & IFCAP_LRO) { - device_t dev = adapter->dev; - int err = tcp_lro_init(lro); - if (err) { - device_printf(dev, "LRO Initialization failed!\n"); - goto fail; - } - INIT_DEBUGOUT("RX Soft LRO Initialized\n"); - rxr->lro_enabled = TRUE; - lro->ifp = adapter->ifp; + if ((hw->phy.type == ixgbe_phy_nl) && + (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) { + s32 ret = hw->phy.ops.identify_sfp(hw); + if (ret) + goto out; + ret = hw->phy.ops.reset(hw); + if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) { + device_printf(dev,"Unsupported SFP+ module detected!"); + device_printf(dev, "Reload driver with supported module.\n"); + adapter->sfp_probe = FALSE; + goto out; + } else + device_printf(dev, "SFP+ module detected!\n"); + /* We now have supported optics */ + adapter->sfp_probe = FALSE; + /* Set the optics type so system reports correctly */ + ixgbe_setup_optics(adapter); + result = TRUE; } -#endif /* LRO */ +out: + return (result); +} - IXGBE_RX_UNLOCK(rxr); - return (0); +/* +** Tasklet handler for MSIX Link interrupts +** - do outside 
interrupt since it might sleep +*/ +static void +ixgbe_handle_link(void *context) +{ + struct adapter *adapter = context; + struct ixgbe_hw *hw = &adapter->hw; -fail: - ixgbe_free_receive_ring(rxr); - IXGBE_RX_UNLOCK(rxr); - return (error); + ixgbe_check_link(hw, + &adapter->link_speed, &adapter->link_up, 0); + ixgbe_update_link_status(adapter); + + /* Re-enable link interrupts */ + IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_LSC); } -/********************************************************************* - * - * Initialize all receive rings. - * - **********************************************************************/ -static int -ixgbe_setup_receive_structures(struct adapter *adapter) +/* +** Tasklet for handling SFP module interrupts +*/ +static void +ixgbe_handle_mod(void *context) { - struct rx_ring *rxr = adapter->rx_rings; - int j; + struct adapter *adapter = context; + struct ixgbe_hw *hw = &adapter->hw; + enum ixgbe_phy_type orig_type = hw->phy.type; + device_t dev = adapter->dev; + u32 err; - for (j = 0; j < adapter->num_queues; j++, rxr++) - if (ixgbe_setup_receive_ring(rxr)) - goto fail; + IXGBE_CORE_LOCK(adapter); - return (0); -fail: - /* - * Free RX buffers allocated so far, we will only handle - * the rings that completed, the failing case will have - * cleaned up for itself. 'j' failed, so its the terminus. 
- */ - for (int i = 0; i < j; ++i) { - rxr = &adapter->rx_rings[i]; - ixgbe_free_receive_ring(rxr); + /* Check to see if the PHY type changed */ + if (hw->phy.ops.identify) { + hw->phy.type = ixgbe_phy_unknown; + hw->phy.ops.identify(hw); } - return (ENOBUFS); -} + if (hw->phy.type != orig_type) { + device_printf(dev, "Detected phy_type %d\n", hw->phy.type); -static void -ixgbe_initialise_rss_mapping(struct adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - uint32_t reta; - int i, j, queue_id; - uint32_t rss_key[10]; - uint32_t mrqc; -#ifdef RSS - uint32_t rss_hash_config; -#endif + if (hw->phy.type == ixgbe_phy_none) { + hw->phy.sfp_type = ixgbe_sfp_type_unknown; + goto out; + } - /* Setup RSS */ - reta = 0; + /* Try to do the initialization that was skipped before */ + if (hw->phy.ops.init) + hw->phy.ops.init(hw); + if (hw->phy.ops.reset) + hw->phy.ops.reset(hw); + } -#ifdef RSS - /* Fetch the configured RSS key */ - rss_getkey((uint8_t *) &rss_key); -#else - /* set up random bits */ - cprng_fast(&rss_key, sizeof(rss_key)); -#endif + err = hw->phy.ops.identify_sfp(hw); + if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { + device_printf(dev, + "Unsupported SFP+ module type was detected.\n"); + goto out; + } - /* Set up the redirection table */ - for (i = 0, j = 0; i < 128; i++, j++) { - if (j == adapter->num_queues) j = 0; -#ifdef RSS - /* - * Fetch the RSS bucket id for the given indirection entry. - * Cap it at the number of configured buckets (which is - * num_queues.) - */ - queue_id = rss_get_indirection_to_bucket(i); - queue_id = queue_id % adapter->num_queues; -#else - queue_id = (j * 0x11); -#endif - /* - * The low 8 bits are for hash value (n+0); - * The next 8 bits are for hash value (n+1), etc. 
- */ - reta = reta >> 8; - reta = reta | ( ((uint32_t) queue_id) << 24); - if ((i & 3) == 3) { - IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta); - reta = 0; - } + err = hw->mac.ops.setup_sfp(hw); + if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { + device_printf(dev, + "Setup failure - unsupported SFP+ module type.\n"); + goto out; + } +out: + /* Update media type */ + switch (hw->mac.ops.get_media_type(hw)) { + case ixgbe_media_type_fiber: + adapter->optics = IFM_10G_SR; + break; + case ixgbe_media_type_copper: + adapter->optics = IFM_10G_TWINAX; + break; + case ixgbe_media_type_cx4: + adapter->optics = IFM_10G_CX4; + break; + default: + adapter->optics = 0; + break; } - /* Now fill our hash function seeds */ - for (i = 0; i < 10; i++) - IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rss_key[i]); + /* Adjust media types shown in ifconfig */ + ifmedia_removeall(&adapter->media); + /* get_supported_phy_layer will call hw->phy.ops.identify_sfp() */ + adapter->phy_layer = ixgbe_get_supported_physical_layer(hw); + ixgbe_add_media_types(adapter); + ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); - /* Perform hash on these packet types */ -#ifdef RSS - mrqc = IXGBE_MRQC_RSSEN; - rss_hash_config = rss_gethashconfig(); - if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) - mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4; - if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) - mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP; - if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6) - mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6; - if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6) - mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP; - if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX) - mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX; - if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6_EX) - mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP; - if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) - mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP; - if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4_EX) - device_printf(adapter->dev, - "%s: RSS_HASHTYPE_RSS_UDP_IPV4_EX defined, " - "but not 
supported\n", __func__); - if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6) - mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP; - if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6_EX) - mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP; -#else - /* - * Disable UDP - IP fragments aren't currently being handled - * and so we end up with a mix of 2-tuple and 4-tuple - * traffic. - */ - mrqc = IXGBE_MRQC_RSSEN - | IXGBE_MRQC_RSS_FIELD_IPV4 - | IXGBE_MRQC_RSS_FIELD_IPV4_TCP -#if 0 - | IXGBE_MRQC_RSS_FIELD_IPV4_UDP -#endif - | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP - | IXGBE_MRQC_RSS_FIELD_IPV6_EX - | IXGBE_MRQC_RSS_FIELD_IPV6 - | IXGBE_MRQC_RSS_FIELD_IPV6_TCP -#if 0 - | IXGBE_MRQC_RSS_FIELD_IPV6_UDP - | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP -#endif - ; -#endif /* RSS */ - IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc); + softint_schedule(adapter->msf_si); + IXGBE_CORE_UNLOCK(adapter); + return; } -/********************************************************************* - * - * Setup receive registers and features. - * - **********************************************************************/ -#define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2 - -#define BSIZEPKT_ROUNDUP ((1<rx_rings; - struct ixgbe_hw *hw = &adapter->hw; - struct ifnet *ifp = adapter->ifp; - u32 bufsz, rxctrl, fctrl, srrctl, rxcsum; - u32 hlreg; - - - /* - * Make sure receives are disabled while - * setting up the descriptor ring - */ - rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); - IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, - rxctrl & ~IXGBE_RXCTRL_RXEN); + struct adapter *adapter = context; + struct ixgbe_hw *hw = &adapter->hw; + u32 autoneg; + bool negotiate; - /* Enable broadcasts */ - fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); - fctrl |= IXGBE_FCTRL_BAM; - fctrl |= IXGBE_FCTRL_DPF; - fctrl |= IXGBE_FCTRL_PMCF; - IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); + IXGBE_CORE_LOCK(adapter); - /* Set for Jumbo Frames? 
*/ - hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0); - if (ifp->if_mtu > ETHERMTU) - hlreg |= IXGBE_HLREG0_JUMBOEN; - else - hlreg &= ~IXGBE_HLREG0_JUMBOEN; -#ifdef DEV_NETMAP - /* crcstrip is conditional in netmap (in RDRXCTL too ?) */ - if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip) - hlreg &= ~IXGBE_HLREG0_RXCRCSTRP; + autoneg = hw->phy.autoneg_advertised; + if ((!autoneg) && (hw->mac.ops.get_link_capabilities)) + hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate); else - hlreg |= IXGBE_HLREG0_RXCRCSTRP; -#endif /* DEV_NETMAP */ - IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg); - - bufsz = (adapter->rx_mbuf_sz + - BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; - - for (i = 0; i < adapter->num_queues; i++, rxr++) { - u64 rdba = rxr->rxdma.dma_paddr; - - /* Setup the Base and Length of the Rx Descriptor Ring */ - IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i), - (rdba & 0x00000000ffffffffULL)); - IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32)); - IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i), - adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc)); - - /* Set up the SRRCTL register */ - srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i)); - srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK; - srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK; - srrctl |= bufsz; - srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; - - /* - * Set DROP_EN iff we have no flow control and >1 queue. - * Note that srrctl was cleared shortly before during reset, - * so we do not need to clear the bit, but do it just in case - * this code is moved elsewhere. 
- */ - if (adapter->num_queues > 1 && - adapter->fc == ixgbe_fc_none) { - srrctl |= IXGBE_SRRCTL_DROP_EN; - } else { - srrctl &= ~IXGBE_SRRCTL_DROP_EN; - } - - IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl); - - /* Setup the HW Rx Head and Tail Descriptor Pointers */ - IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0); - IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0); - - /* Set the processing limit */ - rxr->process_limit = ixgbe_rx_process_limit; - } - - if (adapter->hw.mac.type != ixgbe_mac_82598EB) { - u32 psrtype = IXGBE_PSRTYPE_TCPHDR | - IXGBE_PSRTYPE_UDPHDR | - IXGBE_PSRTYPE_IPV4HDR | - IXGBE_PSRTYPE_IPV6HDR; - IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype); - } - - rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM); - - ixgbe_initialise_rss_mapping(adapter); + negotiate = 0; + if (hw->mac.ops.setup_link) + hw->mac.ops.setup_link(hw, autoneg, TRUE); - if (adapter->num_queues > 1) { - /* RSS and RX IPP Checksum are mutually exclusive */ - rxcsum |= IXGBE_RXCSUM_PCSD; - } + IXGBE_CORE_UNLOCK(adapter); + return; +} - if (ifp->if_capenable & IFCAP_RXCSUM) - rxcsum |= IXGBE_RXCSUM_PCSD; +/* +** Tasklet for handling interrupts from an external PHY +*/ +static void +ixgbe_handle_phy(void *context) +{ + struct adapter *adapter = context; + struct ixgbe_hw *hw = &adapter->hw; + int error; - if (!(rxcsum & IXGBE_RXCSUM_PCSD)) - rxcsum |= IXGBE_RXCSUM_IPPCSE; + error = hw->phy.ops.handle_lasi(hw); + if (error == IXGBE_ERR_OVERTEMP) + device_printf(adapter->dev, + "CRITICAL: EXTERNAL PHY OVER TEMP!! 
" + " PHY will downshift to lower power state!\n"); + else if (error) + device_printf(adapter->dev, + "Error handling LASI interrupt: %d\n", + error); + return; +} - IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum); +#ifdef IXGBE_FDIR +/* +** Tasklet for reinitializing the Flow Director filter table +*/ +static void +ixgbe_reinit_fdir(void *context) +{ + struct adapter *adapter = context; + struct ifnet *ifp = adapter->ifp; + if (adapter->fdir_reinit != 1) /* Shouldn't happen */ + return; + ixgbe_reinit_fdir_tables_82599(&adapter->hw); + adapter->fdir_reinit = 0; + /* re-enable flow director interrupts */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR); + /* Restart the interface */ + ifp->if_flags |= IFF_RUNNING; return; } +#endif /********************************************************************* * - * Free all receive rings. + * Configure DMA Coalescing * **********************************************************************/ static void -ixgbe_free_receive_structures(struct adapter *adapter) +ixgbe_config_dmac(struct adapter *adapter) { - struct rx_ring *rxr = adapter->rx_rings; + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_dmac_config *dcfg = &hw->mac.dmac_config; - INIT_DEBUGOUT("ixgbe_free_receive_structures: begin"); + if (hw->mac.type < ixgbe_mac_X550 || + !hw->mac.ops.dmac_config) + return; - for (int i = 0; i < adapter->num_queues; i++, rxr++) { -#ifdef LRO - struct lro_ctrl *lro = &rxr->lro; -#endif /* LRO */ - ixgbe_free_receive_buffers(rxr); -#ifdef LRO - /* Free LRO memory */ - tcp_lro_free(lro); -#endif /* LRO */ - /* Free the ring memory as well */ - ixgbe_dma_free(adapter, &rxr->rxdma); - IXGBE_RX_LOCK_DESTROY(rxr); - } + if (dcfg->watchdog_timer ^ adapter->dmac || + dcfg->link_speed ^ adapter->link_speed) { + dcfg->watchdog_timer = adapter->dmac; + dcfg->fcoe_en = false; + dcfg->link_speed = adapter->link_speed; + dcfg->num_tcs = 1; + + INIT_DEBUGOUT2("dmac settings: watchdog %d, link speed %d\n", + dcfg->watchdog_timer, 
dcfg->link_speed); - free(adapter->rx_rings, M_DEVBUF); + hw->mac.ops.dmac_config(hw); + } } - -/********************************************************************* - * - * Free receive ring data structures +/* + * Checks whether the adapter's ports are capable of + * Wake On LAN by reading the adapter's NVM. * - **********************************************************************/ + * Sets each port's hw->wol_enabled value depending + * on the value read here. + */ static void -ixgbe_free_receive_buffers(struct rx_ring *rxr) +ixgbe_check_wol_support(struct adapter *adapter) { - struct adapter *adapter = rxr->adapter; - struct ixgbe_rx_buf *rxbuf; - - INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin"); + struct ixgbe_hw *hw = &adapter->hw; + u16 dev_caps = 0; - /* Cleanup any existing buffers */ - if (rxr->rx_buffers != NULL) { - for (int i = 0; i < adapter->num_rx_desc; i++) { - rxbuf = &rxr->rx_buffers[i]; - if (rxbuf->buf != NULL) { - bus_dmamap_sync(rxr->ptag->dt_dmat, - rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len, - BUS_DMASYNC_POSTREAD); - ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap); - rxbuf->buf->m_flags |= M_PKTHDR; - m_freem(rxbuf->buf); - } - rxbuf->buf = NULL; - if (rxbuf->pmap != NULL) { - ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap); - rxbuf->pmap = NULL; - } - } - if (rxr->rx_buffers != NULL) { - free(rxr->rx_buffers, M_DEVBUF); - rxr->rx_buffers = NULL; - } - } + /* Find out WoL support for port */ + adapter->wol_support = hw->wol_enabled = 0; + ixgbe_get_device_caps(hw, &dev_caps); + if ((dev_caps & IXGBE_DEVICE_CAPS_WOL_PORT0_1) || + ((dev_caps & IXGBE_DEVICE_CAPS_WOL_PORT0) && + hw->bus.func == 0)) + adapter->wol_support = hw->wol_enabled = 1; - if (rxr->ptag != NULL) { - ixgbe_dma_tag_destroy(rxr->ptag); - rxr->ptag = NULL; - } + /* Save initial wake up filter configuration */ + adapter->wufc = IXGBE_READ_REG(hw, IXGBE_WUFC); return; } -static __inline void -ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype) +/* + 
* Prepare the adapter/port for LPLU and/or WoL + */ +static int +ixgbe_setup_low_power_mode(struct adapter *adapter) { - int s; - -#ifdef LRO - struct adapter *adapter = ifp->if_softc; - struct ethercom *ec = &adapter->osdep.ec; + struct ixgbe_hw *hw = &adapter->hw; + device_t dev = adapter->dev; + s32 error = 0; - /* - * ATM LRO is only for IP/TCP packets and TCP checksum of the packet - * should be computed by hardware. Also it should not have VLAN tag in - * ethernet header. In case of IPv6 we do not yet support ext. hdrs. - */ - if (rxr->lro_enabled && - (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 && - (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 && - ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) == - (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) || - (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) == - (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) && - (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == - (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) { - /* - * Send to the stack if: - ** - LRO not enabled, or - ** - no LRO resources, or - ** - lro enqueue fails - */ - if (rxr->lro.lro_cnt != 0) - if (tcp_lro_rx(&rxr->lro, m, 0) == 0) - return; - } -#endif /* LRO */ + KASSERT(mutex_owned(&adapter->core_mtx)); - IXGBE_RX_UNLOCK(rxr); + /* Limit power management flow to X550EM baseT */ + if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T + && hw->phy.ops.enter_lplu) { + /* X550EM baseT adapters need a special LPLU flow */ + hw->phy.reset_disable = true; + ixgbe_stop(adapter); + error = hw->phy.ops.enter_lplu(hw); + if (error) + device_printf(dev, + "Error entering LPLU: %d\n", error); + hw->phy.reset_disable = false; + } else { + /* Just stop for other adapters */ + ixgbe_stop(adapter); + } - s = splnet(); - /* Pass this up to any BPF listeners. 
*/ - bpf_mtap(ifp, m); - if_input(ifp, m); - splx(s); + if (!hw->wol_enabled) { + ixgbe_set_phy_power(hw, FALSE); + IXGBE_WRITE_REG(hw, IXGBE_WUFC, 0); + IXGBE_WRITE_REG(hw, IXGBE_WUC, 0); + } else { + /* Turn off support for APM wakeup. (Using ACPI instead) */ + IXGBE_WRITE_REG(hw, IXGBE_GRC, + IXGBE_READ_REG(hw, IXGBE_GRC) & ~(u32)2); - IXGBE_RX_LOCK(rxr); -} - -static __inline void -ixgbe_rx_discard(struct rx_ring *rxr, int i) -{ - struct ixgbe_rx_buf *rbuf; - - rbuf = &rxr->rx_buffers[i]; + /* + * Clear Wake Up Status register to prevent any previous wakeup + * events from waking us up immediately after we suspend. + */ + IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff); + /* + * Program the Wakeup Filter Control register with user filter + * settings + */ + IXGBE_WRITE_REG(hw, IXGBE_WUFC, adapter->wufc); - /* - ** With advanced descriptors the writeback - ** clobbers the buffer addrs, so its easier - ** to just free the existing mbufs and take - ** the normal refresh path to get new buffers - ** and mapping. - */ + /* Enable wakeups and power management in Wakeup Control */ + IXGBE_WRITE_REG(hw, IXGBE_WUC, + IXGBE_WUC_WKEN | IXGBE_WUC_PME_EN); - if (rbuf->buf != NULL) {/* Partial chain ? */ - rbuf->fmp->m_flags |= M_PKTHDR; - m_freem(rbuf->fmp); - rbuf->fmp = NULL; - rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */ - } else if (rbuf->buf) { - m_free(rbuf->buf); - rbuf->buf = NULL; } - rbuf->flags = 0; - - return; + return error; } - -/********************************************************************* - * - * This routine executes in interrupt context. It replenishes - * the mbufs in the descriptor and sends data which has been - * dma'ed into host memory to upper layer. +/********************************************************************** * - * We loop at most count times if count is > 0, or until done if - * count < 0. + * Update the board statistics counters. * - * Return TRUE for more work, FALSE for all clean. 
- *********************************************************************/ -static bool -ixgbe_rxeof(struct ix_queue *que) + **********************************************************************/ +static void +ixgbe_update_stats_counters(struct adapter *adapter) { - struct adapter *adapter = que->adapter; - struct rx_ring *rxr = que->rxr; - struct ifnet *ifp = adapter->ifp; -#ifdef LRO - struct lro_ctrl *lro = &rxr->lro; - struct lro_entry *queued; -#endif /* LRO */ - int i, nextp, processed = 0; - u32 staterr = 0; - u16 count = rxr->process_limit; - union ixgbe_adv_rx_desc *cur; - struct ixgbe_rx_buf *rbuf, *nbuf; -#ifdef RSS - u16 pkt_info; -#endif + struct ifnet *ifp = adapter->ifp; + struct ixgbe_hw *hw = &adapter->hw; + u32 missed_rx = 0, bprc, lxon, lxoff, total; + u64 total_missed_rx = 0; + uint64_t crcerrs, rlec; + struct ixgbe_hw_stats *stats = &adapter->stats.pf; - IXGBE_RX_LOCK(rxr); + crcerrs = IXGBE_READ_REG(hw, IXGBE_CRCERRS); + stats->crcerrs.ev_count += crcerrs; + stats->illerrc.ev_count += IXGBE_READ_REG(hw, IXGBE_ILLERRC); + stats->errbc.ev_count += IXGBE_READ_REG(hw, IXGBE_ERRBC); + stats->mspdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MSPDC); -#ifdef DEV_NETMAP - /* Same as the txeof routine: wakeup clients on intr. */ - if (netmap_rx_irq(ifp, rxr->me, &processed)) { - IXGBE_RX_UNLOCK(rxr); - return (FALSE); + for (int i = 0; i < __arraycount(stats->qprc); i++) { + int j = i % adapter->num_queues; + stats->qprc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRC(i)); + stats->qptc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPTC(i)); + stats->qprdc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRDC(i)); } -#endif /* DEV_NETMAP */ + stats->mlfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MLFC); + stats->mrfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MRFC); + rlec = IXGBE_READ_REG(hw, IXGBE_RLEC); + stats->rlec.ev_count += rlec; - for (i = rxr->next_to_check; count != 0;) { - struct mbuf *sendmp, *mp; - u32 rsc, ptype; - u16 len; - u16 vtag = 0; - bool eop; - - /* Sync the ring. 
*/ - ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); + /* Hardware workaround, gprc counts missed packets */ + stats->gprc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPRC) - missed_rx; - cur = &rxr->rx_base[i]; - staterr = le32toh(cur->wb.upper.status_error); -#ifdef RSS - pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info); -#endif + lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC); + stats->lxontxc.ev_count += lxon; + lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC); + stats->lxofftxc.ev_count += lxoff; + total = lxon + lxoff; - if ((staterr & IXGBE_RXD_STAT_DD) == 0) - break; - if ((ifp->if_flags & IFF_RUNNING) == 0) - break; + if (hw->mac.type != ixgbe_mac_82598EB) { + stats->gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCL) + + ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32); + stats->gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCL) + + ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32) - total * ETHER_MIN_LEN; + stats->tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORL) + + ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32); + stats->lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT); + stats->lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT); + } else { + stats->lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXC); + stats->lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC); + /* 82598 only has a counter in the high register */ + stats->gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCH); + stats->gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCH) - total * ETHER_MIN_LEN; + stats->tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORH); + } - count--; - sendmp = NULL; - nbuf = NULL; - rsc = 0; - cur->wb.upper.status_error = 0; - rbuf = &rxr->rx_buffers[i]; - mp = rbuf->buf; - - len = le16toh(cur->wb.upper.length); - ptype = le32toh(cur->wb.lower.lo_dword.data) & - IXGBE_RXDADV_PKTTYPE_MASK; - eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0); - - /* Make sure bad packets are discarded */ - if (eop && (staterr & 
IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) { - rxr->rx_discarded.ev_count++; - ixgbe_rx_discard(rxr, i); - goto next_desc; - } + /* + * Workaround: mprc hardware is incorrectly counting + * broadcasts, so for now we subtract those. + */ + bprc = IXGBE_READ_REG(hw, IXGBE_BPRC); + stats->bprc.ev_count += bprc; + stats->mprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPRC) - ((hw->mac.type == ixgbe_mac_82598EB) ? bprc : 0); - /* - ** On 82599 which supports a hardware - ** LRO (called HW RSC), packets need - ** not be fragmented across sequential - ** descriptors, rather the next descriptor - ** is indicated in bits of the descriptor. - ** This also means that we might proceses - ** more than one packet at a time, something - ** that has never been true before, it - ** required eliminating global chain pointers - ** in favor of what we are doing here. -jfv - */ - if (!eop) { - /* - ** Figure out the next descriptor - ** of this frame. - */ - if (rxr->hw_rsc == TRUE) { - rsc = ixgbe_rsc_count(cur); - rxr->rsc_num += (rsc - 1); - } - if (rsc) { /* Get hardware index */ - nextp = ((staterr & - IXGBE_RXDADV_NEXTP_MASK) >> - IXGBE_RXDADV_NEXTP_SHIFT); - } else { /* Just sequential */ - nextp = i + 1; - if (nextp == adapter->num_rx_desc) - nextp = 0; - } - nbuf = &rxr->rx_buffers[nextp]; - prefetch(nbuf); - } - /* - ** Rather than using the fmp/lmp global pointers - ** we now keep the head of a packet chain in the - ** buffer struct and pass this along from one - ** descriptor to the next, until we get EOP. - */ - mp->m_len = len; - /* - ** See if there is a stored head - ** that determines what we are - */ - sendmp = rbuf->fmp; - if (sendmp != NULL) { /* secondary frag */ - rbuf->buf = rbuf->fmp = NULL; - mp->m_flags &= ~M_PKTHDR; - sendmp->m_pkthdr.len += mp->m_len; - } else { - /* - * Optimize. This might be a small packet, - * maybe just a TCP ACK. Do a fast copy that - * is cache aligned into a new mbuf, and - * leave the old mbuf+cluster for re-use. 
- */ - if (eop && len <= IXGBE_RX_COPY_LEN) { - sendmp = m_gethdr(M_NOWAIT, MT_DATA); - if (sendmp != NULL) { - sendmp->m_data += - IXGBE_RX_COPY_ALIGN; - ixgbe_bcopy(mp->m_data, - sendmp->m_data, len); - sendmp->m_len = len; - rxr->rx_copies.ev_count++; - rbuf->flags |= IXGBE_RX_COPY; - } - } - if (sendmp == NULL) { - rbuf->buf = rbuf->fmp = NULL; - sendmp = mp; - } - - /* first desc of a non-ps chain */ - sendmp->m_flags |= M_PKTHDR; - sendmp->m_pkthdr.len = mp->m_len; - } - ++processed; - - /* Pass the head pointer on */ - if (eop == 0) { - nbuf->fmp = sendmp; - sendmp = NULL; - mp->m_next = nbuf->buf; - } else { /* Sending this frame */ - m_set_rcvif(sendmp, ifp); - ifp->if_ipackets++; - rxr->rx_packets.ev_count++; - /* capture data for AIM */ - rxr->bytes += sendmp->m_pkthdr.len; - rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len; - /* Process vlan info */ - if ((rxr->vtag_strip) && - (staterr & IXGBE_RXD_STAT_VP)) - vtag = le16toh(cur->wb.upper.vlan); - if (vtag) { - VLAN_INPUT_TAG(ifp, sendmp, vtag, - printf("%s: could not apply VLAN " - "tag", __func__)); - } - if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) { - ixgbe_rx_checksum(staterr, sendmp, ptype, - &adapter->stats); - } -#if __FreeBSD_version >= 800000 -#ifdef RSS - sendmp->m_pkthdr.flowid = - le32toh(cur->wb.lower.hi_dword.rss); - switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) { - case IXGBE_RXDADV_RSSTYPE_IPV4_TCP: - M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4); - break; - case IXGBE_RXDADV_RSSTYPE_IPV4: - M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV4); - break; - case IXGBE_RXDADV_RSSTYPE_IPV6_TCP: - M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6); - break; - case IXGBE_RXDADV_RSSTYPE_IPV6_EX: - M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6_EX); - break; - case IXGBE_RXDADV_RSSTYPE_IPV6: - M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6); - break; - case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX: - M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6_EX); - break; - case IXGBE_RXDADV_RSSTYPE_IPV4_UDP: - M_HASHTYPE_SET(sendmp, 
M_HASHTYPE_RSS_UDP_IPV4); - break; - case IXGBE_RXDADV_RSSTYPE_IPV6_UDP: - M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6); - break; - case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX: - M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6_EX); - break; - default: - /* XXX fallthrough */ - M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE); - break; - } -#else /* RSS */ - sendmp->m_pkthdr.flowid = que->msix; - M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE); -#endif /* RSS */ -#endif /* FreeBSD_version */ - } -next_desc: - ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + stats->prc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC64); + stats->prc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC127); + stats->prc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC255); + stats->prc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC511); + stats->prc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1023); + stats->prc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1522); + + stats->gptc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPTC) - total; + stats->mptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPTC) - total; + stats->ptc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC64) - total; + + stats->ruc.ev_count += IXGBE_READ_REG(hw, IXGBE_RUC); + stats->rfc.ev_count += IXGBE_READ_REG(hw, IXGBE_RFC); + stats->roc.ev_count += IXGBE_READ_REG(hw, IXGBE_ROC); + stats->rjc.ev_count += IXGBE_READ_REG(hw, IXGBE_RJC); + stats->mngprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPRC); + stats->mngpdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPDC); + stats->mngptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPTC); + stats->tpr.ev_count += IXGBE_READ_REG(hw, IXGBE_TPR); + stats->tpt.ev_count += IXGBE_READ_REG(hw, IXGBE_TPT); + stats->ptc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC127); + stats->ptc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC255); + stats->ptc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC511); + stats->ptc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1023); + stats->ptc1522.ev_count += IXGBE_READ_REG(hw, 
IXGBE_PTC1522); + stats->bptc.ev_count += IXGBE_READ_REG(hw, IXGBE_BPTC); + stats->xec.ev_count += IXGBE_READ_REG(hw, IXGBE_XEC); + stats->fccrc.ev_count += IXGBE_READ_REG(hw, IXGBE_FCCRC); + stats->fclast.ev_count += IXGBE_READ_REG(hw, IXGBE_FCLAST); + /* Only read FCOE on 82599 */ + if (hw->mac.type != ixgbe_mac_82598EB) { + stats->fcoerpdc.ev_count += IXGBE_READ_REG(hw, IXGBE_FCOERPDC); + stats->fcoeprc.ev_count += IXGBE_READ_REG(hw, IXGBE_FCOEPRC); + stats->fcoeptc.ev_count += IXGBE_READ_REG(hw, IXGBE_FCOEPTC); + stats->fcoedwrc.ev_count += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC); + stats->fcoedwtc.ev_count += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC); + } - /* Advance our pointers to the next descriptor. */ - if (++i == rxr->num_desc) - i = 0; + /* Fill out the OS statistics structure */ + /* + * NetBSD: Don't override if_{i|o}{packets|bytes|mcasts} with + * adapter->stats counters. It's required to make ifconfig -z + * (SOICZIFDATA) work. + */ + ifp->if_collisions = 0; - /* Now send to the stack or do LRO */ - if (sendmp != NULL) { - rxr->next_to_check = i; - ixgbe_rx_input(rxr, ifp, sendmp, ptype); - i = rxr->next_to_check; - } + /* Rx Errors */ + ifp->if_iqdrops += total_missed_rx; + ifp->if_ierrors += crcerrs + rlec; +} - /* Every 8 descriptors we go to refresh mbufs */ - if (processed == 8) { - ixgbe_refresh_mbufs(rxr, i); - processed = 0; - } - } +/** ixgbe_sysctl_tdh_handler - Handler function + * Retrieves the TDH value from the hardware + */ +static int +ixgbe_sysctl_tdh_handler(SYSCTLFN_ARGS) +{ + struct sysctlnode node = *rnode; + uint32_t val; + struct tx_ring *txr; - /* Refresh any remaining buf structs */ - if (ixgbe_rx_unrefreshed(rxr)) - ixgbe_refresh_mbufs(rxr, i); + txr = (struct tx_ring *)node.sysctl_data; + if (txr == NULL) + return 0; + val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me)); + node.sysctl_data = &val; + return sysctl_lookup(SYSCTLFN_CALL(&node)); +} - rxr->next_to_check = i; +/** ixgbe_sysctl_tdt_handler - Handler function + * 
Retrieves the TDT value from the hardware + */ +static int +ixgbe_sysctl_tdt_handler(SYSCTLFN_ARGS) +{ + struct sysctlnode node = *rnode; + uint32_t val; + struct tx_ring *txr; -#ifdef LRO - /* - * Flush any outstanding LRO work - */ - while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) { - SLIST_REMOVE_HEAD(&lro->lro_active, next); - tcp_lro_flush(lro, queued); - } -#endif /* LRO */ + txr = (struct tx_ring *)node.sysctl_data; + if (txr == NULL) + return 0; + val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me)); + node.sysctl_data = &val; + return sysctl_lookup(SYSCTLFN_CALL(&node)); +} - IXGBE_RX_UNLOCK(rxr); +/** ixgbe_sysctl_rdh_handler - Handler function + * Retrieves the RDH value from the hardware + */ +static int +ixgbe_sysctl_rdh_handler(SYSCTLFN_ARGS) +{ + struct sysctlnode node = *rnode; + uint32_t val; + struct rx_ring *rxr; - /* - ** Still have cleaning to do? - */ - if ((staterr & IXGBE_RXD_STAT_DD) != 0) - return true; - else - return false; + rxr = (struct rx_ring *)node.sysctl_data; + if (rxr == NULL) + return 0; + val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me)); + node.sysctl_data = &val; + return sysctl_lookup(SYSCTLFN_CALL(&node)); } - -/********************************************************************* - * - * Verify that the hardware indicated that the checksum is valid. - * Inform the stack about the status of checksum so that stack - * doesn't spend time verifying the checksum. 
- * - *********************************************************************/ -static void -ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype, - struct ixgbe_hw_stats *stats) +/** ixgbe_sysctl_rdt_handler - Handler function + * Retrieves the RDT value from the hardware + */ +static int +ixgbe_sysctl_rdt_handler(SYSCTLFN_ARGS) { - u16 status = (u16) staterr; - u8 errors = (u8) (staterr >> 24); -#if 0 - bool sctp = FALSE; + struct sysctlnode node = *rnode; + uint32_t val; + struct rx_ring *rxr; - if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 && - (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0) - sctp = TRUE; -#endif + rxr = (struct rx_ring *)node.sysctl_data; + if (rxr == NULL) + return 0; + val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me)); + node.sysctl_data = &val; + return sysctl_lookup(SYSCTLFN_CALL(&node)); +} - if (status & IXGBE_RXD_STAT_IPCS) { - stats->ipcs.ev_count++; - if (!(errors & IXGBE_RXD_ERR_IPE)) { - /* IP Checksum Good */ - mp->m_pkthdr.csum_flags = M_CSUM_IPv4; +static int +ixgbe_sysctl_interrupt_rate_handler(SYSCTLFN_ARGS) +{ + struct sysctlnode node = *rnode; + struct ix_queue *que; + uint32_t reg, usec, rate; + int error; - } else { - stats->ipcs_bad.ev_count++; - mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD; - } - } - if (status & IXGBE_RXD_STAT_L4CS) { - stats->l4cs.ev_count++; - int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6; - if (!(errors & IXGBE_RXD_ERR_TCPE)) { - mp->m_pkthdr.csum_flags |= type; - } else { - stats->l4cs_bad.ev_count++; - mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD; - } + que = (struct ix_queue *)node.sysctl_data; + if (que == NULL) + return 0; + reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix)); + usec = ((reg & 0x0FF8) >> 3); + if (usec > 0) + rate = 500000 / usec; + else + rate = 0; + node.sysctl_data = &rate; + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + if (error) + return error; + reg &= ~0xfff; /* default, no limitation */ + ixgbe_max_interrupt_rate = 0; 
+ if (rate > 0 && rate < 500000) { + if (rate < 1000) + rate = 1000; + ixgbe_max_interrupt_rate = rate; + reg |= ((4000000/rate) & 0xff8 ); } - return; + IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg); + return 0; } - -#if 0 /* XXX Badly need to overhaul vlan(4) on NetBSD. */ -/* -** This routine is run via an vlan config EVENT, -** it enables us to use the HW Filter table since -** we can get the vlan id. This just creates the -** entry in the soft version of the VFTA, init will -** repopulate the real table. -*/ -static void -ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) +const struct sysctlnode * +ixgbe_sysctl_instance(struct adapter *adapter) { - struct adapter *adapter = ifp->if_softc; - u16 index, bit; + const char *dvname; + struct sysctllog **log; + int rc; + const struct sysctlnode *rnode; - if (ifp->if_softc != arg) /* Not our event */ - return; + if (adapter->sysctltop != NULL) + return adapter->sysctltop; - if ((vtag == 0) || (vtag > 4095)) /* Invalid */ - return; + log = &adapter->sysctllog; + dvname = device_xname(adapter->dev); - IXGBE_CORE_LOCK(adapter); - index = (vtag >> 5) & 0x7F; - bit = vtag & 0x1F; - adapter->shadow_vfta[index] |= (1 << bit); - ixgbe_setup_vlan_hw_support(adapter); - IXGBE_CORE_UNLOCK(adapter); + if ((rc = sysctl_createv(log, 0, NULL, &rnode, + 0, CTLTYPE_NODE, dvname, + SYSCTL_DESCR("ixgbe information and settings"), + NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL)) != 0) + goto err; + + return rnode; +err: + printf("%s: sysctl_createv failed, rc = %d\n", __func__, rc); + return NULL; } -/* -** This routine is run via an vlan -** unconfig EVENT, remove our entry -** in the soft vfta. 
-*/ static void -ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) +ixgbe_add_device_sysctls(struct adapter *adapter) { - struct adapter *adapter = ifp->if_softc; - u16 index, bit; - - if (ifp->if_softc != arg) - return; - - if ((vtag == 0) || (vtag > 4095)) /* Invalid */ - return; - - IXGBE_CORE_LOCK(adapter); - index = (vtag >> 5) & 0x7F; - bit = vtag & 0x1F; - adapter->shadow_vfta[index] &= ~(1 << bit); - /* Re-init to load the changes */ - ixgbe_setup_vlan_hw_support(adapter); - IXGBE_CORE_UNLOCK(adapter); -} -#endif - -static void -ixgbe_setup_vlan_hw_support(struct adapter *adapter) -{ - struct ethercom *ec = &adapter->osdep.ec; + device_t dev = adapter->dev; struct ixgbe_hw *hw = &adapter->hw; - struct rx_ring *rxr; - u32 ctrl; + struct sysctllog **log; + const struct sysctlnode *rnode, *cnode; + log = &adapter->sysctllog; - /* - ** We get here thru init_locked, meaning - ** a soft reset, this has already cleared - ** the VFTA and other state, so if there - ** have been no vlan's registered do nothing. - */ - if (!VLAN_ATTACHED(&adapter->osdep.ec)) { + if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) { + aprint_error_dev(dev, "could not create sysctl root\n"); return; } - /* Setup the queues for vlans */ - for (int i = 0; i < adapter->num_queues; i++) { - rxr = &adapter->rx_rings[i]; - /* On 82599 the VLAN enable is per/queue in RXDCTL */ - if (hw->mac.type != ixgbe_mac_82598EB) { - ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)); - ctrl |= IXGBE_RXDCTL_VME; - IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl); - } - rxr->vtag_strip = TRUE; - } + if (sysctl_createv(log, 0, &rnode, &cnode, + CTLFLAG_READONLY, CTLTYPE_INT, + "num_rx_desc", SYSCTL_DESCR("Number of rx descriptors"), + NULL, 0, &adapter->num_rx_desc, 0, CTL_CREATE, CTL_EOL) != 0) + aprint_error_dev(dev, "could not create sysctl\n"); - if ((ec->ec_capenable & ETHERCAP_VLAN_HWFILTER) == 0) - return; - /* - ** A soft reset zero's out the VFTA, so - ** we need to repopulate it now. 
- */ - for (int i = 0; i < IXGBE_VFTA_SIZE; i++) - if (adapter->shadow_vfta[i] != 0) - IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), - adapter->shadow_vfta[i]); + if (sysctl_createv(log, 0, &rnode, &cnode, + CTLFLAG_READONLY, CTLTYPE_INT, + "num_queues", SYSCTL_DESCR("Number of queues"), + NULL, 0, &adapter->num_queues, 0, CTL_CREATE, CTL_EOL) != 0) + aprint_error_dev(dev, "could not create sysctl\n"); - ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); - /* Enable the Filter Table if enabled */ - if (ec->ec_capenable & ETHERCAP_VLAN_HWFILTER) { - ctrl &= ~IXGBE_VLNCTRL_CFIEN; - ctrl |= IXGBE_VLNCTRL_VFE; - } - if (hw->mac.type == ixgbe_mac_82598EB) - ctrl |= IXGBE_VLNCTRL_VME; - IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl); -} + /* Sysctls for all devices */ + if (sysctl_createv(log, 0, &rnode, &cnode, + CTLFLAG_READWRITE, CTLTYPE_INT, + "fc", SYSCTL_DESCR(IXGBE_SYSCTL_DESC_SET_FC), + ixgbe_sysctl_flowcntl, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0) + aprint_error_dev(dev, "could not create sysctl\n"); -static void -ixgbe_enable_intr(struct adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - struct ix_queue *que = adapter->queues; - u32 mask, fwsm; + if (sysctl_createv(log, 0, &rnode, &cnode, + CTLFLAG_READWRITE, CTLTYPE_BOOL, + "enable_aim", SYSCTL_DESCR("Interrupt Moderation"), + NULL, 0, &adapter->enable_aim, 0, CTL_CREATE, CTL_EOL) != 0) + aprint_error_dev(dev, "could not create sysctl\n"); - mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE); - /* Enable Fan Failure detection */ - if (hw->device_id == IXGBE_DEV_ID_82598AT) - mask |= IXGBE_EIMS_GPI_SDP1; + if (sysctl_createv(log, 0, &rnode, &cnode, + CTLFLAG_READWRITE, CTLTYPE_INT, + "advertise_speed", SYSCTL_DESCR(IXGBE_SYSCTL_DESC_ADV_SPEED), + ixgbe_sysctl_advertise, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0) + aprint_error_dev(dev, "could not create sysctl\n"); - switch (adapter->hw.mac.type) { - case ixgbe_mac_82599EB: - mask |= IXGBE_EIMS_ECC; - mask |= IXGBE_EIMS_GPI_SDP0; - mask |= 
IXGBE_EIMS_GPI_SDP1; - mask |= IXGBE_EIMS_GPI_SDP2; -#ifdef IXGBE_FDIR - mask |= IXGBE_EIMS_FLOW_DIR; -#endif - break; - case ixgbe_mac_X540: - mask |= IXGBE_EIMS_ECC; - /* Detect if Thermal Sensor is enabled */ - fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM); - if (fwsm & IXGBE_FWSM_TS_ENABLED) - mask |= IXGBE_EIMS_TS; -#ifdef IXGBE_FDIR - mask |= IXGBE_EIMS_FLOW_DIR; + if (sysctl_createv(log, 0, &rnode, &cnode, + CTLFLAG_READWRITE, CTLTYPE_INT, + "ts", SYSCTL_DESCR("Thermal Test"), + ixgbe_sysctl_thermal_test, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0) + aprint_error_dev(dev, "could not create sysctl\n"); + +#ifdef IXGBE_DEBUG + /* testing sysctls (for all devices) */ + if (sysctl_createv(log, 0, &rnode, &cnode, + CTLFLAG_READWRITE, CTLTYPE_INT, + "power_state", SYSCTL_DESCR("PCI Power State"), + ixgbe_sysctl_power_state, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0) + aprint_error_dev(dev, "could not create sysctl\n"); + + if (sysctl_createv(log, 0, &rnode, &cnode, + CTLFLAG_READONLY, CTLTYPE_STRING, + "print_rss_config", SYSCTL_DESCR("Prints RSS Configuration"), + ixgbe_sysctl_print_rss_config, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0) + aprint_error_dev(dev, "could not create sysctl\n"); #endif - /* falls through */ - default: - break; + /* for X550 series devices */ + if (hw->mac.type >= ixgbe_mac_X550) + if (sysctl_createv(log, 0, &rnode, &cnode, + CTLFLAG_READWRITE, CTLTYPE_INT, + "dmac", SYSCTL_DESCR("DMA Coalesce"), + ixgbe_sysctl_dmac, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0) + aprint_error_dev(dev, "could not create sysctl\n"); + + /* for X552 backplane devices */ + if (hw->device_id == IXGBE_DEV_ID_X550EM_X_KR) { + const struct sysctlnode *eee_node; + + if (sysctl_createv(log, 0, &rnode, &eee_node, + 0, CTLTYPE_NODE, + "eee", SYSCTL_DESCR("Energy Efficient Ethernet sysctls"), + NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL) != 0) { + aprint_error_dev(dev, "could not create sysctl\n"); + return; + } + + if (sysctl_createv(log, 0, &eee_node, 
&cnode, + CTLFLAG_READWRITE, CTLTYPE_INT, + "enable", SYSCTL_DESCR("Enable or Disable EEE"), + ixgbe_sysctl_eee_enable, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0) + aprint_error_dev(dev, "could not create sysctl\n"); + + if (sysctl_createv(log, 0, &eee_node, &cnode, + CTLFLAG_READONLY, CTLTYPE_BOOL, + "negotiated", SYSCTL_DESCR("EEE negotiated on link"), + ixgbe_sysctl_eee_negotiated, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0) + aprint_error_dev(dev, "could not create sysctl\n"); + + if (sysctl_createv(log, 0, &eee_node, &cnode, + CTLFLAG_READONLY, CTLTYPE_BOOL, + "tx_lpi_status", SYSCTL_DESCR("Whether or not TX link is in LPI state"), + ixgbe_sysctl_eee_tx_lpi_status, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0) + aprint_error_dev(dev, "could not create sysctl\n"); + + if (sysctl_createv(log, 0, &eee_node, &cnode, + CTLFLAG_READONLY, CTLTYPE_BOOL, + "rx_lpi_status", SYSCTL_DESCR("Whether or not RX link is in LPI state"), + ixgbe_sysctl_eee_rx_lpi_status, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0) + aprint_error_dev(dev, "could not create sysctl\n"); + + if (sysctl_createv(log, 0, &eee_node, &cnode, + CTLFLAG_READONLY, CTLTYPE_BOOL, + "tx_lpi_delay", SYSCTL_DESCR("TX LPI entry delay in microseconds"), + ixgbe_sysctl_eee_tx_lpi_delay, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0) + aprint_error_dev(dev, "could not create sysctl\n"); } - IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask); + /* for WoL-capable devices */ + if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) { + if (sysctl_createv(log, 0, &rnode, &cnode, + CTLFLAG_READWRITE, CTLTYPE_BOOL, + "wol_enable", SYSCTL_DESCR("Enable/Disable Wake on LAN"), + ixgbe_sysctl_wol_enable, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0) + aprint_error_dev(dev, "could not create sysctl\n"); - /* With RSS we use auto clear */ - if (adapter->msix_mem) { - mask = IXGBE_EIMS_ENABLE_MASK; - /* Don't autoclear Link */ - mask &= ~IXGBE_EIMS_OTHER; - mask &= ~IXGBE_EIMS_LSC; - IXGBE_WRITE_REG(hw, IXGBE_EIAC, 
mask); + if (sysctl_createv(log, 0, &rnode, &cnode, + CTLFLAG_READWRITE, CTLTYPE_INT, + "wufc", SYSCTL_DESCR("Enable/Disable Wake Up Filters"), + ixgbe_sysctl_wufc, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0) + aprint_error_dev(dev, "could not create sysctl\n"); } - /* - ** Now enable all queues, this is done separately to - ** allow for handling the extended (beyond 32) MSIX - ** vectors that can be used by 82599 - */ - for (int i = 0; i < adapter->num_queues; i++, que++) - ixgbe_enable_queue(adapter, que->msix); + /* for X552/X557-AT devices */ + if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) { + const struct sysctlnode *phy_node; - IXGBE_WRITE_FLUSH(hw); + if (sysctl_createv(log, 0, &rnode, &phy_node, + 0, CTLTYPE_NODE, + "phy", SYSCTL_DESCR("External PHY sysctls"), + NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL) != 0) { + aprint_error_dev(dev, "could not create sysctl\n"); + return; + } - return; -} + if (sysctl_createv(log, 0, &phy_node, &cnode, + CTLFLAG_READONLY, CTLTYPE_INT, + "temp", SYSCTL_DESCR("Current External PHY Temperature (Celsius)"), + ixgbe_sysctl_phy_temp, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0) + aprint_error_dev(dev, "could not create sysctl\n"); -static void -ixgbe_disable_intr(struct adapter *adapter) -{ - if (adapter->msix_mem) - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0); - if (adapter->hw.mac.type == ixgbe_mac_82598EB) { - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0); - } else { - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0); + if (sysctl_createv(log, 0, &phy_node, &cnode, + CTLFLAG_READONLY, CTLTYPE_INT, + "overtemp_occurred", SYSCTL_DESCR("External PHY High Temperature Event Occurred"), + ixgbe_sysctl_phy_overtemp_occurred, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0) + aprint_error_dev(dev, "could not create sysctl\n"); } - IXGBE_WRITE_FLUSH(&adapter->hw); - return; } -u16 -ixgbe_read_pci_cfg(struct 
ixgbe_hw *hw, u32 reg) +/* + * Add sysctl variables, one per statistic, to the system. + */ +static void +ixgbe_add_hw_stats(struct adapter *adapter) { - switch (reg % 4) { - case 0: - return pci_conf_read(hw->back->pc, hw->back->tag, reg) & - __BITS(15, 0); - case 2: - return __SHIFTOUT(pci_conf_read(hw->back->pc, hw->back->tag, - reg - 2), __BITS(31, 16)); - default: - panic("%s: invalid register (%" PRIx32, __func__, reg); - break; - } -} + device_t dev = adapter->dev; + const struct sysctlnode *rnode, *cnode; + struct sysctllog **log = &adapter->sysctllog; + struct tx_ring *txr = adapter->tx_rings; + struct rx_ring *rxr = adapter->rx_rings; + struct ixgbe_hw_stats *stats = &adapter->stats.pf; + const char *xname = device_xname(dev); -void -ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value) -{ - pcireg_t old; + /* Driver Statistics */ +#if 0 + /* These counters are not updated by the software */ + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_header_failed", + CTLFLAG_RD, &adapter->mbuf_header_failed, + "???"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_packet_failed", + CTLFLAG_RD, &adapter->mbuf_packet_failed, + "???"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "no_tx_map_avail", + CTLFLAG_RD, &adapter->no_tx_map_avail, + "???"); +#endif + evcnt_attach_dynamic(&adapter->handleq, EVCNT_TYPE_MISC, + NULL, xname, "Handled queue in softint"); + evcnt_attach_dynamic(&adapter->req, EVCNT_TYPE_MISC, + NULL, xname, "Requeued in softint"); + evcnt_attach_dynamic(&adapter->efbig_tx_dma_setup, EVCNT_TYPE_MISC, + NULL, xname, "Driver tx dma soft fail EFBIG"); + evcnt_attach_dynamic(&adapter->mbuf_defrag_failed, EVCNT_TYPE_MISC, + NULL, xname, "m_defrag() failed"); + evcnt_attach_dynamic(&adapter->efbig2_tx_dma_setup, EVCNT_TYPE_MISC, + NULL, xname, "Driver tx dma hard fail EFBIG"); + evcnt_attach_dynamic(&adapter->einval_tx_dma_setup, EVCNT_TYPE_MISC, + NULL, xname, "Driver tx dma hard fail EINVAL"); + evcnt_attach_dynamic(&adapter->other_tx_dma_setup, 
EVCNT_TYPE_MISC, + NULL, xname, "Driver tx dma hard fail other"); + evcnt_attach_dynamic(&adapter->eagain_tx_dma_setup, EVCNT_TYPE_MISC, + NULL, xname, "Driver tx dma soft fail EAGAIN"); + evcnt_attach_dynamic(&adapter->enomem_tx_dma_setup, EVCNT_TYPE_MISC, + NULL, xname, "Driver tx dma soft fail ENOMEM"); + evcnt_attach_dynamic(&adapter->watchdog_events, EVCNT_TYPE_MISC, + NULL, xname, "Watchdog timeouts"); + evcnt_attach_dynamic(&adapter->tso_err, EVCNT_TYPE_MISC, + NULL, xname, "TSO errors"); + evcnt_attach_dynamic(&adapter->link_irq, EVCNT_TYPE_INTR, + NULL, xname, "Link MSIX IRQ Handled"); - switch (reg % 4) { - case 0: - old = pci_conf_read(hw->back->pc, hw->back->tag, reg) & - __BITS(31, 16); - pci_conf_write(hw->back->pc, hw->back->tag, reg, value | old); - break; - case 2: - old = pci_conf_read(hw->back->pc, hw->back->tag, reg - 2) & - __BITS(15, 0); - pci_conf_write(hw->back->pc, hw->back->tag, reg - 2, - __SHIFTIN(value, __BITS(31, 16)) | old); - break; - default: - panic("%s: invalid register (%" PRIx32, __func__, reg); - break; - } + for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) { + snprintf(adapter->queues[i].evnamebuf, + sizeof(adapter->queues[i].evnamebuf), "%s q%d", + xname, i); + snprintf(adapter->queues[i].namebuf, + sizeof(adapter->queues[i].namebuf), "q%d", i); - return; -} + if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) { + aprint_error_dev(dev, "could not create sysctl root\n"); + break; + } -/* -** Get the width and transaction speed of -** the slot this adapter is plugged into. 
-*/ -static void -ixgbe_get_slot_info(struct ixgbe_hw *hw) -{ - device_t dev = ((struct ixgbe_osdep *)hw->back)->dev; - struct ixgbe_mac_info *mac = &hw->mac; - u16 link; + if (sysctl_createv(log, 0, &rnode, &rnode, + 0, CTLTYPE_NODE, + adapter->queues[i].namebuf, SYSCTL_DESCR("Queue Name"), + NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL) != 0) + break; - /* For most devices simply call the shared code routine */ - if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) { - ixgbe_get_bus_info(hw); - goto display; - } + if (sysctl_createv(log, 0, &rnode, &cnode, + CTLFLAG_READWRITE, CTLTYPE_INT, + "interrupt_rate", SYSCTL_DESCR("Interrupt Rate"), + ixgbe_sysctl_interrupt_rate_handler, 0, + (void *)&adapter->queues[i], 0, CTL_CREATE, CTL_EOL) != 0) + break; - /* - ** For the Quad port adapter we need to parse back - ** up the PCI tree to find the speed of the expansion - ** slot into which this adapter is plugged. A bit more work. - */ - dev = device_parent(device_parent(dev)); -#ifdef IXGBE_DEBUG - device_printf(dev, "parent pcib = %x,%x,%x\n", - pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); -#endif - dev = device_parent(device_parent(dev)); -#ifdef IXGBE_DEBUG - device_printf(dev, "slot pcib = %x,%x,%x\n", - pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); +#if 0 /* XXX msaitoh */ + if (sysctl_createv(log, 0, &rnode, &cnode, + CTLFLAG_READONLY, CTLTYPE_QUAD, + "irqs", SYSCTL_DESCR("irqs on this queue"), + NULL, 0, &(adapter->queues[i].irqs), + 0, CTL_CREATE, CTL_EOL) != 0) + break; #endif - /* Now get the PCI Express Capabilities offset */ - /* ...and read the Link Status Register */ - link = IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_LINK_STATUS); - switch (link & IXGBE_PCI_LINK_WIDTH) { - case IXGBE_PCI_LINK_WIDTH_1: - hw->bus.width = ixgbe_bus_width_pcie_x1; - break; - case IXGBE_PCI_LINK_WIDTH_2: - hw->bus.width = ixgbe_bus_width_pcie_x2; - break; - case IXGBE_PCI_LINK_WIDTH_4: - hw->bus.width = ixgbe_bus_width_pcie_x4; - break; - case 
IXGBE_PCI_LINK_WIDTH_8: - hw->bus.width = ixgbe_bus_width_pcie_x8; - break; - default: - hw->bus.width = ixgbe_bus_width_unknown; - break; + + if (sysctl_createv(log, 0, &rnode, &cnode, + CTLFLAG_READONLY, CTLTYPE_INT, + "txd_head", SYSCTL_DESCR("Transmit Descriptor Head"), + ixgbe_sysctl_tdh_handler, 0, (void *)txr, + 0, CTL_CREATE, CTL_EOL) != 0) + break; + + if (sysctl_createv(log, 0, &rnode, &cnode, + CTLFLAG_READONLY, CTLTYPE_INT, + "txd_tail", SYSCTL_DESCR("Transmit Descriptor Tail"), + ixgbe_sysctl_tdt_handler, 0, (void *)txr, + 0, CTL_CREATE, CTL_EOL) != 0) + break; + + evcnt_attach_dynamic(&adapter->queues[i].irqs, EVCNT_TYPE_INTR, + NULL, adapter->queues[i].evnamebuf, "IRQs on queue"); + evcnt_attach_dynamic(&txr->tso_tx, EVCNT_TYPE_MISC, + NULL, adapter->queues[i].evnamebuf, "TSO"); + evcnt_attach_dynamic(&txr->no_desc_avail, EVCNT_TYPE_MISC, + NULL, adapter->queues[i].evnamebuf, + "Queue No Descriptor Available"); + evcnt_attach_dynamic(&txr->total_packets, EVCNT_TYPE_MISC, + NULL, adapter->queues[i].evnamebuf, + "Queue Packets Transmitted"); +#ifndef IXGBE_LEGACY_TX + evcnt_attach_dynamic(&txr->pcq_drops, EVCNT_TYPE_MISC, + NULL, adapter->queues[i].evnamebuf, + "Packets dropped in pcq"); +#endif + +#ifdef LRO + struct lro_ctrl *lro = &rxr->lro; +#endif /* LRO */ + + if (sysctl_createv(log, 0, &rnode, &cnode, + CTLFLAG_READONLY, + CTLTYPE_INT, + "rxd_head", SYSCTL_DESCR("Receive Descriptor Head"), + ixgbe_sysctl_rdh_handler, 0, (void *)rxr, 0, + CTL_CREATE, CTL_EOL) != 0) + break; + + if (sysctl_createv(log, 0, &rnode, &cnode, + CTLFLAG_READONLY, + CTLTYPE_INT, + "rxd_tail", SYSCTL_DESCR("Receive Descriptor Tail"), + ixgbe_sysctl_rdt_handler, 0, (void *)rxr, 0, + CTL_CREATE, CTL_EOL) != 0) + break; + + if (i < __arraycount(stats->mpc)) { + evcnt_attach_dynamic(&stats->mpc[i], + EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf, + "Missed Packet Count"); + } + if (i < __arraycount(stats->pxontxc)) { + evcnt_attach_dynamic(&stats->pxontxc[i], + 
EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf, + "pxontxc"); + evcnt_attach_dynamic(&stats->pxonrxc[i], + EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf, + "pxonrxc"); + evcnt_attach_dynamic(&stats->pxofftxc[i], + EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf, + "pxofftxc"); + evcnt_attach_dynamic(&stats->pxoffrxc[i], + EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf, + "pxoffrxc"); + evcnt_attach_dynamic(&stats->pxon2offc[i], + EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf, + "pxon2offc"); + } + if (i < __arraycount(stats->qprc)) { + evcnt_attach_dynamic(&stats->qprc[i], + EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf, + "qprc"); + evcnt_attach_dynamic(&stats->qptc[i], + EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf, + "qptc"); + evcnt_attach_dynamic(&stats->qbrc[i], + EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf, + "qbrc"); + evcnt_attach_dynamic(&stats->qbtc[i], + EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf, + "qbtc"); + evcnt_attach_dynamic(&stats->qprdc[i], + EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf, + "qprdc"); + } + + evcnt_attach_dynamic(&rxr->rx_packets, EVCNT_TYPE_MISC, + NULL, adapter->queues[i].evnamebuf, "Queue Packets Received"); + evcnt_attach_dynamic(&rxr->rx_bytes, EVCNT_TYPE_MISC, + NULL, adapter->queues[i].evnamebuf, "Queue Bytes Received"); + evcnt_attach_dynamic(&rxr->rx_copies, EVCNT_TYPE_MISC, + NULL, adapter->queues[i].evnamebuf, "Copied RX Frames"); + evcnt_attach_dynamic(&rxr->no_jmbuf, EVCNT_TYPE_MISC, + NULL, adapter->queues[i].evnamebuf, "Rx no jumbo mbuf"); + evcnt_attach_dynamic(&rxr->rx_discarded, EVCNT_TYPE_MISC, + NULL, adapter->queues[i].evnamebuf, "Rx discarded"); +#ifdef LRO + SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued", + CTLFLAG_RD, &lro->lro_queued, 0, + "LRO Queued"); + SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed", + CTLFLAG_RD, &lro->lro_flushed, 0, + "LRO Flushed"); +#endif /* LRO */ + } + + /* MAC stats get the own sub node */ + + 
snprintf(stats->namebuf, + sizeof(stats->namebuf), "%s MAC Statistics", xname); + + evcnt_attach_dynamic(&stats->ipcs, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "rx csum offload - IP"); + evcnt_attach_dynamic(&stats->l4cs, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "rx csum offload - L4"); + evcnt_attach_dynamic(&stats->ipcs_bad, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "rx csum offload - IP bad"); + evcnt_attach_dynamic(&stats->l4cs_bad, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "rx csum offload - L4 bad"); + evcnt_attach_dynamic(&stats->intzero, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "Interrupt conditions zero"); + evcnt_attach_dynamic(&stats->legint, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "Legacy interrupts"); + evcnt_attach_dynamic(&stats->crcerrs, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "CRC Errors"); + evcnt_attach_dynamic(&stats->illerrc, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "Illegal Byte Errors"); + evcnt_attach_dynamic(&stats->errbc, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "Byte Errors"); + evcnt_attach_dynamic(&stats->mspdc, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "MAC Short Packets Discarded"); + evcnt_attach_dynamic(&stats->mlfc, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "MAC Local Faults"); + evcnt_attach_dynamic(&stats->mrfc, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "MAC Remote Faults"); + evcnt_attach_dynamic(&stats->rlec, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "Receive Length Errors"); + evcnt_attach_dynamic(&stats->lxontxc, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "Link XON Transmitted"); + evcnt_attach_dynamic(&stats->lxonrxc, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "Link XON Received"); + evcnt_attach_dynamic(&stats->lxofftxc, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "Link XOFF Transmitted"); + evcnt_attach_dynamic(&stats->lxoffrxc, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "Link XOFF Received"); + + /* Packet Reception Stats */ + evcnt_attach_dynamic(&stats->tor, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "Total Octets Received"); + 
evcnt_attach_dynamic(&stats->gorc, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "Good Octets Received"); + evcnt_attach_dynamic(&stats->tpr, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "Total Packets Received"); + evcnt_attach_dynamic(&stats->gprc, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "Good Packets Received"); + evcnt_attach_dynamic(&stats->mprc, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "Multicast Packets Received"); + evcnt_attach_dynamic(&stats->bprc, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "Broadcast Packets Received"); + evcnt_attach_dynamic(&stats->prc64, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "64 byte frames received "); + evcnt_attach_dynamic(&stats->prc127, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "65-127 byte frames received"); + evcnt_attach_dynamic(&stats->prc255, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "128-255 byte frames received"); + evcnt_attach_dynamic(&stats->prc511, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "256-511 byte frames received"); + evcnt_attach_dynamic(&stats->prc1023, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "512-1023 byte frames received"); + evcnt_attach_dynamic(&stats->prc1522, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "1023-1522 byte frames received"); + evcnt_attach_dynamic(&stats->ruc, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "Receive Undersized"); + evcnt_attach_dynamic(&stats->rfc, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "Fragmented Packets Received "); + evcnt_attach_dynamic(&stats->roc, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "Oversized Packets Received"); + evcnt_attach_dynamic(&stats->rjc, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "Received Jabber"); + evcnt_attach_dynamic(&stats->mngprc, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "Management Packets Received"); + evcnt_attach_dynamic(&stats->xec, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "Checksum Errors"); + + /* Packet Transmission Stats */ + evcnt_attach_dynamic(&stats->gotc, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "Good Octets Transmitted"); + 
evcnt_attach_dynamic(&stats->tpt, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "Total Packets Transmitted"); + evcnt_attach_dynamic(&stats->gptc, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "Good Packets Transmitted"); + evcnt_attach_dynamic(&stats->bptc, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "Broadcast Packets Transmitted"); + evcnt_attach_dynamic(&stats->mptc, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "Multicast Packets Transmitted"); + evcnt_attach_dynamic(&stats->mngptc, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "Management Packets Transmitted"); + evcnt_attach_dynamic(&stats->ptc64, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "64 byte frames transmitted "); + evcnt_attach_dynamic(&stats->ptc127, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "65-127 byte frames transmitted"); + evcnt_attach_dynamic(&stats->ptc255, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "128-255 byte frames transmitted"); + evcnt_attach_dynamic(&stats->ptc511, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "256-511 byte frames transmitted"); + evcnt_attach_dynamic(&stats->ptc1023, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "512-1023 byte frames transmitted"); + evcnt_attach_dynamic(&stats->ptc1522, EVCNT_TYPE_MISC, NULL, + stats->namebuf, "1024-1522 byte frames transmitted"); +} + +static void +ixgbe_set_sysctl_value(struct adapter *adapter, const char *name, + const char *description, int *limit, int value) +{ + device_t dev = adapter->dev; + struct sysctllog **log; + const struct sysctlnode *rnode, *cnode; + + log = &adapter->sysctllog; + if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) { + aprint_error_dev(dev, "could not create sysctl root\n"); + return; + } + if (sysctl_createv(log, 0, &rnode, &cnode, + CTLFLAG_READWRITE, CTLTYPE_INT, + name, SYSCTL_DESCR(description), + NULL, 0, limit, 0, CTL_CREATE, CTL_EOL) != 0) + aprint_error_dev(dev, "could not create sysctl\n"); + *limit = value; +} + +/* +** Set flow control using sysctl: +** Flow control values: +** 0 - off +** 1 - rx pause +** 2 - tx pause +** 3 
- full +*/ +static int +ixgbe_sysctl_flowcntl(SYSCTLFN_ARGS) +{ + int error, fc; + struct sysctlnode node = *rnode; + struct adapter *adapter = (struct adapter *)node.sysctl_data; + + fc = adapter->fc; + node.sysctl_data = &fc; + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + if (error != 0 || newp == NULL) + return error; + + /* Don't bother if it's not changed */ + if (adapter->fc == fc) + return (0); + + return ixgbe_set_flowcntl(adapter, fc); +} + + +static int +ixgbe_set_flowcntl(struct adapter *adapter, int fc) +{ + + switch (fc) { + case ixgbe_fc_rx_pause: + case ixgbe_fc_tx_pause: + case ixgbe_fc_full: + adapter->hw.fc.requested_mode = adapter->fc; + if (adapter->num_queues > 1) + ixgbe_disable_rx_drop(adapter); + break; + case ixgbe_fc_none: + adapter->hw.fc.requested_mode = ixgbe_fc_none; + if (adapter->num_queues > 1) + ixgbe_enable_rx_drop(adapter); + break; + default: + return (EINVAL); + } + adapter->fc = fc; +#if 0 /* XXX NetBSD */ + /* Don't autoneg if forcing a value */ + adapter->hw.fc.disable_fc_autoneg = TRUE; +#endif + ixgbe_fc_enable(&adapter->hw); + return (0); +} + +/* +** Control advertised link speed: +** Flags: +** 0x0 - Default (all capable link speed) +** 0x1 - advertise 100 Mb +** 0x2 - advertise 1G +** 0x4 - advertise 10G +*/ +static int +ixgbe_sysctl_advertise(SYSCTLFN_ARGS) +{ + struct sysctlnode node = *rnode; + int error = 0, advertise; + struct adapter *adapter = (struct adapter *)node.sysctl_data; + + advertise = adapter->advertise; + node.sysctl_data = &advertise; + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + if (error != 0 || newp == NULL) + return error; + + return ixgbe_set_advertise(adapter, advertise); +} + +static int +ixgbe_set_advertise(struct adapter *adapter, int advertise) +{ + device_t dev; + struct ixgbe_hw *hw; + ixgbe_link_speed speed; + + /* Checks to validate new value */ + if (adapter->advertise == advertise) /* no change */ + return (0); + + hw = &adapter->hw; + dev = adapter->dev; + + /* No speed changes 
for backplane media */ + if (hw->phy.media_type == ixgbe_media_type_backplane) + return (ENODEV); + + if (!((hw->phy.media_type == ixgbe_media_type_copper) || + (hw->phy.multispeed_fiber))) { + device_printf(dev, + "Advertised speed can only be set on copper or " + "multispeed fiber media types.\n"); + return (EINVAL); + } + + if (advertise < 0x0 || advertise > 0x7) { + device_printf(dev, + "Invalid advertised speed; valid modes are 0x0 through 0x7\n"); + return (EINVAL); + } + + /* Set new value and report new advertised mode */ + speed = 0; + if ((hw->mac.type != ixgbe_mac_X540) + && (hw->mac.type != ixgbe_mac_X550)) { + if (advertise & 0x1) { + device_printf(dev, + "Set Advertise: 100Mb on X540/X550 only\n"); + return (EINVAL); + } + } else if ((advertise & 0x1) || (advertise == 0)) + speed |= IXGBE_LINK_SPEED_100_FULL; + if ((advertise & 0x2) || (advertise == 0)) + speed |= IXGBE_LINK_SPEED_1GB_FULL; + if ((advertise & 0x4) || (advertise == 0)) + speed |= IXGBE_LINK_SPEED_10GB_FULL; + adapter->advertise = advertise; + + hw->mac.autotry_restart = TRUE; + hw->mac.ops.setup_link(hw, speed, TRUE); + + return 0; +} + +/* + * The following two sysctls are for X552/X557-AT devices; + * they deal with the external PHY used in them. 
+ */ +static int +ixgbe_sysctl_phy_temp(SYSCTLFN_ARGS) +{ + struct sysctlnode node = *rnode; + struct adapter *adapter = (struct adapter *)node.sysctl_data; + struct ixgbe_hw *hw = &adapter->hw; + int val; + u16 reg; + int error; + + if (hw->device_id != IXGBE_DEV_ID_X550EM_X_10G_T) { + device_printf(adapter->dev, + "Device has no supported external thermal sensor.\n"); + return (ENODEV); + } + + if (hw->phy.ops.read_reg(hw, IXGBE_PHY_CURRENT_TEMP, + IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + &reg)) { + device_printf(adapter->dev, + "Error reading from PHY's current temperature register\n"); + return (EAGAIN); + } + + node.sysctl_data = &val; + + /* Shift temp for output */ + val = reg >> 8; + + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + if ((error) || (newp == NULL)) + return (error); + + return (0); +} + +/* + * Reports whether the current PHY temperature is over + * the overtemp threshold. + * - This is reported directly from the PHY + */ +static int +ixgbe_sysctl_phy_overtemp_occurred(SYSCTLFN_ARGS) +{ + struct sysctlnode node = *rnode; + struct adapter *adapter = (struct adapter *)node.sysctl_data; + struct ixgbe_hw *hw = &adapter->hw; + int val, error; + u16 reg; + + if (hw->device_id != IXGBE_DEV_ID_X550EM_X_10G_T) { + device_printf(adapter->dev, + "Device has no supported external thermal sensor.\n"); + return (ENODEV); + } + + if (hw->phy.ops.read_reg(hw, IXGBE_PHY_OVERTEMP_STATUS, + IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + &reg)) { + device_printf(adapter->dev, + "Error reading from PHY's temperature status register\n"); + return (EAGAIN); + } + + node.sysctl_data = &val; + + /* Get occurrence bit */ + val = !!(reg & 0x4000); + + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + if ((error) || (newp == NULL)) + return (error); + + return (0); +} + +/* +** Thermal Shutdown Trigger (internal MAC) +** - Set this to 1 to cause an overtemp event to occur +*/ +static int +ixgbe_sysctl_thermal_test(SYSCTLFN_ARGS) +{ + struct sysctlnode node = *rnode; + struct adapter 
*adapter = (struct adapter *)node.sysctl_data; + int error, fire = 0; + struct ixgbe_hw *hw; + + hw = &adapter->hw; + + node.sysctl_data = &fire; + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + if ((error) || (newp == NULL)) + return (error); + + if (fire) { + u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS); + reg |= IXGBE_EICR_TS; + IXGBE_WRITE_REG(hw, IXGBE_EICS, reg); } - switch (link & IXGBE_PCI_LINK_SPEED) { - case IXGBE_PCI_LINK_SPEED_2500: - hw->bus.speed = ixgbe_bus_speed_2500; + return (0); +} + +/* +** Manage DMA Coalescing. +** Control values: +** 0/1 - off / on (use default value of 1000) +** +** Legal timer values are: +** 50,100,250,500,1000,2000,5000,10000 +** +** Turning off interrupt moderation will also turn this off. +*/ +static int +ixgbe_sysctl_dmac(SYSCTLFN_ARGS) +{ + struct sysctlnode node = *rnode; + struct adapter *adapter = (struct adapter *)node.sysctl_data; + struct ifnet *ifp = adapter->ifp; + int error; + u16 oldval; + int newval; + + oldval = adapter->dmac; + newval = oldval; + node.sysctl_data = &newval; + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + if ((error) || (newp == NULL)) + return (error); + + switch (newval) { + case 0: + /* Disabled */ + adapter->dmac = 0; break; - case IXGBE_PCI_LINK_SPEED_5000: - hw->bus.speed = ixgbe_bus_speed_5000; + case 1: + /* Enable and use default */ + adapter->dmac = 1000; break; - case IXGBE_PCI_LINK_SPEED_8000: - hw->bus.speed = ixgbe_bus_speed_8000; + case 50: + case 100: + case 250: + case 500: + case 1000: + case 2000: + case 5000: + case 10000: + /* Legal values - allow */ + adapter->dmac = newval; break; default: - hw->bus.speed = ixgbe_bus_speed_unknown; - break; + /* Do nothing, illegal value */ + return (EINVAL); } - mac->ops.set_lan_id(hw); + /* Re-initialize hardware if it's already running */ + if (ifp->if_flags & IFF_RUNNING) + ixgbe_init(ifp); -display: - device_printf(dev,"PCI Express Bus: Speed %s %s\n", - ((hw->bus.speed == ixgbe_bus_speed_8000) ? 
"8.0GT/s": - (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s": - (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s":"Unknown"), - (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" : - (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" : - (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" : - ("Unknown")); + return (0); +} - if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) && - ((hw->bus.width <= ixgbe_bus_width_pcie_x4) && - (hw->bus.speed == ixgbe_bus_speed_2500))) { - device_printf(dev, "PCI-Express bandwidth available" - " for this card\n is not sufficient for" - " optimal performance.\n"); - device_printf(dev, "For optimal performance a x8 " - "PCIE, or x4 PCIE Gen2 slot is required.\n"); - } - if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) && - ((hw->bus.width <= ixgbe_bus_width_pcie_x8) && - (hw->bus.speed < ixgbe_bus_speed_8000))) { - device_printf(dev, "PCI-Express bandwidth available" - " for this card\n is not sufficient for" - " optimal performance.\n"); - device_printf(dev, "For optimal performance a x8 " - "PCIE Gen3 slot is required.\n"); - } +#ifdef IXGBE_DEBUG +/** + * Sysctl to test power states + * Values: + * 0 - set device to D0 + * 3 - set device to D3 + * (none) - get current device power state + */ +static int +ixgbe_sysctl_power_state(SYSCTLFN_ARGS) +{ + struct sysctlnode node = *rnode; + struct adapter *adapter = (struct adapter *)node.sysctl_data; + device_t dev = adapter->dev; + int curr_ps, new_ps, error = 0; - return; +#if notyet + curr_ps = new_ps = pci_get_powerstate(dev); + + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + if ((error) || (req->newp == NULL)) + return (error); + + if (new_ps == curr_ps) + return (0); + + if (new_ps == 3 && curr_ps == 0) + error = DEVICE_SUSPEND(dev); + else if (new_ps == 0 && curr_ps == 3) + error = DEVICE_RESUME(dev); + else + return (EINVAL); + + device_printf(dev, "New state: %d\n", pci_get_powerstate(dev)); + + return (error); +#else + return 0; +#endif } +#endif +/* + 
* Sysctl to enable/disable the WoL capability, if supported by the adapter. + * Values: + * 0 - disabled + * 1 - enabled + */ +static int +ixgbe_sysctl_wol_enable(SYSCTLFN_ARGS) +{ + struct sysctlnode node = *rnode; + struct adapter *adapter = (struct adapter *)node.sysctl_data; + struct ixgbe_hw *hw = &adapter->hw; + bool new_wol_enabled; + int error = 0; + + new_wol_enabled = hw->wol_enabled; + node.sysctl_data = &new_wol_enabled; + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + if ((error) || (newp == NULL)) + return (error); + if (new_wol_enabled == hw->wol_enabled) + return (0); + + if (new_wol_enabled && !adapter->wol_support) + return (ENODEV); + else + hw->wol_enabled = new_wol_enabled; + return (0); +} /* -** Setup the correct IVAR register for a particular MSIX interrupt -** (yes this is all very magic and confusing :) -** - entry is the register array entry -** - vector is the MSIX vector for this queue -** - type is RX/TX/MISC -*/ -static void -ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type) + * Sysctl to enable/disable the Energy Efficient Ethernet capability, + * if supported by the adapter. 
+ * Values: + * 0 - disabled + * 1 - enabled + */ +static int +ixgbe_sysctl_eee_enable(SYSCTLFN_ARGS) { + struct sysctlnode node = *rnode; + struct adapter *adapter = (struct adapter *)node.sysctl_data; struct ixgbe_hw *hw = &adapter->hw; - u32 ivar, index; + struct ifnet *ifp = adapter->ifp; + int new_eee_enabled, error = 0; - vector |= IXGBE_IVAR_ALLOC_VAL; + new_eee_enabled = adapter->eee_enabled; + node.sysctl_data = &new_eee_enabled; + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + if ((error) || (newp == NULL)) + return (error); + new_eee_enabled = !!(new_eee_enabled); + if (new_eee_enabled == adapter->eee_enabled) + return (0); - switch (hw->mac.type) { + if (new_eee_enabled > 0 && !hw->mac.ops.setup_eee) + return (ENODEV); + else + adapter->eee_enabled = new_eee_enabled; - case ixgbe_mac_82598EB: - if (type == -1) - entry = IXGBE_IVAR_OTHER_CAUSES_INDEX; - else - entry += (type * 64); - index = (entry >> 2) & 0x1F; - ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index)); - ivar &= ~(0xFF << (8 * (entry & 0x3))); - ivar |= (vector << (8 * (entry & 0x3))); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar); - break; + /* Re-initialize hardware if it's already running */ + if (ifp->if_flags & IFF_RUNNING) + ixgbe_init(ifp); - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - if (type == -1) { /* MISC IVAR */ - index = (entry & 1) * 8; - ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC); - ivar &= ~(0xFF << index); - ivar |= (vector << index); - IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar); - } else { /* RX/TX IVARS */ - index = (16 * (entry & 1)) + (8 * type); - ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1)); - ivar &= ~(0xFF << index); - ivar |= (vector << index); - IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar); - } + return (0); +} - default: - break; - } +/* + * Read-only sysctl indicating whether EEE support was negotiated + * on the link. 
+ */ +static int +ixgbe_sysctl_eee_negotiated(SYSCTLFN_ARGS) +{ + struct sysctlnode node = *rnode; + struct adapter *adapter = (struct adapter *)node.sysctl_data; + struct ixgbe_hw *hw = &adapter->hw; + bool status; + + status = !!(IXGBE_READ_REG(hw, IXGBE_EEE_STAT) & IXGBE_EEE_STAT_NEG); + + node.sysctl_data = &status; + return (sysctl_lookup(SYSCTLFN_CALL(&node))); } -static void -ixgbe_configure_ivars(struct adapter *adapter) +/* + * Read-only sysctl indicating whether RX Link is in LPI state. + */ +static int +ixgbe_sysctl_eee_rx_lpi_status(SYSCTLFN_ARGS) { - struct ix_queue *que = adapter->queues; - u32 newitr; + struct sysctlnode node = *rnode; + struct adapter *adapter = (struct adapter *)node.sysctl_data; + struct ixgbe_hw *hw = &adapter->hw; + bool status; - if (ixgbe_max_interrupt_rate > 0) - newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8; - else - newitr = 0; + status = !!(IXGBE_READ_REG(hw, IXGBE_EEE_STAT) & + IXGBE_EEE_RX_LPI_STATUS); - for (int i = 0; i < adapter->num_queues; i++, que++) { - /* First the RX queue entry */ - ixgbe_set_ivar(adapter, i, que->msix, 0); - /* ... and the TX */ - ixgbe_set_ivar(adapter, i, que->msix, 1); - /* Set an Initial EITR value */ - IXGBE_WRITE_REG(&adapter->hw, - IXGBE_EITR(que->msix), newitr); - } + node.sysctl_data = &status; + return (sysctl_lookup(SYSCTLFN_CALL(&node))); +} - /* For the Link interrupt */ - ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1); +/* + * Read-only sysctl indicating whether TX Link is in LPI state. + */ +static int +ixgbe_sysctl_eee_tx_lpi_status(SYSCTLFN_ARGS) +{ + struct sysctlnode node = *rnode; + struct adapter *adapter = (struct adapter *)node.sysctl_data; + struct ixgbe_hw *hw = &adapter->hw; + bool status; + + status = !!(IXGBE_READ_REG(hw, IXGBE_EEE_STAT) & + IXGBE_EEE_TX_LPI_STATUS); + + node.sysctl_data = &status; + return (sysctl_lookup(SYSCTLFN_CALL(&node))); } /* -** ixgbe_sfp_probe - called in the local timer to -** determine if a port had optics inserted. 
-*/ -static bool ixgbe_sfp_probe(struct adapter *adapter) + * Read-only sysctl indicating TX Link LPI delay + */ +static int +ixgbe_sysctl_eee_tx_lpi_delay(SYSCTLFN_ARGS) { - struct ixgbe_hw *hw = &adapter->hw; - device_t dev = adapter->dev; - bool result = FALSE; + struct sysctlnode node = *rnode; + struct adapter *adapter = (struct adapter *)node.sysctl_data; + struct ixgbe_hw *hw = &adapter->hw; + u32 reg; - if ((hw->phy.type == ixgbe_phy_nl) && - (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) { - s32 ret = hw->phy.ops.identify_sfp(hw); - if (ret) - goto out; - ret = hw->phy.ops.reset(hw); - if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) { - device_printf(dev,"Unsupported SFP+ module detected!"); - device_printf(dev, "Reload driver with supported module.\n"); - adapter->sfp_probe = FALSE; - goto out; - } else - device_printf(dev,"SFP+ module detected!\n"); - /* We now have supported optics */ - adapter->sfp_probe = FALSE; - /* Set the optics type so system reports correctly */ - ixgbe_setup_optics(adapter); - result = TRUE; + reg = IXGBE_READ_REG(hw, IXGBE_EEE_SU); + + reg >>= 26; + node.sysctl_data = &reg; + return (sysctl_lookup(SYSCTLFN_CALL(&node))); +} + +/* + * Sysctl to enable/disable the types of packets that the + * adapter will wake up on upon receipt. + * WUFC - Wake Up Filter Control + * Flags: + * 0x1 - Link Status Change + * 0x2 - Magic Packet + * 0x4 - Direct Exact + * 0x8 - Directed Multicast + * 0x10 - Broadcast + * 0x20 - ARP/IPv4 Request Packet + * 0x40 - Direct IPv4 Packet + * 0x80 - Direct IPv6 Packet + * + * Setting another flag will cause the sysctl to return an + * error.
+ */ +static int +ixgbe_sysctl_wufc(SYSCTLFN_ARGS) +{ + struct sysctlnode node = *rnode; + struct adapter *adapter = (struct adapter *)node.sysctl_data; + int error = 0; + u32 new_wufc; + + new_wufc = adapter->wufc; + node.sysctl_data = &new_wufc; + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + if ((error) || (newp == NULL)) + return (error); + if (new_wufc == adapter->wufc) + return (0); + + if (new_wufc & 0xffffff00) + return (EINVAL); + else { + new_wufc &= 0xff; + new_wufc |= (0xffffff & adapter->wufc); + adapter->wufc = new_wufc; } -out: - return (result); + + return (0); +} + +#ifdef IXGBE_DEBUG +static int +ixgbe_sysctl_print_rss_config(SYSCTLFN_ARGS) +{ + struct adapter *adapter = (struct adapter *)node.sysctl_data; + struct ixgbe_hw *hw = &adapter->hw; + device_t dev = adapter->dev; + int error = 0, reta_size; + struct sbuf *buf; + u32 reg; + + buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); + if (!buf) { + device_printf(dev, "Could not allocate sbuf for output.\n"); + return (ENOMEM); + } + + // TODO: use sbufs to make a string to print out + /* Set multiplier for RETA setup and table size based on MAC */ + switch (adapter->hw.mac.type) { + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + reta_size = 128; + break; + default: + reta_size = 32; + break; + } + + /* Print out the redirection table */ + sbuf_cat(buf, "\n"); + for (int i = 0; i < reta_size; i++) { + if (i < 32) { + reg = IXGBE_READ_REG(hw, IXGBE_RETA(i)); + sbuf_printf(buf, "RETA(%2d): 0x%08x\n", i, reg); + } else { + reg = IXGBE_READ_REG(hw, IXGBE_ERETA(i - 32)); + sbuf_printf(buf, "ERETA(%2d): 0x%08x\n", i - 32, reg); + } + } + + // TODO: print more config + + error = sbuf_finish(buf); + if (error) + device_printf(dev, "Error finishing sbuf: %d\n", error); + + sbuf_delete(buf); + return (0); } +#endif /* IXGBE_DEBUG */ /* -** Tasklet handler for MSIX Link interrupts -** - do outside interrupt since it might sleep +** Enable the hardware to drop packets when the buffer is +** full. 
This is useful when multiqueue,so that no single +** queue being full stalls the entire RX engine. We only +** enable this when Multiqueue AND when Flow Control is +** disabled. */ static void -ixgbe_handle_link(void *context) +ixgbe_enable_rx_drop(struct adapter *adapter) { - struct adapter *adapter = context; + struct ixgbe_hw *hw = &adapter->hw; - if (ixgbe_check_link(&adapter->hw, - &adapter->link_speed, &adapter->link_up, 0) == 0) - ixgbe_update_link_status(adapter); + for (int i = 0; i < adapter->num_queues; i++) { + struct rx_ring *rxr = &adapter->rx_rings[i]; + u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxr->me)); + srrctl |= IXGBE_SRRCTL_DROP_EN; + IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxr->me), srrctl); + } +#ifdef PCI_IOV + /* enable drop for each vf */ + for (int i = 0; i < adapter->num_vfs; i++) { + IXGBE_WRITE_REG(hw, IXGBE_QDE, + (IXGBE_QDE_WRITE | (i << IXGBE_QDE_IDX_SHIFT) | + IXGBE_QDE_ENABLE)); + } +#endif } -/* -** Tasklet for handling SFP module interrupts -*/ static void -ixgbe_handle_mod(void *context) +ixgbe_disable_rx_drop(struct adapter *adapter) { - struct adapter *adapter = context; - struct ixgbe_hw *hw = &adapter->hw; - device_t dev = adapter->dev; - u32 err; + struct ixgbe_hw *hw = &adapter->hw; - err = hw->phy.ops.identify_sfp(hw); - if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { - device_printf(dev, - "Unsupported SFP+ module type was detected.\n"); - return; + for (int i = 0; i < adapter->num_queues; i++) { + struct rx_ring *rxr = &adapter->rx_rings[i]; + u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxr->me)); + srrctl &= ~IXGBE_SRRCTL_DROP_EN; + IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxr->me), srrctl); } - err = hw->mac.ops.setup_sfp(hw); - if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { - device_printf(dev, - "Setup failure - unsupported SFP+ module type.\n"); - return; +#ifdef PCI_IOV + /* disable drop for each vf */ + for (int i = 0; i < adapter->num_vfs; i++) { + IXGBE_WRITE_REG(hw, IXGBE_QDE, + (IXGBE_QDE_WRITE | (i << IXGBE_QDE_IDX_SHIFT))); } 
- softint_schedule(adapter->msf_si); - return; +#endif } - -/* -** Tasklet for handling MSF (multispeed fiber) interrupts -*/ static void -ixgbe_handle_msf(void *context) +ixgbe_rearm_queues(struct adapter *adapter, u64 queues) { - struct adapter *adapter = context; - struct ixgbe_hw *hw = &adapter->hw; - u32 autoneg; - bool negotiate; + u32 mask; - autoneg = hw->phy.autoneg_advertised; - if ((!autoneg) && (hw->mac.ops.get_link_capabilities)) - hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate); - else - negotiate = 0; - if (hw->mac.ops.setup_link) - hw->mac.ops.setup_link(hw, autoneg, TRUE); - return; + switch (adapter->hw.mac.type) { + case ixgbe_mac_82598EB: + mask = (IXGBE_EIMS_RTX_QUEUE & queues); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask); + break; + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + mask = (queues & 0xFFFFFFFF); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask); + mask = (queues >> 32); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(1), mask); + break; + default: + break; + } } -#ifdef IXGBE_FDIR +#ifdef PCI_IOV + /* -** Tasklet for reinitializing the Flow Director filter table +** Support functions for SRIOV/VF management */ + static void -ixgbe_reinit_fdir(void *context) +ixgbe_ping_all_vfs(struct adapter *adapter) { - struct adapter *adapter = context; - struct ifnet *ifp = adapter->ifp; + struct ixgbe_vf *vf; - if (adapter->fdir_reinit != 1) /* Shouldn't happen */ - return; - ixgbe_reinit_fdir_tables_82599(&adapter->hw); - adapter->fdir_reinit = 0; - /* re-enable flow director interrupts */ - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR); - /* Restart the interface */ - ifp->if_flags |= IFF_RUNNING; - return; + for (int i = 0; i < adapter->num_vfs; i++) { + vf = &adapter->vfs[i]; + if (vf->flags & IXGBE_VF_ACTIVE) + ixgbe_send_vf_msg(adapter, vf, IXGBE_PF_CONTROL_MSG); + } } -#endif 
-/********************************************************************** - * - * Update the board statistics counters. - * - **********************************************************************/ + static void -ixgbe_update_stats_counters(struct adapter *adapter) +ixgbe_vf_set_default_vlan(struct adapter *adapter, struct ixgbe_vf *vf, + uint16_t tag) { - struct ifnet *ifp = adapter->ifp; - struct ixgbe_hw *hw = &adapter->hw; - u32 missed_rx = 0, bprc, lxon, lxoff, total; - u64 total_missed_rx = 0; - uint64_t crcerrs, rlec; + struct ixgbe_hw *hw; + uint32_t vmolr, vmvir; - crcerrs = IXGBE_READ_REG(hw, IXGBE_CRCERRS); - adapter->stats.crcerrs.ev_count += crcerrs; - adapter->stats.illerrc.ev_count += IXGBE_READ_REG(hw, IXGBE_ILLERRC); - adapter->stats.errbc.ev_count += IXGBE_READ_REG(hw, IXGBE_ERRBC); - adapter->stats.mspdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MSPDC); + hw = &adapter->hw; - /* - ** Note: these are for the 8 possible traffic classes, - ** which in current implementation is unused, - ** therefore only 0 should read real data. 
- */ - for (int i = 0; i < __arraycount(adapter->stats.mpc); i++) { - int j = i % adapter->num_queues; - u32 mp; - mp = IXGBE_READ_REG(hw, IXGBE_MPC(i)); - /* missed_rx tallies misses for the gprc workaround */ - missed_rx += mp; - /* global total per queue */ - adapter->stats.mpc[j].ev_count += mp; - /* Running comprehensive total for stats display */ - total_missed_rx += mp; - if (hw->mac.type == ixgbe_mac_82598EB) { - adapter->stats.rnbc[j] += - IXGBE_READ_REG(hw, IXGBE_RNBC(i)); - adapter->stats.qbtc[j].ev_count += - IXGBE_READ_REG(hw, IXGBE_QBTC(i)); - adapter->stats.qbrc[j].ev_count += - IXGBE_READ_REG(hw, IXGBE_QBRC(i)); - adapter->stats.pxonrxc[j].ev_count += - IXGBE_READ_REG(hw, IXGBE_PXONRXC(i)); - } else { - adapter->stats.pxonrxc[j].ev_count += - IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i)); - } - adapter->stats.pxontxc[j].ev_count += - IXGBE_READ_REG(hw, IXGBE_PXONTXC(i)); - adapter->stats.pxofftxc[j].ev_count += - IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i)); - adapter->stats.pxoffrxc[j].ev_count += - IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i)); - adapter->stats.pxon2offc[j].ev_count += - IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i)); - } - for (int i = 0; i < __arraycount(adapter->stats.qprc); i++) { - int j = i % adapter->num_queues; - adapter->stats.qprc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRC(i)); - adapter->stats.qptc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPTC(i)); - adapter->stats.qprdc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRDC(i)); - } - adapter->stats.mlfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MLFC); - adapter->stats.mrfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MRFC); - rlec = IXGBE_READ_REG(hw, IXGBE_RLEC); - adapter->stats.rlec.ev_count += rlec; + vf->vlan_tag = tag; + + vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf->pool)); - /* Hardware workaround, gprc counts missed packets */ - adapter->stats.gprc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPRC) - missed_rx; + /* Do not receive packets that pass inexact filters. 
*/ + vmolr &= ~(IXGBE_VMOLR_ROMPE | IXGBE_VMOLR_ROPE); - lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC); - adapter->stats.lxontxc.ev_count += lxon; - lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC); - adapter->stats.lxofftxc.ev_count += lxoff; - total = lxon + lxoff; + /* Disable Multicast Promicuous Mode. */ + vmolr &= ~IXGBE_VMOLR_MPE; - if (hw->mac.type != ixgbe_mac_82598EB) { - adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCL) + - ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32); - adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCL) + - ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32) - total * ETHER_MIN_LEN; - adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORL) + - ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32); - adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT); - adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT); - } else { - adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXC); - adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC); - /* 82598 only has a counter in the high register */ - adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCH); - adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCH) - total * ETHER_MIN_LEN; - adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORH); - } + /* Accept broadcasts. */ + vmolr |= IXGBE_VMOLR_BAM; - /* - * Workaround: mprc hardware is incorrectly counting - * broadcasts, so for now we subtract those. - */ - bprc = IXGBE_READ_REG(hw, IXGBE_BPRC); - adapter->stats.bprc.ev_count += bprc; - adapter->stats.mprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPRC) - ((hw->mac.type == ixgbe_mac_82598EB) ? bprc : 0); + if (tag == 0) { + /* Accept non-vlan tagged traffic. 
*/ + //vmolr |= IXGBE_VMOLR_AUPE; - adapter->stats.prc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC64); - adapter->stats.prc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC127); - adapter->stats.prc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC255); - adapter->stats.prc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC511); - adapter->stats.prc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1023); - adapter->stats.prc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1522); - - adapter->stats.gptc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPTC) - total; - adapter->stats.mptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPTC) - total; - adapter->stats.ptc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC64) - total; - - adapter->stats.ruc.ev_count += IXGBE_READ_REG(hw, IXGBE_RUC); - adapter->stats.rfc.ev_count += IXGBE_READ_REG(hw, IXGBE_RFC); - adapter->stats.roc.ev_count += IXGBE_READ_REG(hw, IXGBE_ROC); - adapter->stats.rjc.ev_count += IXGBE_READ_REG(hw, IXGBE_RJC); - adapter->stats.mngprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPRC); - adapter->stats.mngpdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPDC); - adapter->stats.mngptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPTC); - adapter->stats.tpr.ev_count += IXGBE_READ_REG(hw, IXGBE_TPR); - adapter->stats.tpt.ev_count += IXGBE_READ_REG(hw, IXGBE_TPT); - adapter->stats.ptc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC127); - adapter->stats.ptc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC255); - adapter->stats.ptc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC511); - adapter->stats.ptc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1023); - adapter->stats.ptc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1522); - adapter->stats.bptc.ev_count += IXGBE_READ_REG(hw, IXGBE_BPTC); - adapter->stats.xec.ev_count += IXGBE_READ_REG(hw, IXGBE_XEC); - adapter->stats.fccrc.ev_count += IXGBE_READ_REG(hw, IXGBE_FCCRC); - adapter->stats.fclast.ev_count += IXGBE_READ_REG(hw, IXGBE_FCLAST); + /* Allow VM to tag outgoing traffic; no default tag. 
*/ + vmvir = 0; + } else { + /* Require vlan-tagged traffic. */ + vmolr &= ~IXGBE_VMOLR_AUPE; - /* Only read FCOE on 82599 */ - if (hw->mac.type != ixgbe_mac_82598EB) { - adapter->stats.fcoerpdc.ev_count += - IXGBE_READ_REG(hw, IXGBE_FCOERPDC); - adapter->stats.fcoeprc.ev_count += - IXGBE_READ_REG(hw, IXGBE_FCOEPRC); - adapter->stats.fcoeptc.ev_count += - IXGBE_READ_REG(hw, IXGBE_FCOEPTC); - adapter->stats.fcoedwrc.ev_count += - IXGBE_READ_REG(hw, IXGBE_FCOEDWRC); - adapter->stats.fcoedwtc.ev_count += - IXGBE_READ_REG(hw, IXGBE_FCOEDWTC); + /* Tag all traffic with provided vlan tag. */ + vmvir = (tag | IXGBE_VMVIR_VLANA_DEFAULT); } + IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf->pool), vmolr); + IXGBE_WRITE_REG(hw, IXGBE_VMVIR(vf->pool), vmvir); +} + + +static boolean_t +ixgbe_vf_frame_size_compatible(struct adapter *adapter, struct ixgbe_vf *vf) +{ - /* Fill out the OS statistics structure */ /* - * NetBSD: Don't override if_{i|o}{packets|bytes|mcasts} with - * adapter->stats counters. It's required to make ifconfig -z - * (SOICZIFDATA) work. + * Frame size compatibility between PF and VF is only a problem on + * 82599-based cards. X540 and later support any combination of jumbo + * frames on PFs and VFs. */ - ifp->if_collisions = 0; + if (adapter->hw.mac.type != ixgbe_mac_82599EB) + return (TRUE); - /* Rx Errors */ - ifp->if_iqdrops += total_missed_rx; - ifp->if_ierrors += crcerrs + rlec; -} + switch (vf->api_ver) { + case IXGBE_API_VER_1_0: + case IXGBE_API_VER_UNKNOWN: + /* + * On legacy (1.0 and older) VF versions, we don't support jumbo + * frames on either the PF or the VF. 
+ */ + if (adapter->max_frame_size > ETHER_MAX_LEN || + vf->max_frame_size > ETHER_MAX_LEN) + return (FALSE); -/** ixgbe_sysctl_tdh_handler - Handler function - * Retrieves the TDH value from the hardware - */ -static int -ixgbe_sysctl_tdh_handler(SYSCTLFN_ARGS) -{ - struct sysctlnode node; - uint32_t val; - struct tx_ring *txr; + return (TRUE); - node = *rnode; - txr = (struct tx_ring *)node.sysctl_data; - if (txr == NULL) - return 0; - val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me)); - node.sysctl_data = &val; - return sysctl_lookup(SYSCTLFN_CALL(&node)); -} + break; + case IXGBE_API_VER_1_1: + default: + /* + * 1.1 or later VF versions always work if they aren't using + * jumbo frames. + */ + if (vf->max_frame_size <= ETHER_MAX_LEN) + return (TRUE); -/** ixgbe_sysctl_tdt_handler - Handler function - * Retrieves the TDT value from the hardware - */ -static int -ixgbe_sysctl_tdt_handler(SYSCTLFN_ARGS) -{ - struct sysctlnode node; - uint32_t val; - struct tx_ring *txr; + /* + * Jumbo frames only work with VFs if the PF is also using jumbo + * frames. 
+ */ + if (adapter->max_frame_size <= ETHER_MAX_LEN) + return (TRUE); - node = *rnode; - txr = (struct tx_ring *)node.sysctl_data; - if (txr == NULL) - return 0; - val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me)); - node.sysctl_data = &val; - return sysctl_lookup(SYSCTLFN_CALL(&node)); + return (FALSE); + + } } -/** ixgbe_sysctl_rdh_handler - Handler function - * Retrieves the RDH value from the hardware - */ -static int -ixgbe_sysctl_rdh_handler(SYSCTLFN_ARGS) + +static void +ixgbe_process_vf_reset(struct adapter *adapter, struct ixgbe_vf *vf) { - struct sysctlnode node; - uint32_t val; - struct rx_ring *rxr; + ixgbe_vf_set_default_vlan(adapter, vf, vf->default_vlan); - node = *rnode; - rxr = (struct rx_ring *)node.sysctl_data; - if (rxr == NULL) - return 0; - val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me)); - node.sysctl_data = &val; - return sysctl_lookup(SYSCTLFN_CALL(&node)); + // XXX clear multicast addresses + + ixgbe_clear_rar(&adapter->hw, vf->rar_index); + + vf->api_ver = IXGBE_API_VER_UNKNOWN; } -/** ixgbe_sysctl_rdt_handler - Handler function - * Retrieves the RDT value from the hardware - */ -static int -ixgbe_sysctl_rdt_handler(SYSCTLFN_ARGS) + +static void +ixgbe_vf_enable_transmit(struct adapter *adapter, struct ixgbe_vf *vf) { - struct sysctlnode node; - uint32_t val; - struct rx_ring *rxr; + struct ixgbe_hw *hw; + uint32_t vf_index, vfte; - node = *rnode; - rxr = (struct rx_ring *)node.sysctl_data; - if (rxr == NULL) - return 0; - val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me)); - node.sysctl_data = &val; - return sysctl_lookup(SYSCTLFN_CALL(&node)); + hw = &adapter->hw; + + vf_index = IXGBE_VF_INDEX(vf->pool); + vfte = IXGBE_READ_REG(hw, IXGBE_VFTE(vf_index)); + vfte |= IXGBE_VF_BIT(vf->pool); + IXGBE_WRITE_REG(hw, IXGBE_VFTE(vf_index), vfte); } -static int -ixgbe_sysctl_interrupt_rate_handler(SYSCTLFN_ARGS) + +static void +ixgbe_vf_enable_receive(struct adapter *adapter, struct ixgbe_vf *vf) { - int error; - 
struct sysctlnode node; - struct ix_queue *que; - uint32_t reg, usec, rate; + struct ixgbe_hw *hw; + uint32_t vf_index, vfre; - node = *rnode; - que = (struct ix_queue *)node.sysctl_data; - if (que == NULL) - return 0; - reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix)); - usec = ((reg & 0x0FF8) >> 3); - if (usec > 0) - rate = 500000 / usec; + hw = &adapter->hw; + + vf_index = IXGBE_VF_INDEX(vf->pool); + vfre = IXGBE_READ_REG(hw, IXGBE_VFRE(vf_index)); + if (ixgbe_vf_frame_size_compatible(adapter, vf)) + vfre |= IXGBE_VF_BIT(vf->pool); else - rate = 0; - node.sysctl_data = &rate; - error = sysctl_lookup(SYSCTLFN_CALL(&node)); - if (error) - return error; - reg &= ~0xfff; /* default, no limitation */ - ixgbe_max_interrupt_rate = 0; - if (rate > 0 && rate < 500000) { - if (rate < 1000) - rate = 1000; - ixgbe_max_interrupt_rate = rate; - reg |= ((4000000/rate) & 0xff8 ); - } - IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg); - return 0; + vfre &= ~IXGBE_VF_BIT(vf->pool); + IXGBE_WRITE_REG(hw, IXGBE_VFRE(vf_index), vfre); } -const struct sysctlnode * -ixgbe_sysctl_instance(struct adapter *adapter) + +static void +ixgbe_vf_reset_msg(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg) { - const char *dvname; - struct sysctllog **log; - int rc; - const struct sysctlnode *rnode; + struct ixgbe_hw *hw; + uint32_t ack; + uint32_t resp[IXGBE_VF_PERMADDR_MSG_LEN]; + + hw = &adapter->hw; + + ixgbe_process_vf_reset(adapter, vf); + + if (ixgbe_validate_mac_addr(vf->ether_addr) == 0) { + ixgbe_set_rar(&adapter->hw, vf->rar_index, + vf->ether_addr, vf->pool, TRUE); + ack = IXGBE_VT_MSGTYPE_ACK; + } else + ack = IXGBE_VT_MSGTYPE_NACK; - log = &adapter->sysctllog; - dvname = device_xname(adapter->dev); + ixgbe_vf_enable_transmit(adapter, vf); + ixgbe_vf_enable_receive(adapter, vf); - if ((rc = sysctl_createv(log, 0, NULL, &rnode, - 0, CTLTYPE_NODE, dvname, - SYSCTL_DESCR("ixgbe information and settings"), - NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, 
CTL_EOL)) != 0) - goto err; + vf->flags |= IXGBE_VF_CTS; - return rnode; -err: - printf("%s: sysctl_createv failed, rc = %d\n", __func__, rc); - return NULL; + resp[0] = IXGBE_VF_RESET | ack | IXGBE_VT_MSGTYPE_CTS; + bcopy(vf->ether_addr, &resp[1], ETHER_ADDR_LEN); + resp[3] = hw->mac.mc_filter_type; + ixgbe_write_mbx(hw, resp, IXGBE_VF_PERMADDR_MSG_LEN, vf->pool); } -/* - * Add sysctl variables, one per statistic, to the system. - */ + static void -ixgbe_add_hw_stats(struct adapter *adapter) +ixgbe_vf_set_mac(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg) { - device_t dev = adapter->dev; - const struct sysctlnode *rnode, *cnode; - struct sysctllog **log = &adapter->sysctllog; - struct tx_ring *txr = adapter->tx_rings; - struct rx_ring *rxr = adapter->rx_rings; - struct ixgbe_hw_stats *stats = &adapter->stats; + uint8_t *mac; - /* Driver Statistics */ -#if 0 - /* These counters are not updated by the software */ - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", - CTLFLAG_RD, &adapter->dropped_pkts, - "Driver dropped packets"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_header_failed", - CTLFLAG_RD, &adapter->mbuf_header_failed, - "???"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_packet_failed", - CTLFLAG_RD, &adapter->mbuf_packet_failed, - "???"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "no_tx_map_avail", - CTLFLAG_RD, &adapter->no_tx_map_avail, - "???"); -#endif - evcnt_attach_dynamic(&adapter->handleq, EVCNT_TYPE_MISC, - NULL, device_xname(dev), "Handled queue in softint"); - evcnt_attach_dynamic(&adapter->req, EVCNT_TYPE_MISC, - NULL, device_xname(dev), "Requeued in softint"); - evcnt_attach_dynamic(&adapter->morerx, EVCNT_TYPE_MISC, - NULL, device_xname(dev), "Interrupt handler more rx"); - evcnt_attach_dynamic(&adapter->moretx, EVCNT_TYPE_MISC, - NULL, device_xname(dev), "Interrupt handler more tx"); - evcnt_attach_dynamic(&adapter->txloops, EVCNT_TYPE_MISC, - NULL, device_xname(dev), "Interrupt handler tx loops"); - 
evcnt_attach_dynamic(&adapter->efbig_tx_dma_setup, EVCNT_TYPE_MISC, - NULL, device_xname(dev), "Driver tx dma soft fail EFBIG"); - evcnt_attach_dynamic(&adapter->m_defrag_failed, EVCNT_TYPE_MISC, - NULL, device_xname(dev), "m_defrag() failed"); - evcnt_attach_dynamic(&adapter->efbig2_tx_dma_setup, EVCNT_TYPE_MISC, - NULL, device_xname(dev), "Driver tx dma hard fail EFBIG"); - evcnt_attach_dynamic(&adapter->einval_tx_dma_setup, EVCNT_TYPE_MISC, - NULL, device_xname(dev), "Driver tx dma hard fail EINVAL"); - evcnt_attach_dynamic(&adapter->other_tx_dma_setup, EVCNT_TYPE_MISC, - NULL, device_xname(dev), "Driver tx dma hard fail other"); - evcnt_attach_dynamic(&adapter->eagain_tx_dma_setup, EVCNT_TYPE_MISC, - NULL, device_xname(dev), "Driver tx dma soft fail EAGAIN"); - evcnt_attach_dynamic(&adapter->enomem_tx_dma_setup, EVCNT_TYPE_MISC, - NULL, device_xname(dev), "Driver tx dma soft fail ENOMEM"); - evcnt_attach_dynamic(&adapter->watchdog_events, EVCNT_TYPE_MISC, - NULL, device_xname(dev), "Watchdog timeouts"); - evcnt_attach_dynamic(&adapter->tso_err, EVCNT_TYPE_MISC, - NULL, device_xname(dev), "TSO errors"); - evcnt_attach_dynamic(&adapter->link_irq, EVCNT_TYPE_MISC, - NULL, device_xname(dev), "Link MSIX IRQ Handled"); + mac = (uint8_t*)&msg[1]; - for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) { - snprintf(adapter->queues[i].evnamebuf, - sizeof(adapter->queues[i].evnamebuf), "%s queue%d", - device_xname(dev), i); - snprintf(adapter->queues[i].namebuf, - sizeof(adapter->queues[i].namebuf), "queue%d", i); + /* Check that the VF has permission to change the MAC address. 
*/ + if (!(vf->flags & IXGBE_VF_CAP_MAC) && ixgbe_vf_mac_changed(vf, mac)) { + ixgbe_send_vf_nack(adapter, vf, msg[0]); + return; + } - if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) { - aprint_error_dev(dev, "could not create sysctl root\n"); - break; - } + if (ixgbe_validate_mac_addr(mac) != 0) { + ixgbe_send_vf_nack(adapter, vf, msg[0]); + return; + } - if (sysctl_createv(log, 0, &rnode, &rnode, - 0, CTLTYPE_NODE, - adapter->queues[i].namebuf, SYSCTL_DESCR("Queue Name"), - NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL) != 0) - break; + bcopy(mac, vf->ether_addr, ETHER_ADDR_LEN); - if (sysctl_createv(log, 0, &rnode, &cnode, - CTLFLAG_READWRITE, CTLTYPE_INT, - "interrupt_rate", SYSCTL_DESCR("Interrupt Rate"), - ixgbe_sysctl_interrupt_rate_handler, 0, - (void *)&adapter->queues[i], 0, CTL_CREATE, CTL_EOL) != 0) - break; + ixgbe_set_rar(&adapter->hw, vf->rar_index, vf->ether_addr, + vf->pool, TRUE); - if (sysctl_createv(log, 0, &rnode, &cnode, - CTLFLAG_READONLY, CTLTYPE_QUAD, - "irqs", SYSCTL_DESCR("irqs on this queue"), - NULL, 0, &(adapter->queues[i].irqs), - 0, CTL_CREATE, CTL_EOL) != 0) - break; + ixgbe_send_vf_ack(adapter, vf, msg[0]); +} - if (sysctl_createv(log, 0, &rnode, &cnode, - CTLFLAG_READONLY, CTLTYPE_INT, - "txd_head", SYSCTL_DESCR("Transmit Descriptor Head"), - ixgbe_sysctl_tdh_handler, 0, (void *)txr, - 0, CTL_CREATE, CTL_EOL) != 0) - break; - if (sysctl_createv(log, 0, &rnode, &cnode, - CTLFLAG_READONLY, CTLTYPE_INT, - "txd_tail", SYSCTL_DESCR("Transmit Descriptor Tail"), - ixgbe_sysctl_tdt_handler, 0, (void *)txr, - 0, CTL_CREATE, CTL_EOL) != 0) - break; +/* +** VF multicast addresses are set by using the appropriate bit in +** 1 of 128 32 bit addresses (4096 possible). 
+*/ +static void +ixgbe_vf_set_mc_addr(struct adapter *adapter, struct ixgbe_vf *vf, u32 *msg) +{ + u16 *list = (u16*)&msg[1]; + int entries; + u32 vmolr, vec_bit, vec_reg, mta_reg; + + entries = (msg[0] & IXGBE_VT_MSGINFO_MASK) >> IXGBE_VT_MSGINFO_SHIFT; + entries = min(entries, IXGBE_MAX_VF_MC); + + vmolr = IXGBE_READ_REG(&adapter->hw, IXGBE_VMOLR(vf->pool)); + + vf->num_mc_hashes = entries; + + /* Set the appropriate MTA bit */ + for (int i = 0; i < entries; i++) { + vf->mc_hash[i] = list[i]; + vec_reg = (vf->mc_hash[i] >> 5) & 0x7F; + vec_bit = vf->mc_hash[i] & 0x1F; + mta_reg = IXGBE_READ_REG(&adapter->hw, IXGBE_MTA(vec_reg)); + mta_reg |= (1 << vec_bit); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_MTA(vec_reg), mta_reg); + } - evcnt_attach_dynamic(&txr->tso_tx, EVCNT_TYPE_MISC, - NULL, device_xname(dev), "TSO"); - evcnt_attach_dynamic(&txr->no_desc_avail, EVCNT_TYPE_MISC, - NULL, adapter->queues[i].evnamebuf, - "Queue No Descriptor Available"); - evcnt_attach_dynamic(&txr->total_packets, EVCNT_TYPE_MISC, - NULL, adapter->queues[i].evnamebuf, - "Queue Packets Transmitted"); + vmolr |= IXGBE_VMOLR_ROMPE; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_VMOLR(vf->pool), vmolr); + ixgbe_send_vf_ack(adapter, vf, msg[0]); + return; +} -#ifdef LRO - struct lro_ctrl *lro = &rxr->lro; -#endif /* LRO */ - if (sysctl_createv(log, 0, &rnode, &cnode, - CTLFLAG_READONLY, - CTLTYPE_INT, - "rxd_head", SYSCTL_DESCR("Receive Descriptor Head"), - ixgbe_sysctl_rdh_handler, 0, (void *)rxr, 0, - CTL_CREATE, CTL_EOL) != 0) - break; +static void +ixgbe_vf_set_vlan(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg) +{ + struct ixgbe_hw *hw; + int enable; + uint16_t tag; - if (sysctl_createv(log, 0, &rnode, &cnode, - CTLFLAG_READONLY, - CTLTYPE_INT, - "rxd_tail", SYSCTL_DESCR("Receive Descriptor Tail"), - ixgbe_sysctl_rdt_handler, 0, (void *)rxr, 0, - CTL_CREATE, CTL_EOL) != 0) - break; + hw = &adapter->hw; + enable = IXGBE_VT_MSGINFO(msg[0]); + tag = msg[1] & IXGBE_VLVF_VLANID_MASK; - if (i 
< __arraycount(adapter->stats.mpc)) { - evcnt_attach_dynamic(&adapter->stats.mpc[i], - EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf, - "Missed Packet Count"); - } - if (i < __arraycount(adapter->stats.pxontxc)) { - evcnt_attach_dynamic(&adapter->stats.pxontxc[i], - EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf, - "pxontxc"); - evcnt_attach_dynamic(&adapter->stats.pxonrxc[i], - EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf, - "pxonrxc"); - evcnt_attach_dynamic(&adapter->stats.pxofftxc[i], - EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf, - "pxofftxc"); - evcnt_attach_dynamic(&adapter->stats.pxoffrxc[i], - EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf, - "pxoffrxc"); - evcnt_attach_dynamic(&adapter->stats.pxon2offc[i], - EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf, - "pxon2offc"); - } - if (i < __arraycount(adapter->stats.qprc)) { - evcnt_attach_dynamic(&adapter->stats.qprc[i], - EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf, - "qprc"); - evcnt_attach_dynamic(&adapter->stats.qptc[i], - EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf, - "qptc"); - evcnt_attach_dynamic(&adapter->stats.qbrc[i], - EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf, - "qbrc"); - evcnt_attach_dynamic(&adapter->stats.qbtc[i], - EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf, - "qbtc"); - evcnt_attach_dynamic(&adapter->stats.qprdc[i], - EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf, - "qprdc"); - } + if (!(vf->flags & IXGBE_VF_CAP_VLAN)) { + ixgbe_send_vf_nack(adapter, vf, msg[0]); + return; + } - evcnt_attach_dynamic(&rxr->rx_packets, EVCNT_TYPE_MISC, - NULL, adapter->queues[i].evnamebuf, "Queue Packets Received"); - evcnt_attach_dynamic(&rxr->rx_bytes, EVCNT_TYPE_MISC, - NULL, adapter->queues[i].evnamebuf, "Queue Bytes Received"); - evcnt_attach_dynamic(&rxr->rx_copies, EVCNT_TYPE_MISC, - NULL, adapter->queues[i].evnamebuf, "Copied RX Frames"); - evcnt_attach_dynamic(&rxr->no_jmbuf, EVCNT_TYPE_MISC, - NULL, 
adapter->queues[i].evnamebuf, "Rx no jumbo mbuf"); - evcnt_attach_dynamic(&rxr->rx_discarded, EVCNT_TYPE_MISC, - NULL, adapter->queues[i].evnamebuf, "Rx discarded"); - evcnt_attach_dynamic(&rxr->rx_irq, EVCNT_TYPE_MISC, - NULL, adapter->queues[i].evnamebuf, "Rx interrupts"); -#ifdef LRO - SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued", - CTLFLAG_RD, &lro->lro_queued, 0, - "LRO Queued"); - SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed", - CTLFLAG_RD, &lro->lro_flushed, 0, - "LRO Flushed"); -#endif /* LRO */ + /* It is illegal to enable vlan tag 0. */ + if (tag == 0 && enable != 0){ + ixgbe_send_vf_nack(adapter, vf, msg[0]); + return; } + + ixgbe_set_vfta(hw, tag, vf->pool, enable); + ixgbe_send_vf_ack(adapter, vf, msg[0]); +} - /* MAC stats get the own sub node */ +static void +ixgbe_vf_set_lpe(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg) +{ + struct ixgbe_hw *hw; + uint32_t vf_max_size, pf_max_size, mhadd; - snprintf(stats->namebuf, - sizeof(stats->namebuf), "%s MAC Statistics", device_xname(dev)); + hw = &adapter->hw; + vf_max_size = msg[1]; - evcnt_attach_dynamic(&stats->ipcs, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "rx csum offload - IP"); - evcnt_attach_dynamic(&stats->l4cs, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "rx csum offload - L4"); - evcnt_attach_dynamic(&stats->ipcs_bad, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "rx csum offload - IP bad"); - evcnt_attach_dynamic(&stats->l4cs_bad, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "rx csum offload - L4 bad"); - evcnt_attach_dynamic(&stats->intzero, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "Interrupt conditions zero"); - evcnt_attach_dynamic(&stats->legint, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "Legacy interrupts"); - evcnt_attach_dynamic(&stats->crcerrs, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "CRC Errors"); - evcnt_attach_dynamic(&stats->illerrc, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "Illegal Byte Errors"); - evcnt_attach_dynamic(&stats->errbc, EVCNT_TYPE_MISC, NULL, - 
stats->namebuf, "Byte Errors"); - evcnt_attach_dynamic(&stats->mspdc, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "MAC Short Packets Discarded"); - evcnt_attach_dynamic(&stats->mlfc, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "MAC Local Faults"); - evcnt_attach_dynamic(&stats->mrfc, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "MAC Remote Faults"); - evcnt_attach_dynamic(&stats->rlec, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "Receive Length Errors"); - evcnt_attach_dynamic(&stats->lxontxc, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "Link XON Transmitted"); - evcnt_attach_dynamic(&stats->lxonrxc, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "Link XON Received"); - evcnt_attach_dynamic(&stats->lxofftxc, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "Link XOFF Transmitted"); - evcnt_attach_dynamic(&stats->lxoffrxc, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "Link XOFF Received"); + if (vf_max_size < ETHER_CRC_LEN) { + /* We intentionally ACK invalid LPE requests. */ + ixgbe_send_vf_ack(adapter, vf, msg[0]); + return; + } - /* Packet Reception Stats */ - evcnt_attach_dynamic(&stats->tor, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "Total Octets Received"); - evcnt_attach_dynamic(&stats->gorc, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "Good Octets Received"); - evcnt_attach_dynamic(&stats->tpr, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "Total Packets Received"); - evcnt_attach_dynamic(&stats->gprc, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "Good Packets Received"); - evcnt_attach_dynamic(&stats->mprc, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "Multicast Packets Received"); - evcnt_attach_dynamic(&stats->bprc, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "Broadcast Packets Received"); - evcnt_attach_dynamic(&stats->prc64, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "64 byte frames received "); - evcnt_attach_dynamic(&stats->prc127, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "65-127 byte frames received"); - evcnt_attach_dynamic(&stats->prc255, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "128-255 byte frames 
received"); - evcnt_attach_dynamic(&stats->prc511, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "256-511 byte frames received"); - evcnt_attach_dynamic(&stats->prc1023, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "512-1023 byte frames received"); - evcnt_attach_dynamic(&stats->prc1522, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "1023-1522 byte frames received"); - evcnt_attach_dynamic(&stats->ruc, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "Receive Undersized"); - evcnt_attach_dynamic(&stats->rfc, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "Fragmented Packets Received "); - evcnt_attach_dynamic(&stats->roc, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "Oversized Packets Received"); - evcnt_attach_dynamic(&stats->rjc, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "Received Jabber"); - evcnt_attach_dynamic(&stats->mngprc, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "Management Packets Received"); - evcnt_attach_dynamic(&stats->xec, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "Checksum Errors"); + vf_max_size -= ETHER_CRC_LEN; + + if (vf_max_size > IXGBE_MAX_FRAME_SIZE) { + /* We intentionally ACK invalid LPE requests. */ + ixgbe_send_vf_ack(adapter, vf, msg[0]); + return; + } + + vf->max_frame_size = vf_max_size; + ixgbe_update_max_frame(adapter, vf->max_frame_size); + + /* + * We might have to disable reception to this VF if the frame size is + * not compatible with the config on the PF. 
+ */ + ixgbe_vf_enable_receive(adapter, vf); - /* Packet Transmission Stats */ - evcnt_attach_dynamic(&stats->gotc, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "Good Octets Transmitted"); - evcnt_attach_dynamic(&stats->tpt, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "Total Packets Transmitted"); - evcnt_attach_dynamic(&stats->gptc, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "Good Packets Transmitted"); - evcnt_attach_dynamic(&stats->bptc, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "Broadcast Packets Transmitted"); - evcnt_attach_dynamic(&stats->mptc, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "Multicast Packets Transmitted"); - evcnt_attach_dynamic(&stats->mngptc, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "Management Packets Transmitted"); - evcnt_attach_dynamic(&stats->ptc64, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "64 byte frames transmitted "); - evcnt_attach_dynamic(&stats->ptc127, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "65-127 byte frames transmitted"); - evcnt_attach_dynamic(&stats->ptc255, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "128-255 byte frames transmitted"); - evcnt_attach_dynamic(&stats->ptc511, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "256-511 byte frames transmitted"); - evcnt_attach_dynamic(&stats->ptc1023, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "512-1023 byte frames transmitted"); - evcnt_attach_dynamic(&stats->ptc1522, EVCNT_TYPE_MISC, NULL, - stats->namebuf, "1024-1522 byte frames transmitted"); + mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD); + pf_max_size = (mhadd & IXGBE_MHADD_MFS_MASK) >> IXGBE_MHADD_MFS_SHIFT; + + if (pf_max_size < adapter->max_frame_size) { + mhadd &= ~IXGBE_MHADD_MFS_MASK; + mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT; + IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd); + } + + ixgbe_send_vf_ack(adapter, vf, msg[0]); } -/* -** Set flow control using sysctl: -** Flow control values: -** 0 - off -** 1 - rx pause -** 2 - tx pause -** 3 - full -*/ -static int -ixgbe_set_flowcntl(SYSCTLFN_ARGS) + +static void 
+ixgbe_vf_set_macvlan(struct adapter *adapter, struct ixgbe_vf *vf, + uint32_t *msg) { - struct sysctlnode node; - int error, last; - struct adapter *adapter; + //XXX implement this + ixgbe_send_vf_nack(adapter, vf, msg[0]); +} - node = *rnode; - adapter = (struct adapter *)node.sysctl_data; - node.sysctl_data = &adapter->fc; - last = adapter->fc; - error = sysctl_lookup(SYSCTLFN_CALL(&node)); - if (error != 0 || newp == NULL) - return error; - /* Don't bother if it's not changed */ - if (adapter->fc == last) - return (0); +static void +ixgbe_vf_api_negotiate(struct adapter *adapter, struct ixgbe_vf *vf, + uint32_t *msg) +{ - switch (adapter->fc) { - case ixgbe_fc_rx_pause: - case ixgbe_fc_tx_pause: - case ixgbe_fc_full: - adapter->hw.fc.requested_mode = adapter->fc; - if (adapter->num_queues > 1) - ixgbe_disable_rx_drop(adapter); - break; - case ixgbe_fc_none: - adapter->hw.fc.requested_mode = ixgbe_fc_none; - if (adapter->num_queues > 1) - ixgbe_enable_rx_drop(adapter); - break; - default: - adapter->fc = last; - return (EINVAL); + switch (msg[1]) { + case IXGBE_API_VER_1_0: + case IXGBE_API_VER_1_1: + vf->api_ver = msg[1]; + ixgbe_send_vf_ack(adapter, vf, msg[0]); + break; + default: + vf->api_ver = IXGBE_API_VER_UNKNOWN; + ixgbe_send_vf_nack(adapter, vf, msg[0]); + break; } - /* Don't autoneg if forcing a value */ - adapter->hw.fc.disable_fc_autoneg = TRUE; - ixgbe_fc_enable(&adapter->hw); - return 0; } -/* -** Control link advertise speed: -** 1 - advertise only 1G -** 2 - advertise 100Mb -** 3 - advertise normal -*/ -static int -ixgbe_set_advertise(SYSCTLFN_ARGS) +static void +ixgbe_vf_get_queues(struct adapter *adapter, struct ixgbe_vf *vf, + uint32_t *msg) { - struct sysctlnode node; - int t, error = 0; - struct adapter *adapter; - device_t dev; - struct ixgbe_hw *hw; - ixgbe_link_speed speed, last; + struct ixgbe_hw *hw; + uint32_t resp[IXGBE_VF_GET_QUEUES_RESP_LEN]; + int num_queues; - node = *rnode; - adapter = (struct adapter *)node.sysctl_data; - dev = 
adapter->dev; hw = &adapter->hw; - last = adapter->advertise; - t = adapter->advertise; - node.sysctl_data = &t; - error = sysctl_lookup(SYSCTLFN_CALL(&node)); - if (error != 0 || newp == NULL) - return error; - if (adapter->advertise == last) /* no change */ - return (0); + /* GET_QUEUES is not supported on pre-1.1 APIs. */ + switch (msg[0]) { + case IXGBE_API_VER_1_0: + case IXGBE_API_VER_UNKNOWN: + ixgbe_send_vf_nack(adapter, vf, msg[0]); + return; + } - if (t == -1) - return 0; + resp[0] = IXGBE_VF_GET_QUEUES | IXGBE_VT_MSGTYPE_ACK | + IXGBE_VT_MSGTYPE_CTS; - adapter->advertise = t; + num_queues = ixgbe_vf_queues(ixgbe_get_iov_mode(adapter)); + resp[IXGBE_VF_TX_QUEUES] = num_queues; + resp[IXGBE_VF_RX_QUEUES] = num_queues; + resp[IXGBE_VF_TRANS_VLAN] = (vf->default_vlan != 0); + resp[IXGBE_VF_DEF_QUEUE] = 0; - if (!((hw->phy.media_type == ixgbe_media_type_copper) || - (hw->phy.multispeed_fiber))) - return (EINVAL); + ixgbe_write_mbx(hw, resp, IXGBE_VF_GET_QUEUES_RESP_LEN, vf->pool); +} - if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) { - device_printf(dev, "Set Advertise: 100Mb on X540 only\n"); - return (EINVAL); - } - if (adapter->advertise == 1) - speed = IXGBE_LINK_SPEED_1GB_FULL; - else if (adapter->advertise == 2) - speed = IXGBE_LINK_SPEED_100_FULL; - else if (adapter->advertise == 3) - speed = IXGBE_LINK_SPEED_1GB_FULL | - IXGBE_LINK_SPEED_10GB_FULL; - else { /* bogus value */ - adapter->advertise = last; - return (EINVAL); +static void +ixgbe_process_vf_msg(struct adapter *adapter, struct ixgbe_vf *vf) +{ + struct ixgbe_hw *hw; + uint32_t msg[IXGBE_VFMAILBOX_SIZE]; + int error; + + hw = &adapter->hw; + + error = ixgbe_read_mbx(hw, msg, IXGBE_VFMAILBOX_SIZE, vf->pool); + + if (error != 0) + return; + + CTR3(KTR_MALLOC, "%s: received msg %x from %d", + adapter->ifp->if_xname, msg[0], vf->pool); + if (msg[0] == IXGBE_VF_RESET) { + ixgbe_vf_reset_msg(adapter, vf, msg); + return; } - hw->mac.autotry_restart = TRUE; - 
hw->mac.ops.setup_link(hw, speed, TRUE); + if (!(vf->flags & IXGBE_VF_CTS)) { + ixgbe_send_vf_nack(adapter, vf, msg[0]); + return; + } - return 0; + switch (msg[0] & IXGBE_VT_MSG_MASK) { + case IXGBE_VF_SET_MAC_ADDR: + ixgbe_vf_set_mac(adapter, vf, msg); + break; + case IXGBE_VF_SET_MULTICAST: + ixgbe_vf_set_mc_addr(adapter, vf, msg); + break; + case IXGBE_VF_SET_VLAN: + ixgbe_vf_set_vlan(adapter, vf, msg); + break; + case IXGBE_VF_SET_LPE: + ixgbe_vf_set_lpe(adapter, vf, msg); + break; + case IXGBE_VF_SET_MACVLAN: + ixgbe_vf_set_macvlan(adapter, vf, msg); + break; + case IXGBE_VF_API_NEGOTIATE: + ixgbe_vf_api_negotiate(adapter, vf, msg); + break; + case IXGBE_VF_GET_QUEUES: + ixgbe_vf_get_queues(adapter, vf, msg); + break; + default: + ixgbe_send_vf_nack(adapter, vf, msg[0]); + } } + /* -** Thermal Shutdown Trigger -** - cause a Thermal Overtemp IRQ -** - this now requires firmware enabling -*/ -static int -ixgbe_set_thermal_test(SYSCTLFN_ARGS) + * Tasklet for handling VF -> PF mailbox messages. 
+ */ +static void +ixgbe_handle_mbx(void *context, int pending) { - struct sysctlnode node; - int error, fire = 0; - struct adapter *adapter; + struct adapter *adapter; struct ixgbe_hw *hw; + struct ixgbe_vf *vf; + int i; - node = *rnode; - adapter = (struct adapter *)node.sysctl_data; + adapter = context; hw = &adapter->hw; - if (hw->mac.type != ixgbe_mac_X540) - return (0); + IXGBE_CORE_LOCK(adapter); + for (i = 0; i < adapter->num_vfs; i++) { + vf = &adapter->vfs[i]; - node.sysctl_data = &fire; - error = sysctl_lookup(SYSCTLFN_CALL(&node)); - if ((error) || (newp == NULL)) - return (error); + if (vf->flags & IXGBE_VF_ACTIVE) { + if (ixgbe_check_for_rst(hw, vf->pool) == 0) + ixgbe_process_vf_reset(adapter, vf); - if (fire) { - u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS); - reg |= IXGBE_EICR_TS; - IXGBE_WRITE_REG(hw, IXGBE_EICS, reg); + if (ixgbe_check_for_msg(hw, vf->pool) == 0) + ixgbe_process_vf_msg(adapter, vf); + + if (ixgbe_check_for_ack(hw, vf->pool) == 0) + ixgbe_process_vf_ack(adapter, vf); + } + } + IXGBE_CORE_UNLOCK(adapter); +} + + +static int +ixgbe_init_iov(device_t dev, u16 num_vfs, const nvlist_t *config) +{ + struct adapter *adapter; + enum ixgbe_iov_mode mode; + + adapter = device_get_softc(dev); + adapter->num_vfs = num_vfs; + mode = ixgbe_get_iov_mode(adapter); + + if (num_vfs > ixgbe_max_vfs(mode)) { + adapter->num_vfs = 0; + return (ENOSPC); + } + + IXGBE_CORE_LOCK(adapter); + + adapter->vfs = malloc(sizeof(*adapter->vfs) * num_vfs, M_IXGBE, + M_NOWAIT | M_ZERO); + + if (adapter->vfs == NULL) { + adapter->num_vfs = 0; + IXGBE_CORE_UNLOCK(adapter); + return (ENOMEM); } + ixgbe_init_locked(adapter); + + IXGBE_CORE_UNLOCK(adapter); + return (0); } + +static void +ixgbe_uninit_iov(device_t dev) +{ + struct ixgbe_hw *hw; + struct adapter *adapter; + uint32_t pf_reg, vf_reg; + + adapter = device_get_softc(dev); + hw = &adapter->hw; + + IXGBE_CORE_LOCK(adapter); + + /* Enable rx/tx for the PF and disable it for all VFs. 
*/ + pf_reg = IXGBE_VF_INDEX(adapter->pool); + IXGBE_WRITE_REG(hw, IXGBE_VFRE(pf_reg), + IXGBE_VF_BIT(adapter->pool)); + IXGBE_WRITE_REG(hw, IXGBE_VFTE(pf_reg), + IXGBE_VF_BIT(adapter->pool)); + + if (pf_reg == 0) + vf_reg = 1; + else + vf_reg = 0; + IXGBE_WRITE_REG(hw, IXGBE_VFRE(vf_reg), 0); + IXGBE_WRITE_REG(hw, IXGBE_VFTE(vf_reg), 0); + + IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0); + + free(adapter->vfs, M_IXGBE); + adapter->vfs = NULL; + adapter->num_vfs = 0; + + IXGBE_CORE_UNLOCK(adapter); +} + + +static void +ixgbe_initialize_iov(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + uint32_t mrqc, mtqc, vt_ctl, vf_reg, gcr_ext, gpie; + enum ixgbe_iov_mode mode; + int i; + + mode = ixgbe_get_iov_mode(adapter); + if (mode == IXGBE_NO_VM) + return; + + IXGBE_CORE_LOCK_ASSERT(adapter); + + mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC); + mrqc &= ~IXGBE_MRQC_MRQE_MASK; + + switch (mode) { + case IXGBE_64_VM: + mrqc |= IXGBE_MRQC_VMDQRSS64EN; + break; + case IXGBE_32_VM: + mrqc |= IXGBE_MRQC_VMDQRSS32EN; + break; + default: + panic("Unexpected SR-IOV mode %d", mode); + } + IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc); + + mtqc = IXGBE_MTQC_VT_ENA; + switch (mode) { + case IXGBE_64_VM: + mtqc |= IXGBE_MTQC_64VF; + break; + case IXGBE_32_VM: + mtqc |= IXGBE_MTQC_32VF; + break; + default: + panic("Unexpected SR-IOV mode %d", mode); + } + IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc); + + + gcr_ext = IXGBE_READ_REG(hw, IXGBE_GCR_EXT); + gcr_ext |= IXGBE_GCR_EXT_MSIX_EN; + gcr_ext &= ~IXGBE_GCR_EXT_VT_MODE_MASK; + switch (mode) { + case IXGBE_64_VM: + gcr_ext |= IXGBE_GCR_EXT_VT_MODE_64; + break; + case IXGBE_32_VM: + gcr_ext |= IXGBE_GCR_EXT_VT_MODE_32; + break; + default: + panic("Unexpected SR-IOV mode %d", mode); + } + IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, gcr_ext); + + + gpie = IXGBE_READ_REG(hw, IXGBE_GPIE); + gcr_ext &= ~IXGBE_GPIE_VTMODE_MASK; + switch (mode) { + case IXGBE_64_VM: + gpie |= IXGBE_GPIE_VTMODE_64; + break; + case IXGBE_32_VM: + gpie |= IXGBE_GPIE_VTMODE_32; + 
break; + default: + panic("Unexpected SR-IOV mode %d", mode); + } + IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie); + + /* Enable rx/tx for the PF. */ + vf_reg = IXGBE_VF_INDEX(adapter->pool); + IXGBE_WRITE_REG(hw, IXGBE_VFRE(vf_reg), + IXGBE_VF_BIT(adapter->pool)); + IXGBE_WRITE_REG(hw, IXGBE_VFTE(vf_reg), + IXGBE_VF_BIT(adapter->pool)); + + /* Allow VM-to-VM communication. */ + IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN); + + vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN; + vt_ctl |= (adapter->pool << IXGBE_VT_CTL_POOL_SHIFT); + IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl); + + for (i = 0; i < adapter->num_vfs; i++) + ixgbe_init_vf(adapter, &adapter->vfs[i]); +} + + /* -** Enable the hardware to drop packets when the buffer is -** full. This is useful when multiqueue,so that no single -** queue being full stalls the entire RX engine. We only -** enable this when Multiqueue AND when Flow Control is -** disabled. +** Check the max frame setting of all active VF's */ static void -ixgbe_enable_rx_drop(struct adapter *adapter) +ixgbe_recalculate_max_frame(struct adapter *adapter) { - struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_vf *vf; - for (int i = 0; i < adapter->num_queues; i++) { - u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i)); - srrctl |= IXGBE_SRRCTL_DROP_EN; - IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl); + IXGBE_CORE_LOCK_ASSERT(adapter); + + for (int i = 0; i < adapter->num_vfs; i++) { + vf = &adapter->vfs[i]; + if (vf->flags & IXGBE_VF_ACTIVE) + ixgbe_update_max_frame(adapter, vf->max_frame_size); } } + static void -ixgbe_disable_rx_drop(struct adapter *adapter) +ixgbe_init_vf(struct adapter *adapter, struct ixgbe_vf *vf) { - struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_hw *hw; + uint32_t vf_index, pfmbimr; - for (int i = 0; i < adapter->num_queues; i++) { - u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i)); - srrctl &= ~IXGBE_SRRCTL_DROP_EN; - IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl); + IXGBE_CORE_LOCK_ASSERT(adapter); + + 
hw = &adapter->hw; + + if (!(vf->flags & IXGBE_VF_ACTIVE)) + return; + + vf_index = IXGBE_VF_INDEX(vf->pool); + pfmbimr = IXGBE_READ_REG(hw, IXGBE_PFMBIMR(vf_index)); + pfmbimr |= IXGBE_VF_BIT(vf->pool); + IXGBE_WRITE_REG(hw, IXGBE_PFMBIMR(vf_index), pfmbimr); + + ixgbe_vf_set_default_vlan(adapter, vf, vf->vlan_tag); + + // XXX multicast addresses + + if (ixgbe_validate_mac_addr(vf->ether_addr) == 0) { + ixgbe_set_rar(&adapter->hw, vf->rar_index, + vf->ether_addr, vf->pool, TRUE); } + + ixgbe_vf_enable_transmit(adapter, vf); + ixgbe_vf_enable_receive(adapter, vf); + + ixgbe_send_vf_msg(adapter, vf, IXGBE_PF_CONTROL_MSG); +} + +static int +ixgbe_add_vf(device_t dev, u16 vfnum, const nvlist_t *config) +{ + struct adapter *adapter; + struct ixgbe_vf *vf; + const void *mac; + + adapter = device_get_softc(dev); + + KASSERT(vfnum < adapter->num_vfs, ("VF index %d is out of range %d", + vfnum, adapter->num_vfs)); + + IXGBE_CORE_LOCK(adapter); + vf = &adapter->vfs[vfnum]; + vf->pool= vfnum; + + /* RAR[0] is used by the PF so use vfnum + 1 for VF RAR. */ + vf->rar_index = vfnum + 1; + vf->default_vlan = 0; + vf->max_frame_size = ETHER_MAX_LEN; + ixgbe_update_max_frame(adapter, vf->max_frame_size); + + if (nvlist_exists_binary(config, "mac-addr")) { + mac = nvlist_get_binary(config, "mac-addr", NULL); + bcopy(mac, vf->ether_addr, ETHER_ADDR_LEN); + if (nvlist_get_bool(config, "allow-set-mac")) + vf->flags |= IXGBE_VF_CAP_MAC; + } else + /* + * If the administrator has not specified a MAC address then + * we must allow the VF to choose one. + */ + vf->flags |= IXGBE_VF_CAP_MAC; + + vf->flags = IXGBE_VF_ACTIVE; + + ixgbe_init_vf(adapter, vf); + IXGBE_CORE_UNLOCK(adapter); + + return (0); } +#endif /* PCI_IOV */