Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/arch/x86/x86/intr.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/arch/x86/x86/intr.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.58 retrieving revision 1.58.2.4 diff -u -p -r1.58 -r1.58.2.4 --- src/sys/arch/x86/x86/intr.c 2008/12/17 20:51:33 1.58 +++ src/sys/arch/x86/x86/intr.c 2011/01/10 00:37:37 1.58.2.4 @@ -1,7 +1,7 @@ -/* $NetBSD: intr.c,v 1.58 2008/12/17 20:51:33 cegger Exp $ */ +/* $NetBSD: intr.c,v 1.58.2.4 2011/01/10 00:37:37 jym Exp $ */ /*- - * Copyright (c) 2007, 2008 The NetBSD Foundation, Inc. + * Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -133,8 +133,9 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.58 2008/12/17 20:51:33 cegger Exp $"); +__KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.58.2.4 2011/01/10 00:37:37 jym Exp $"); +#include "opt_intrdebug.h" #include "opt_multiprocessor.h" #include "opt_acpi.h" @@ -143,12 +144,13 @@ __KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.5 #include #include #include -#include +#include #include #include #include #include #include +#include #include @@ -158,9 +160,9 @@ __KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.5 #include "ioapic.h" #include "lapic.h" #include "pci.h" -#include "acpi.h" +#include "acpica.h" -#if NIOAPIC > 0 || NACPI > 0 +#if NIOAPIC > 0 || NACPICA > 0 #include #include #include @@ -174,6 +176,10 @@ __KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.5 #include #endif +#ifdef DDB +#include +#endif + struct pic softintr_pic = { .pic_name = "softintr_fakepic", .pic_type = PIC_SOFT, @@ -182,16 +188,13 @@ struct pic softintr_pic = { .pic_lock = __SIMPLELOCK_UNLOCKED, }; -#if NIOAPIC > 0 || NACPI > 0 +#if NIOAPIC > 0 || NACPICA > 0 static int intr_scan_bus(int, int, int *); #if NPCI > 0 static int intr_find_pcibridge(int, pcitag_t *, pci_chipset_tag_t *); #endif #endif -kmutex_t x86_intr_lock; -bool x86_intr_lock_initted; - /* * Fill in default interrupt table (in case of spurious interrupt * during configuration of kernel), setup interrupt control unit @@ -201,8 +204,6 @@ intr_default_setup(void) { int i; - mutex_init(&x86_intr_lock, MUTEX_DEFAULT, IPL_NONE); - /* icu vectors */ for (i = 0; i < NUM_LEGACY_IRQS; i++) { idt_vec_reserve(ICU_OFFSET + i); @@ -217,61 +218,6 @@ intr_default_setup(void) i8259_default_setup(); } -struct nmi_handler { - int (*n_func)(void *); - void *n_arg; - SLIST_ENTRY(nmi_handler) n_next; -}; - -SLIST_HEAD(nmi_handler_head, nmi_handler) nmi_handlers = - SLIST_HEAD_INITIALIZER(nmi_handler_head); - -void * -nmi_establish(int (*func)(void *), void *arg) -{ - struct nmi_handler *n; - - n = malloc(sizeof(*n), M_DEVBUF, cold ? M_NOWAIT : M_WAITOK); - - if (n == NULL) - return NULL; - - n->n_func = func; - n->n_arg = arg; - SLIST_INSERT_HEAD(&nmi_handlers, n, n_next); - KASSERT(SLIST_FIRST(&nmi_handlers) == n); - return n; -} - -bool -nmi_disestablish(void *n0) -{ - struct nmi_handler *n; - - SLIST_FOREACH(n, &nmi_handlers, n_next) { - if (n == n0) - break; - } - if (n == NULL) - return false; - SLIST_REMOVE(&nmi_handlers, n, nmi_handler, n_next); - free(n, M_DEVBUF); - return true; -} - -int -nmi_dispatch(void) -{ - int handled = 0; - struct nmi_handler *n; - - SLIST_FOREACH(n, &nmi_handlers, n_next) { - if ((*n->n_func)(n->n_arg)) - handled = 1; - } - return handled; -} - /* * Handle a NMI, possibly a machine check. * return true to panic system, false to ignore. @@ -279,12 +225,22 @@ nmi_dispatch(void) int x86_nmi(void) { + log(LOG_CRIT, "NMI port 61 %x, port 70 %x\n", inb(0x61), inb(0x70)); return(0); } /* * Recalculate the interrupt masks from scratch. + * During early boot, anything goes and we are always called on the BP. + * When the system is up and running: + * + * => called with ci == curcpu() + * => cpu_lock held by the initiator + * => interrupts disabled on-chip (PSL_I) + * + * Do not call printf(), kmem_free() or other "heavyweight" routines + * from here. This routine must be quick and must not block. */ static void intr_calculatemasks(struct cpu_info *ci) @@ -347,7 +303,7 @@ intr_calculatemasks(struct cpu_info *ci) * * XXX should maintain one list, not an array and a linked list. */ -#if (NPCI > 0) && ((NIOAPIC > 0) || NACPI > 0) +#if (NPCI > 0) && ((NIOAPIC > 0) || NACPICA > 0) struct intr_extra_bus { int bus; pcitag_t *pci_bridge_tag; @@ -364,7 +320,7 @@ intr_add_pcibus(struct pcibus_attach_arg { struct intr_extra_bus *iebp; - iebp = malloc(sizeof(struct intr_extra_bus), M_TEMP, M_WAITOK); + iebp = kmem_alloc(sizeof(*iebp), KM_SLEEP); iebp->bus = pba->pba_bus; iebp->pci_chipset_tag = pba->pba_pc; iebp->pci_bridge_tag = pba->pba_bridgetag; @@ -373,7 +329,7 @@ intr_add_pcibus(struct pcibus_attach_arg static int intr_find_pcibridge(int bus, pcitag_t *pci_bridge_tag, - pci_chipset_tag_t *pci_chipset_tag) + pci_chipset_tag_t *pc) { struct intr_extra_bus *iebp; struct mp_bus *mpb; @@ -386,7 +342,7 @@ intr_find_pcibridge(int bus, pcitag_t *p if (mpb->mb_pci_bridge_tag == NULL) return ENOENT; *pci_bridge_tag = *mpb->mb_pci_bridge_tag; - *pci_chipset_tag = mpb->mb_pci_chipset_tag; + *pc = mpb->mb_pci_chipset_tag; return 0; } @@ -395,7 +351,7 @@ intr_find_pcibridge(int bus, pcitag_t *p if (iebp->pci_bridge_tag == NULL) return ENOENT; *pci_bridge_tag = *iebp->pci_bridge_tag; - *pci_chipset_tag = iebp->pci_chipset_tag; + *pc = iebp->pci_chipset_tag; return 0; } } @@ -403,29 +359,25 @@ intr_find_pcibridge(int bus, pcitag_t *p } #endif - -/* - * XXX if defined(MULTIPROCESSOR) && .. ? - */ -#if NIOAPIC > 0 || NACPI > 0 +#if NIOAPIC > 0 || NACPICA > 0 int intr_find_mpmapping(int bus, int pin, int *handle) { #if NPCI > 0 int dev, func; pcitag_t pci_bridge_tag; - pci_chipset_tag_t pci_chipset_tag; + pci_chipset_tag_t pc; #endif #if NPCI > 0 while (intr_scan_bus(bus, pin, handle) != 0) { if (intr_find_pcibridge(bus, &pci_bridge_tag, - &pci_chipset_tag) != 0) + &pc) != 0) return ENOENT; dev = pin >> 2; pin = pin & 3; pin = PPB_INTERRUPT_SWIZZLE(pin + 1, dev) - 1; - pci_decompose_tag(pci_chipset_tag, pci_bridge_tag, &bus, + pci_decompose_tag(pc, pci_bridge_tag, &bus, &dev, &func); pin |= (dev << 2); } @@ -449,7 +401,7 @@ intr_scan_bus(int bus, int pin, int *han for (mip = intrs; mip != NULL; mip = mip->next) { if (mip->bus_pin == pin) { -#if NACPI > 0 +#if NACPICA > 0 if (mip->linkdev != NULL) if (mpacpi_findintr_linkdev(mip) != 0) continue; @@ -469,15 +421,14 @@ intr_allocate_slot_cpu(struct cpu_info * int slot, i; struct intrsource *isp; + KASSERT(mutex_owned(&cpu_lock)); + if (pic == &i8259_pic) { - if (!CPU_IS_PRIMARY(ci)) - return EBUSY; + KASSERT(CPU_IS_PRIMARY(ci)); slot = pin; - mutex_enter(&x86_intr_lock); } else { slot = -1; - mutex_enter(&x86_intr_lock); /* * intr_allocate_slot has checked for an existing mapping. * Now look for a free slot. @@ -489,17 +440,14 @@ intr_allocate_slot_cpu(struct cpu_info * } } if (slot == -1) { - mutex_exit(&x86_intr_lock); return EBUSY; } } isp = ci->ci_isources[slot]; if (isp == NULL) { - isp = malloc(sizeof (struct intrsource), - M_DEVBUF, M_NOWAIT|M_ZERO); + isp = kmem_zalloc(sizeof(*isp), KM_SLEEP); if (isp == NULL) { - mutex_exit(&x86_intr_lock); return ENOMEM; } snprintf(isp->is_evname, sizeof (isp->is_evname), @@ -508,7 +456,6 @@ intr_allocate_slot_cpu(struct cpu_info * pic->pic_name, isp->is_evname); ci->ci_isources[slot] = isp; } - mutex_exit(&x86_intr_lock); *index = slot; return 0; @@ -517,20 +464,23 @@ intr_allocate_slot_cpu(struct cpu_info * /* * A simple round-robin allocator to assign interrupts to CPUs. */ -static int +static int __noinline intr_allocate_slot(struct pic *pic, int pin, int level, struct cpu_info **cip, int *index, int *idt_slot) { CPU_INFO_ITERATOR cii; - struct cpu_info *ci; + struct cpu_info *ci, *lci; struct intrsource *isp; - int slot, idtvec, error; + int slot = 0, idtvec, error; + + KASSERT(mutex_owned(&cpu_lock)); /* First check if this pin is already used by an interrupt vector. */ for (CPU_INFO_FOREACH(cii, ci)) { for (slot = 0 ; slot < MAX_INTR_SOURCES ; slot++) { - if ((isp = ci->ci_isources[slot]) == NULL) + if ((isp = ci->ci_isources[slot]) == NULL) { continue; + } if (isp->is_pic == pic && isp->is_pin == pin) { *idt_slot = isp->is_idtvec; *index = slot; @@ -542,56 +492,77 @@ intr_allocate_slot(struct pic *pic, int /* * The pic/pin combination doesn't have an existing mapping. - * Find a slot for a new interrupt source and allocate an IDT - * vector. - * - * For the i8259 case, this always uses the reserved slots - * of the primary CPU and fixed IDT vectors. This is required - * by other parts of the code, see x86/intr.h for more details. - * - * For the IOAPIC case, interrupts are assigned to the - * primary CPU by default, until it runs out of slots. + * Find a slot for a new interrupt source. For the i8259 case, + * we always use reserved slots of the primary CPU. Otherwise, + * we make an attempt to balance the interrupt load. * * PIC and APIC usage are essentially exclusive, so the reservation * of the ISA slots is ignored when assigning IOAPIC slots. - * - * XXX Fix interrupt allocation to Application Processors. - * XXX Check how many interrupts each CPU got and assign it to - * XXX the least loaded CPU. Consider adding options to bind - * XXX interrupts to specific CPUs. - * XXX Drop apic level support, just assign IDT vectors sequentially. */ - ci = &cpu_info_primary; - error = intr_allocate_slot_cpu(ci, pic, pin, &slot); - if (error != 0) { + if (pic == &i8259_pic) { /* - * ..now try the others. + * Must be directed to BP. */ - for (CPU_INFO_FOREACH(cii, ci)) { - if (CPU_IS_PRIMARY(ci)) + ci = &cpu_info_primary; + error = intr_allocate_slot_cpu(ci, pic, pin, &slot); + } else { + /* + * Find least loaded AP/BP and try to allocate there. + */ + ci = NULL; + for (CPU_INFO_FOREACH(cii, lci)) { + if ((lci->ci_schedstate.spc_flags & SPCF_NOINTR) != 0) { continue; - error = intr_allocate_slot_cpu(ci, pic, pin, &slot); - if (error == 0) - break; + } +#if 0 + if (ci == NULL || + ci->ci_nintrhand > lci->ci_nintrhand) { + ci = lci; + } +#else + ci = &cpu_info_primary; +#endif } - if (error != 0) - return EBUSY; + KASSERT(ci != NULL); + error = intr_allocate_slot_cpu(ci, pic, pin, &slot); + + /* + * If that did not work, allocate anywhere. + */ + if (error != 0) { + for (CPU_INFO_FOREACH(cii, ci)) { + if ((ci->ci_schedstate.spc_flags & + SPCF_NOINTR) != 0) { + continue; + } + error = intr_allocate_slot_cpu(ci, pic, + pin, &slot); + if (error == 0) { + break; + } + } + } + } + if (error != 0) { + return error; } + KASSERT(ci != NULL); - if (pic == &i8259_pic) + /* + * Now allocate an IDT vector. + * For the 8259 these are reserved up front. + */ + if (pic == &i8259_pic) { idtvec = ICU_OFFSET + pin; - else + } else { idtvec = idt_vec_alloc(APIC_LEVEL(level), IDT_INTR_HIGH); - + } if (idtvec == 0) { - mutex_enter(&x86_intr_lock); evcnt_detach(&ci->ci_isources[slot]->is_evcnt); - free(ci->ci_isources[slot], M_DEVBUF); + kmem_free(ci->ci_isources[slot], sizeof(*(ci->ci_isources[slot]))); ci->ci_isources[slot] = NULL; - mutex_exit(&x86_intr_lock); return EBUSY; } - ci->ci_isources[slot]->is_idtvec = idtvec; *idt_slot = idtvec; *index = slot; @@ -599,6 +570,23 @@ intr_allocate_slot(struct pic *pic, int return 0; } +static void +intr_source_free(struct cpu_info *ci, int slot, struct pic *pic, int idtvec) +{ + struct intrsource *isp; + + isp = ci->ci_isources[slot]; + + if (isp->is_handlers != NULL) + return; + ci->ci_isources[slot] = NULL; + evcnt_detach(&isp->is_evcnt); + kmem_free(isp, sizeof(*isp)); + ci->ci_isources[slot] = NULL; + if (pic != &i8259_pic) + idt_vec_free(idtvec); +} + #ifdef MULTIPROCESSOR static int intr_biglock_wrapper(void *); @@ -638,6 +626,59 @@ intr_findpic(int num) return NULL; } +/* + * Handle per-CPU component of interrupt establish. + * + * => caller (on initiating CPU) holds cpu_lock on our behalf + * => arg1: struct intrhand *ih + * => arg2: int idt_vec + */ +static void +intr_establish_xcall(void *arg1, void *arg2) +{ + struct intrsource *source; + struct intrstub *stubp; + struct intrhand *ih; + struct cpu_info *ci; + int idt_vec; + u_long psl; + + ih = arg1; + + KASSERT(ih->ih_cpu == curcpu() || !mp_online); + + ci = ih->ih_cpu; + source = ci->ci_isources[ih->ih_slot]; + idt_vec = (int)(intptr_t)arg2; + + /* Disable interrupts locally. */ + psl = x86_read_psl(); + x86_disable_intr(); + + /* Link in the handler and re-calculate masks. */ + *(ih->ih_prevp) = ih; + intr_calculatemasks(ci); + + /* Hook in new IDT vector and SPL state. */ + if (source->is_resume == NULL || source->is_idtvec != idt_vec) { + if (source->is_idtvec != 0 && source->is_idtvec != idt_vec) + idt_vec_free(source->is_idtvec); + source->is_idtvec = idt_vec; + if (source->is_type == IST_LEVEL) { + stubp = &source->is_pic->pic_level_stubs[ih->ih_slot]; + } else { + stubp = &source->is_pic->pic_edge_stubs[ih->ih_slot]; + } + source->is_resume = stubp->ist_resume; + source->is_recurse = stubp->ist_recurse; + setgate(&idt[idt_vec], stubp->ist_entry, 0, SDT_SYS386IGT, + SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + } + + /* Re-enable interrupts locally. */ + x86_write_psl(psl); +} + void * intr_establish(int legacy_irq, struct pic *pic, int pin, int type, int level, int (*handler)(void *), void *arg, bool known_mpsafe) @@ -646,10 +687,10 @@ intr_establish(int legacy_irq, struct pi struct cpu_info *ci; int slot, error, idt_vec; struct intrsource *source; - struct intrstub *stubp; #ifdef MULTIPROCESSOR bool mpsafe = (known_mpsafe || level != IPL_VM); #endif /* MULTIPROCESSOR */ + uint64_t where; #ifdef DIAGNOSTIC if (legacy_irq != -1 && (legacy_irq < 0 || legacy_irq > 15)) @@ -659,18 +700,19 @@ intr_establish(int legacy_irq, struct pi panic("intr_establish: non-legacy IRQ on i8259"); #endif - error = intr_allocate_slot(pic, pin, level, &ci, &slot, - &idt_vec); - if (error != 0) { - printf("failed to allocate interrupt slot for PIC %s pin %d\n", - pic->pic_name, pin); + ih = kmem_alloc(sizeof(*ih), KM_SLEEP); + if (ih == NULL) { + printf("intr_establish: can't allocate handler info\n"); return NULL; } - /* no point in sleeping unless someone can free memory. */ - ih = malloc(sizeof *ih, M_DEVBUF, cold ? M_NOWAIT : M_WAITOK); - if (ih == NULL) { - printf("intr_establish: can't allocate malloc handler info\n"); + mutex_enter(&cpu_lock); + error = intr_allocate_slot(pic, pin, level, &ci, &slot, &idt_vec); + if (error != 0) { + mutex_exit(&cpu_lock); + kmem_free(ih, sizeof(*ih)); + printf("failed to allocate interrupt slot for PIC %s pin %d\n", + pic->pic_name, pin); return NULL; } @@ -678,15 +720,14 @@ intr_establish(int legacy_irq, struct pi if (source->is_handlers != NULL && source->is_pic->pic_type != pic->pic_type) { - free(ih, M_DEVBUF); + mutex_exit(&cpu_lock); + kmem_free(ih, sizeof(*ih)); printf("intr_establish: can't share intr source between " "different PIC types (legacy_irq %d pin %d slot %d)\n", legacy_irq, pin, slot); return NULL; } - mutex_enter(&x86_intr_lock); - source->is_pin = pin; source->is_pic = pic; @@ -698,23 +739,30 @@ intr_establish(int legacy_irq, struct pi case IST_LEVEL: if (source->is_type == type) break; + /* FALLTHROUGH */ case IST_PULSE: if (type != IST_NONE) { - mutex_exit(&x86_intr_lock); + mutex_exit(&cpu_lock); + kmem_free(ih, sizeof(*ih)); + intr_source_free(ci, slot, pic, idt_vec); printf("intr_establish: pic %s pin %d: can't share " "type %d with %d\n", pic->pic_name, pin, source->is_type, type); - free(ih, M_DEVBUF); return NULL; } break; default: - mutex_exit(&x86_intr_lock); panic("intr_establish: bad intr type %d for pic %s pin %d\n", source->is_type, pic->pic_name, pin); + /* NOTREACHED */ } - pic->pic_hwmask(pic, pin); + /* + * We're now committed. Mask the interrupt in hardware and + * count it for load distribution. + */ + (*pic->pic_hwmask)(pic, pin); + (ci->ci_nintrhand)++; /* * Figure out where to put the handler. @@ -723,11 +771,13 @@ intr_establish(int legacy_irq, struct pi */ for (p = &ci->ci_isources[slot]->is_handlers; (q = *p) != NULL && q->ih_level > level; - p = &q->ih_next) - ; + p = &q->ih_next) { + /* nothing */; + } ih->ih_fun = ih->ih_realfun = handler; ih->ih_arg = ih->ih_realarg = arg; + ih->ih_prevp = p; ih->ih_next = *p; ih->ih_level = level; ih->ih_pin = pin; @@ -739,27 +789,23 @@ intr_establish(int legacy_irq, struct pi ih->ih_arg = ih; } #endif /* MULTIPROCESSOR */ - *p = ih; - intr_calculatemasks(ci); - - mutex_exit(&x86_intr_lock); - - if (source->is_resume == NULL || source->is_idtvec != idt_vec) { - if (source->is_idtvec != 0 && source->is_idtvec != idt_vec) - idt_vec_free(source->is_idtvec); - source->is_idtvec = idt_vec; - stubp = type == IST_LEVEL ? - &pic->pic_level_stubs[slot] : &pic->pic_edge_stubs[slot]; - source->is_resume = stubp->ist_resume; - source->is_recurse = stubp->ist_recurse; - setgate(&idt[idt_vec], stubp->ist_entry, 0, SDT_SYS386IGT, - SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + /* + * Call out to the remote CPU to update its interrupt state. + * Only make RPCs if the APs are up and running. + */ + if (ci == curcpu() || !mp_online) { + intr_establish_xcall(ih, (void *)(intptr_t)idt_vec); + } else { + where = xc_unicast(0, intr_establish_xcall, ih, + (void *)(intptr_t)idt_vec, ci); + xc_wait(where); } - pic->pic_addroute(pic, ci, pin, idt_vec, type); - - pic->pic_hwunmask(pic, pin); + /* All set up, so add a route for the interrupt and unmask it. */ + (*pic->pic_addroute)(pic, ci, pin, idt_vec, type); + (*pic->pic_hwunmask)(pic, pin); + mutex_exit(&cpu_lock); #ifdef INTRDEBUG printf("allocated pic %s type %s pin %d level %d to %s slot %d " @@ -772,24 +818,37 @@ intr_establish(int legacy_irq, struct pi } /* - * Deregister an interrupt handler. + * Called on bound CPU to handle intr_disestablish(). + * + * => caller (on initiating CPU) holds cpu_lock on our behalf + * => arg1: struct intrhand *ih + * => arg2: unused */ -void -intr_disestablish(struct intrhand *ih) +static void +intr_disestablish_xcall(void *arg1, void *arg2) { struct intrhand **p, *q; struct cpu_info *ci; struct pic *pic; struct intrsource *source; + struct intrhand *ih; + u_long psl; int idtvec; + ih = arg1; ci = ih->ih_cpu; + + KASSERT(ci == curcpu() || !mp_online); + + /* Disable interrupts locally. */ + psl = x86_read_psl(); + x86_disable_intr(); + pic = ci->ci_isources[ih->ih_slot]->is_pic; source = ci->ci_isources[ih->ih_slot]; idtvec = source->is_idtvec; - mutex_enter(&x86_intr_lock); - pic->pic_hwmask(pic, ih->ih_pin); + (*pic->pic_hwmask)(pic, ih->ih_pin); atomic_and_32(&ci->ci_ipending, ~(1 << ih->ih_slot)); /* @@ -799,33 +858,56 @@ intr_disestablish(struct intrhand *ih) p = &q->ih_next) ; if (q == NULL) { - mutex_exit(&x86_intr_lock); + x86_write_psl(psl); panic("intr_disestablish: handler not registered"); + /* NOTREACHED */ } *p = q->ih_next; intr_calculatemasks(ci); - pic->pic_delroute(pic, ci, ih->ih_pin, idtvec, source->is_type); - pic->pic_hwunmask(pic, ih->ih_pin); + (*pic->pic_delroute)(pic, ci, ih->ih_pin, idtvec, source->is_type); + (*pic->pic_hwunmask)(pic, ih->ih_pin); + + /* Re-enable interrupts. */ + x86_write_psl(psl); + + /* If the source is free we can drop it now. */ + intr_source_free(ci, ih->ih_slot, pic, idtvec); #ifdef INTRDEBUG printf("%s: remove slot %d (pic %s pin %d vec %d)\n", device_xname(ci->ci_dev), ih->ih_slot, pic->pic_name, ih->ih_pin, idtvec); #endif +} - if (source->is_handlers == NULL) { - evcnt_detach(&source->is_evcnt); - free(source, M_DEVBUF); - ci->ci_isources[ih->ih_slot] = NULL; - if (pic != &i8259_pic) - idt_vec_free(idtvec); - } - - free(ih, M_DEVBUF); +/* + * Deregister an interrupt handler. + */ +void +intr_disestablish(struct intrhand *ih) +{ + struct cpu_info *ci; + uint64_t where; - mutex_exit(&x86_intr_lock); + /* + * Count the removal for load balancing. + * Call out to the remote CPU to update its interrupt state. + * Only make RPCs if the APs are up and running. + */ + mutex_enter(&cpu_lock); + ci = ih->ih_cpu; + (ci->ci_nintrhand)--; + KASSERT(ci->ci_nintrhand >= 0); + if (ci == curcpu() || !mp_online) { + intr_disestablish_xcall(ih, NULL); + } else { + where = xc_unicast(0, intr_disestablish_xcall, ih, NULL, ci); + xc_wait(where); + } + mutex_exit(&cpu_lock); + kmem_free(ih, sizeof(*ih)); } const char * @@ -844,26 +926,25 @@ intr_string(int ih) if (ih & APIC_INT_VIA_APIC) { pic = ioapic_find(APIC_IRQ_APIC(ih)); if (pic != NULL) { - sprintf(irqstr, "%s pin %d", + snprintf(irqstr, sizeof(irqstr), "%s pin %d", device_xname(pic->sc_dev), APIC_IRQ_PIN(ih)); } else { - sprintf(irqstr, "apic %d int %d (irq %d)", + snprintf(irqstr, sizeof(irqstr), + "apic %d int %d (irq %d)", APIC_IRQ_APIC(ih), APIC_IRQ_PIN(ih), ih&0xff); } } else - sprintf(irqstr, "irq %d", ih&0xff); + snprintf(irqstr, sizeof(irqstr), "irq %d", ih&0xff); #else - sprintf(irqstr, "irq %d", ih&0xff); + snprintf(irqstr, sizeof(irqstr), "irq %d", ih&0xff); #endif return (irqstr); } -#define CONCAT(x,y) __CONCAT(x,y) - /* * Fake interrupt handler structures for the benefit of symmetry with * other interrupt sources, and the benefit of intr_calculatemasks() @@ -906,31 +987,32 @@ cpu_intr_init(struct cpu_info *ci) struct intrsource *isp; #if NLAPIC > 0 && defined(MULTIPROCESSOR) int i; + static int first = 1; #endif #ifdef INTRSTACKSIZE vaddr_t istack; #endif #if NLAPIC > 0 - isp = malloc(sizeof (struct intrsource), M_DEVBUF, M_WAITOK|M_ZERO); - if (isp == NULL) - panic("can't allocate fixed interrupt source"); + isp = kmem_zalloc(sizeof(*isp), KM_SLEEP); + KASSERT(isp != NULL); isp->is_recurse = Xrecurse_lapic_ltimer; isp->is_resume = Xresume_lapic_ltimer; fake_timer_intrhand.ih_level = IPL_CLOCK; isp->is_handlers = &fake_timer_intrhand; isp->is_pic = &local_pic; ci->ci_isources[LIR_TIMER] = isp; - evcnt_attach_dynamic(&isp->is_evcnt, EVCNT_TYPE_MISC, NULL, + evcnt_attach_dynamic(&isp->is_evcnt, + first ? EVCNT_TYPE_INTR : EVCNT_TYPE_MISC, NULL, device_xname(ci->ci_dev), "timer"); + first = 0; #ifdef MULTIPROCESSOR - isp = malloc(sizeof (struct intrsource), M_DEVBUF, M_WAITOK|M_ZERO); - if (isp == NULL) - panic("can't allocate fixed interrupt source"); + isp = kmem_zalloc(sizeof(*isp), KM_SLEEP); + KASSERT(isp != NULL); isp->is_recurse = Xrecurse_lapic_ipi; isp->is_resume = Xresume_lapic_ipi; - fake_ipi_intrhand.ih_level = IPL_IPI; + fake_ipi_intrhand.ih_level = IPL_HIGH; isp->is_handlers = &fake_ipi_intrhand; isp->is_pic = &local_pic; ci->ci_isources[LIR_IPI] = isp; @@ -941,9 +1023,8 @@ cpu_intr_init(struct cpu_info *ci) #endif #endif - isp = malloc(sizeof (struct intrsource), M_DEVBUF, M_WAITOK|M_ZERO); - if (isp == NULL) - panic("can't allocate fixed interrupt source"); + isp = kmem_zalloc(sizeof(*isp), KM_SLEEP); + KASSERT(isp != NULL); isp->is_recurse = Xpreemptrecurse; isp->is_resume = Xpreemptresume; fake_preempt_intrhand.ih_level = IPL_PREEMPT; @@ -979,7 +1060,12 @@ cpu_intr_init(struct cpu_info *ci) ci->ci_idepth = -1; } -#ifdef INTRDEBUG +#if defined(INTRDEBUG) || defined(DDB) + +#ifdef DDB +#define printf db_printf +#endif + void intr_printconfig(void) { @@ -1009,6 +1095,9 @@ intr_printconfig(void) } } } +#ifdef DDB +#undef printf +#endif #endif void @@ -1020,9 +1109,8 @@ softint_init_md(lwp_t *l, u_int level, u ci = l->l_cpu; - isp = malloc(sizeof (struct intrsource), M_DEVBUF, M_WAITOK|M_ZERO); - if (isp == NULL) - panic("can't allocate fixed interrupt source"); + isp = kmem_zalloc(sizeof(*isp), KM_SLEEP); + KASSERT(isp != NULL); isp->is_recurse = Xsoftintr; isp->is_resume = Xsoftintr; isp->is_pic = &softintr_pic; @@ -1060,3 +1148,211 @@ softint_init_md(lwp_t *l, u_int level, u intr_calculatemasks(ci); } + +static void +intr_redistribute_xc_t(void *arg1, void *arg2) +{ + struct cpu_info *ci; + struct intrsource *isp; + int slot; + u_long psl; + + ci = curcpu(); + isp = arg1; + slot = (int)(intptr_t)arg2; + + /* Disable interrupts locally. */ + psl = x86_read_psl(); + x86_disable_intr(); + + /* Hook it in and re-calculate masks. */ + ci->ci_isources[slot] = isp; + intr_calculatemasks(curcpu()); + + /* Re-enable interrupts locally. */ + x86_write_psl(psl); +} + +static void +intr_redistribute_xc_s1(void *arg1, void *arg2) +{ + struct pic *pic; + struct intrsource *isp; + struct cpu_info *nci; + u_long psl; + + isp = arg1; + nci = arg2; + + /* + * Disable interrupts on-chip and mask the pin. Back out + * and let the interrupt be processed if one is pending. + */ + pic = isp->is_pic; + for (;;) { + psl = x86_read_psl(); + x86_disable_intr(); + if ((*pic->pic_trymask)(pic, isp->is_pin)) { + break; + } + x86_write_psl(psl); + DELAY(1000); + } + + /* pic_addroute will unmask the interrupt. */ + (*pic->pic_addroute)(pic, nci, isp->is_pin, isp->is_idtvec, + isp->is_type); + x86_write_psl(psl); +} + +static void +intr_redistribute_xc_s2(void *arg1, void *arg2) +{ + struct cpu_info *ci; + u_long psl; + int slot; + + ci = curcpu(); + slot = (int)(uintptr_t)arg1; + + /* Disable interrupts locally. */ + psl = x86_read_psl(); + x86_disable_intr(); + + /* Patch out the source and re-calculate masks. */ + ci->ci_isources[slot] = NULL; + intr_calculatemasks(ci); + + /* Re-enable interrupts locally. */ + x86_write_psl(psl); +} + +static bool +intr_redistribute(struct cpu_info *oci) +{ + struct intrsource *isp; + struct intrhand *ih; + CPU_INFO_ITERATOR cii; + struct cpu_info *nci, *ici; + int oslot, nslot; + uint64_t where; + + KASSERT(mutex_owned(&cpu_lock)); + + /* Look for an interrupt source that we can migrate. */ + for (oslot = 0; oslot < MAX_INTR_SOURCES; oslot++) { + if ((isp = oci->ci_isources[oslot]) == NULL) { + continue; + } + if (isp->is_pic->pic_type == PIC_IOAPIC) { + break; + } + } + if (oslot == MAX_INTR_SOURCES) { + return false; + } + + /* Find least loaded CPU and try to move there. */ + nci = NULL; + for (CPU_INFO_FOREACH(cii, ici)) { + if ((ici->ci_schedstate.spc_flags & SPCF_NOINTR) != 0) { + continue; + } + KASSERT(ici != oci); + if (nci == NULL || nci->ci_nintrhand > ici->ci_nintrhand) { + nci = ici; + } + } + if (nci == NULL) { + return false; + } + for (nslot = 0; nslot < MAX_INTR_SOURCES; nslot++) { + if (nci->ci_isources[nslot] == NULL) { + break; + } + } + + /* If that did not work, allocate anywhere. */ + if (nslot == MAX_INTR_SOURCES) { + for (CPU_INFO_FOREACH(cii, nci)) { + if ((nci->ci_schedstate.spc_flags & SPCF_NOINTR) != 0) { + continue; + } + KASSERT(nci != oci); + for (nslot = 0; nslot < MAX_INTR_SOURCES; nslot++) { + if (nci->ci_isources[nslot] == NULL) { + break; + } + } + if (nslot != MAX_INTR_SOURCES) { + break; + } + } + } + if (nslot == MAX_INTR_SOURCES) { + return false; + } + + /* + * Now we have new CPU and new slot. Run a cross-call to set up + * the new vector on the target CPU. + */ + where = xc_unicast(0, intr_redistribute_xc_t, isp, + (void *)(intptr_t)nslot, nci); + xc_wait(where); + + /* + * We're ready to go on the target CPU. Run a cross call to + * reroute the interrupt away from the source CPU. + */ + where = xc_unicast(0, intr_redistribute_xc_s1, isp, nci, oci); + xc_wait(where); + + /* Sleep for (at least) 10ms to allow the change to take hold. */ + (void)kpause("intrdist", false, mstohz(10), NULL); + + /* Complete removal from the source CPU. */ + where = xc_unicast(0, intr_redistribute_xc_s2, + (void *)(uintptr_t)oslot, NULL, oci); + xc_wait(where); + + /* Finally, take care of book-keeping. */ + for (ih = isp->is_handlers; ih != NULL; ih = ih->ih_next) { + oci->ci_nintrhand--; + nci->ci_nintrhand++; + ih->ih_cpu = nci; + } + + return true; +} + +void +cpu_intr_redistribute(void) +{ + CPU_INFO_ITERATOR cii; + struct cpu_info *ci; + + KASSERT(mutex_owned(&cpu_lock)); + KASSERT(mp_online); + + /* Direct interrupts away from shielded CPUs. */ + for (CPU_INFO_FOREACH(cii, ci)) { + if ((ci->ci_schedstate.spc_flags & SPCF_NOINTR) == 0) { + continue; + } + while (intr_redistribute(ci)) { + /* nothing */ + } + } + + /* XXX should now re-balance */ +} + +u_int +cpu_intr_count(struct cpu_info *ci) +{ + + KASSERT(ci->ci_nintrhand >= 0); + + return ci->ci_nintrhand; +}