[BACK] Return to cpu.c CVS log  [TXT] [DIR]  Up to [cvs.NetBSD.org] / src / sys / arch / x86 / x86

Annotation of src/sys/arch/x86/x86/cpu.c, Revision 1.191

1.191   ! msaitoh     1: /*     $NetBSD: cpu.c,v 1.190 2020/05/08 22:01:55 ad Exp $     */
1.2       ad          2:
1.134     maxv        3: /*
1.190     ad          4:  * Copyright (c) 2000-2020 NetBSD Foundation, Inc.
1.2       ad          5:  * All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to The NetBSD Foundation
1.11      ad          8:  * by Bill Sommerfeld of RedBack Networks Inc, and by Andrew Doran.
1.2       ad          9:  *
                     10:  * Redistribution and use in source and binary forms, with or without
                     11:  * modification, are permitted provided that the following conditions
                     12:  * are met:
                     13:  * 1. Redistributions of source code must retain the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer.
                     15:  * 2. Redistributions in binary form must reproduce the above copyright
                     16:  *    notice, this list of conditions and the following disclaimer in the
                     17:  *    documentation and/or other materials provided with the distribution.
                     18:  *
                     19:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     20:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     21:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     22:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     23:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     24:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     25:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     26:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     27:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     28:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     29:  * POSSIBILITY OF SUCH DAMAGE.
                     30:  */
                     31:
                     32: /*
                     33:  * Copyright (c) 1999 Stefan Grefen
                     34:  *
                     35:  * Redistribution and use in source and binary forms, with or without
                     36:  * modification, are permitted provided that the following conditions
                     37:  * are met:
                     38:  * 1. Redistributions of source code must retain the above copyright
                     39:  *    notice, this list of conditions and the following disclaimer.
                     40:  * 2. Redistributions in binary form must reproduce the above copyright
                     41:  *    notice, this list of conditions and the following disclaimer in the
                     42:  *    documentation and/or other materials provided with the distribution.
                     43:  * 3. All advertising materials mentioning features or use of this software
                     44:  *    must display the following acknowledgement:
                     45:  *      This product includes software developed by the NetBSD
                     46:  *      Foundation, Inc. and its contributors.
                     47:  * 4. Neither the name of The NetBSD Foundation nor the names of its
                     48:  *    contributors may be used to endorse or promote products derived
                     49:  *    from this software without specific prior written permission.
                     50:  *
                     51:  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY
                     52:  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     53:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     54:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE
                     55:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     56:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     57:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     58:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     59:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     60:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     61:  * SUCH DAMAGE.
                     62:  */
                     63:
                     64: #include <sys/cdefs.h>
1.191   ! msaitoh    65: __KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.190 2020/05/08 22:01:55 ad Exp $");
1.2       ad         66:
                     67: #include "opt_ddb.h"
                     68: #include "opt_mpbios.h"                /* for MPDEBUG */
                     69: #include "opt_mtrr.h"
1.101     kiyohara   70: #include "opt_multiprocessor.h"
1.144     maxv       71: #include "opt_svs.h"
1.2       ad         72:
                     73: #include "lapic.h"
                     74: #include "ioapic.h"
1.179     ad         75: #include "acpica.h"
1.190     ad         76: #include "hpet.h"
1.2       ad         77:
                     78: #include <sys/param.h>
                     79: #include <sys/proc.h>
                     80: #include <sys/systm.h>
                     81: #include <sys/device.h>
1.9       ad         82: #include <sys/cpu.h>
1.93      jruoho     83: #include <sys/cpufreq.h>
1.98      rmind      84: #include <sys/idle.h>
1.9       ad         85: #include <sys/atomic.h>
1.35      ad         86: #include <sys/reboot.h>
1.174     maxv       87: #include <sys/csan.h>
1.2       ad         88:
1.78      uebayasi   89: #include <uvm/uvm.h>
1.2       ad         90:
1.102     pgoyette   91: #include "acpica.h"            /* for NACPICA, for mp_verbose */
                     92:
1.187     bouyer     93: #include <x86/machdep.h>
1.2       ad         94: #include <machine/cpufunc.h>
                     95: #include <machine/cpuvar.h>
                     96: #include <machine/pmap.h>
                     97: #include <machine/vmparam.h>
1.102     pgoyette   98: #if defined(MULTIPROCESSOR)
1.2       ad         99: #include <machine/mpbiosvar.h>
1.101     kiyohara  100: #endif
1.102     pgoyette  101: #include <machine/mpconfig.h>          /* for mp_verbose */
1.2       ad        102: #include <machine/pcb.h>
                    103: #include <machine/specialreg.h>
                    104: #include <machine/segments.h>
                    105: #include <machine/gdt.h>
                    106: #include <machine/mtrr.h>
                    107: #include <machine/pio.h>
1.38      ad        108: #include <machine/cpu_counter.h>
1.2       ad        109:
1.109     dsl       110: #include <x86/fpu.h>
                    111:
1.179     ad        112: #if NACPICA > 0
                    113: #include <dev/acpi/acpi_srat.h>
                    114: #endif
                    115:
1.101     kiyohara  116: #if NLAPIC > 0
1.2       ad        117: #include <machine/apicvar.h>
                    118: #include <machine/i82489reg.h>
                    119: #include <machine/i82489var.h>
1.101     kiyohara  120: #endif
1.2       ad        121:
                    122: #include <dev/ic/mc146818reg.h>
1.190     ad        123: #include <dev/ic/hpetvar.h>
1.2       ad        124: #include <i386/isa/nvram.h>
                    125: #include <dev/isa/isareg.h>
                    126:
1.38      ad        127: #include "tsc.h"
                    128:
1.187     bouyer    129: #ifndef XENPV
1.178     nonaka    130: #include "hyperv.h"
                    131: #if NHYPERV > 0
                    132: #include <x86/x86/hypervvar.h>
                    133: #endif
                    134: #endif
                    135:
1.187     bouyer    136: #ifdef XEN
                    137: #include <xen/hypervisor.h>
                    138: #endif
                    139:
1.87      jruoho    140: static int     cpu_match(device_t, cfdata_t, void *);
                    141: static void    cpu_attach(device_t, device_t, void *);
                    142: static void    cpu_defer(device_t);
                    143: static int     cpu_rescan(device_t, const char *, const int *);
                    144: static void    cpu_childdetached(device_t, device_t);
1.96      jruoho    145: static bool    cpu_stop(device_t);
1.69      dyoung    146: static bool    cpu_suspend(device_t, const pmf_qual_t *);
                    147: static bool    cpu_resume(device_t, const pmf_qual_t *);
1.79      jruoho    148: static bool    cpu_shutdown(device_t, int);
1.12      jmcneill  149:
1.2       ad        150: struct cpu_softc {
1.23      cube      151:        device_t sc_dev;                /* device tree glue */
1.2       ad        152:        struct cpu_info *sc_info;       /* pointer to CPU info */
1.20      jmcneill  153:        bool sc_wasonline;
1.2       ad        154: };
                    155:
1.101     kiyohara  156: #ifdef MULTIPROCESSOR
1.120     msaitoh   157: int mp_cpu_start(struct cpu_info *, paddr_t);
1.2       ad        158: void mp_cpu_start_cleanup(struct cpu_info *);
                    159: const struct cpu_functions mp_cpu_funcs = { mp_cpu_start, NULL,
                    160:                                            mp_cpu_start_cleanup };
1.101     kiyohara  161: #endif
1.2       ad        162:
                    163:
1.81      jmcneill  164: CFATTACH_DECL2_NEW(cpu, sizeof(struct cpu_softc),
                    165:     cpu_match, cpu_attach, NULL, NULL, cpu_rescan, cpu_childdetached);
1.2       ad        166:
                    167: /*
                    168:  * Statically-allocated CPU info for the primary CPU (or the only
                    169:  * CPU, on uniprocessors).  The CPU info list is initialized to
                    170:  * point at it.
                    171:  */
1.21      ad        172: struct cpu_info cpu_info_primary __aligned(CACHE_LINE_SIZE) = {
1.2       ad        173:        .ci_dev = 0,
                    174:        .ci_self = &cpu_info_primary,
                    175:        .ci_idepth = -1,
                    176:        .ci_curlwp = &lwp0,
1.43      ad        177:        .ci_curldt = -1,
1.2       ad        178: };
                    179:
                    180: struct cpu_info *cpu_info_list = &cpu_info_primary;
                    181:
                    182: #ifdef i386
1.134     maxv      183: void           cpu_set_tss_gates(struct cpu_info *);
1.2       ad        184: #endif
                    185:
1.12      jmcneill  186: static void    cpu_init_idle_lwp(struct cpu_info *);
                    187:
1.122     maxv      188: uint32_t cpu_feature[7] __read_mostly; /* X86 CPUID feature bits */
1.117     maxv      189:                        /* [0] basic features cpuid.1:%edx
                    190:                         * [1] basic features cpuid.1:%ecx (CPUID2_xxx bits)
                    191:                         * [2] extended features cpuid:80000001:%edx
                    192:                         * [3] extended features cpuid:80000001:%ecx
                    193:                         * [4] VIA padlock features
                    194:                         * [5] structured extended features cpuid.7:%ebx
                    195:                         * [6] structured extended features cpuid.7:%ecx
                    196:                         */
1.70      jym       197:
1.101     kiyohara  198: #ifdef MULTIPROCESSOR
1.12      jmcneill  199: bool x86_mp_online;
                    200: paddr_t mp_trampoline_paddr = MP_TRAMPOLINE;
1.101     kiyohara  201: #endif
                    202: #if NLAPIC > 0
1.14      joerg     203: static vaddr_t cmos_data_mapping;
1.101     kiyohara  204: #endif
1.45      ad        205: struct cpu_info *cpu_starting;
1.2       ad        206:
1.101     kiyohara  207: #ifdef MULTIPROCESSOR
1.184     msaitoh   208: void           cpu_hatch(void *);
                    209: static void    cpu_boot_secondary(struct cpu_info *ci);
                    210: static void    cpu_start_secondary(struct cpu_info *ci);
1.101     kiyohara  211: #if NLAPIC > 0
1.136     maxv      212: static void    cpu_copy_trampoline(paddr_t);
1.101     kiyohara  213: #endif
1.164     cherry    214: #endif /* MULTIPROCESSOR */
1.2       ad        215:
                    216: /*
                    217:  * Runs once per boot once multiprocessor goo has been detected and
                    218:  * the local APIC on the boot processor has been mapped.
                    219:  *
                    220:  * Called from lapic_boot_init() (from mpbios_scan()).
                    221:  */
1.101     kiyohara  222: #if NLAPIC > 0
1.2       ad        223: void
1.9       ad        224: cpu_init_first(void)
1.2       ad        225: {
1.2       ad        226:
                         /* Record the boot processor's local APIC ID as its cpuid. */
1.45      ad        227:        cpu_info_primary.ci_cpuid = lapic_cpu_number();
1.14      joerg     228:
                         /*
                          * Establish a permanent kernel mapping of physical page 0
                          * (presumably for later CMOS/low-memory access by the MP
                          * startup code, per the variable name — confirm at callers).
                          * VA is reserved first, then wired read/write to PA 0.
                          */
1.14      joerg     229:        cmos_data_mapping = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_VAONLY);
1.14      joerg     230:        if (cmos_data_mapping == 0)
1.14      joerg     231:                panic("No KVA for page 0");
1.64      cegger    232:        pmap_kenter_pa(cmos_data_mapping, 0, VM_PROT_READ|VM_PROT_WRITE, 0);
1.14      joerg     233:        pmap_update(pmap_kernel());
1.2       ad        234: }
1.101     kiyohara  235: #endif
1.2       ad        236:
                         /*
                          * Autoconf match function: unconditionally claim any "cpu"
                          * device proposed by the parent bus.
                          */
1.87      jruoho    237: static int
1.23      cube      238: cpu_match(device_t parent, cfdata_t match, void *aux)
1.2       ad        239: {
1.2       ad        240:
1.2       ad        241:        return 1;
1.2       ad        242: }
                    243:
1.142     maxv      244: #ifdef __HAVE_PCPU_AREA
                         /*
                          * Back this CPU's slot in the global pcpu area with freshly
                          * allocated, zeroed physical pages, wired into the kernel pmap.
                          * Panics on allocation failure (boot-time, no recovery path).
                          */
1.142     maxv      245: void
                     246: cpu_pcpuarea_init(struct cpu_info *ci)
                     247: {
                     248:        struct vm_page *pg;
                     249:        size_t i, npages;
                     250:        vaddr_t base, va;
                     251:        paddr_t pa;
                     252:
                         /* The per-CPU entry must be an exact multiple of pages. */
                     253:        CTASSERT(sizeof(struct pcpu_entry) % PAGE_SIZE == 0);
                     254:
                     255:        npages = sizeof(struct pcpu_entry) / PAGE_SIZE;
                     256:        base = (vaddr_t)&pcpuarea->ent[cpu_index(ci)];
                     257:
                     258:        for (i = 0; i < npages; i++) {
                     259:                pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
                     260:                if (pg == NULL) {
                     261:                        panic("failed to allocate pcpu PA");
                     262:                }
                     263:
                     264:                va = base + i * PAGE_SIZE;
                     265:                pa = VM_PAGE_TO_PHYS(pg);
                     266:
                     267:                pmap_kenter_pa(va, pa, VM_PROT_READ|VM_PROT_WRITE, 0);
                     268:        }
                     269:
                         /* Flush pending pmap updates after all pages are entered. */
                     270:        pmap_update(pmap_kernel());
                     271: }
                     272: #endif
                    273:
                         /*
                          * Derive the number of page colors from this CPU's cache
                          * geometry (icache through L2), re-color UVM's free pages
                          * accordingly, and do per-CPU pmap/TLB initialization.
                          * Must run before mi_cpu_attach() (see caller).
                          */
1.2       ad        274: static void
1.2       ad        275: cpu_vm_init(struct cpu_info *ci)
                     276: {
                     277:        int ncolors = 2, i;
                     278:
                     279:        for (i = CAI_ICACHE; i <= CAI_L2CACHE; i++) {
                     280:                struct x86_cache_info *cai;
                     281:                int tcolors;
                     282:
                     283:                cai = &ci->ci_cinfo[i];
                     284:
                         /* Colors for this cache = cache size in pages / associativity. */
                     285:                tcolors = atop(cai->cai_totalsize);
1.184     msaitoh   286:                switch (cai->cai_associativity) {
1.2       ad        287:                case 0xff:
                     288:                        tcolors = 1; /* fully associative */
                     289:                        break;
                     290:                case 0:
                     291:                case 1:
                     292:                        break;
                     293:                default:
                     294:                        tcolors /= cai->cai_associativity;
                     295:                }
1.161     riastrad  296:                ncolors = uimax(ncolors, tcolors);
1.32      tls       297:                /*
                     298:                 * If the desired number of colors is not a power of
                     299:                 * two, it won't be good.  Find the greatest power of
                     300:                 * two which is an even divisor of the number of colors,
                     301:                 * to preserve even coloring of pages.
                     302:                 */
                     303:                if (ncolors & (ncolors - 1) ) {
                     304:                        int try, picked = 1;
                     305:                        for (try = 1; try < ncolors; try *= 2) {
                     306:                                if (ncolors % try == 0) picked = try;
                     307:                        }
                     308:                        if (picked == 1) {
                     309:                                panic("desired number of cache colors %d is "
1.184     msaitoh   310:                                " > 1, but not even!", ncolors);
1.32      tls       311:                        }
                     312:                        ncolors = picked;
                     313:                }
1.2       ad        314:        }
                     315:
                     316:        /*
1.94      mrg       317:         * Knowing the size of the largest cache on this CPU, potentially
                     318:         * re-color our pages.
1.2       ad        319:         */
1.52      ad        320:        aprint_debug_dev(ci->ci_dev, "%d page colors\n", ncolors);
1.2       ad        321:        uvm_page_recolor(ncolors);
1.98      rmind     322:
                     323:        pmap_tlb_cpu_init(ci);
1.123     maxv      324: #ifndef __HAVE_DIRECT_MAP
                         /* Without a direct map, per-CPU vpage windows are needed. */
1.123     maxv      325:        pmap_vpage_cpu_init(ci);
                     326: #endif
1.2       ad        327: }
                    328:
                         /*
                          * Autoconf attach function for a CPU.  Behavior depends on
                          * caa->cpu_role:
                          *   CPU_ROLE_SP / CPU_ROLE_BP - use the static primary
                          *     cpu_info, identify and fully initialize this (boot) CPU.
                          *   CPU_ROLE_AP - allocate a fresh cpu_info, then start the
                          *     secondary processor and link it onto cpu_info_list.
                          * One-time (first-attach) setup guarded by `again` prepares
                          * state needed before any secondary CPU can be booted.
                          */
1.87      jruoho    329: static void
1.23      cube      330: cpu_attach(device_t parent, device_t self, void *aux)
1.2       ad        331: {
1.23      cube      332:        struct cpu_softc *sc = device_private(self);
1.2       ad        333:        struct cpu_attach_args *caa = aux;
                     334:        struct cpu_info *ci;
1.21      ad        335:        uintptr_t ptr;
1.101     kiyohara  336: #if NLAPIC > 0
1.2       ad        337:        int cpunum = caa->cpu_number;
1.101     kiyohara  338: #endif
                         /* True once the first CPU has run the one-time init below. */
1.51      ad        339:        static bool again;
1.2       ad        340:
1.23      cube      341:        sc->sc_dev = self;
                     342:
1.163     cherry    343:        if (ncpu > maxcpus) {
1.98      rmind     344: #ifndef _LP64
                     345:                aprint_error(": too many CPUs, please use NetBSD/amd64\n");
                     346: #else
                     347:                aprint_error(": too many CPUs\n");
                     348: #endif
1.48      ad        349:                return;
                     350:        }
                     351:
1.2       ad        352:        /*
                     353:         * If we're an Application Processor, allocate a cpu_info
                     354:         * structure, otherwise use the primary's.
                     355:         */
                     356:        if (caa->cpu_role == CPU_ROLE_AP) {
                         /* RB_MD1 boot flag disables multiprocessor boot entirely. */
1.36      ad        357:                if ((boothowto & RB_MD1) != 0) {
1.35      ad        358:                        aprint_error(": multiprocessor boot disabled\n");
1.56      jmcneill  359:                        if (!pmf_device_register(self, NULL, NULL))
                     360:                                aprint_error_dev(self,
                     361:                                    "couldn't establish power handler\n");
1.35      ad        362:                        return;
                     363:                }
1.2       ad        364:                aprint_naive(": Application Processor\n");
                         /*
                          * Over-allocate by CACHE_LINE_SIZE-1 so the cpu_info can be
                          * rounded up to a cache-line boundary (avoids false sharing).
                          */
1.143     maxv      365:                ptr = (uintptr_t)uvm_km_alloc(kernel_map,
                     366:                    sizeof(*ci) + CACHE_LINE_SIZE - 1, 0,
                     367:                    UVM_KMF_WIRED|UVM_KMF_ZERO);
1.67      jym       368:                ci = (struct cpu_info *)roundup2(ptr, CACHE_LINE_SIZE);
1.43      ad        369:                ci->ci_curldt = -1;
1.2       ad        370:        } else {
                     371:                aprint_naive(": %s Processor\n",
                     372:                    caa->cpu_role == CPU_ROLE_SP ? "Single" : "Boot");
                     373:                ci = &cpu_info_primary;
1.101     kiyohara  374: #if NLAPIC > 0
                         /*
                          * If the firmware-reported CPU number disagrees with the
                          * lapic, try to rewrite the lapic ID to match, and verify.
                          */
1.2       ad        375:                if (cpunum != lapic_cpu_number()) {
1.51      ad        376:                        /* XXX should be done earlier. */
1.39      ad        377:                        uint32_t reg;
                     378:                        aprint_verbose("\n");
1.47      ad        379:                        aprint_verbose_dev(self, "running CPU at apic %d"
                     380:                            " instead of at expected %d", lapic_cpu_number(),
1.23      cube      381:                            cpunum);
1.125     nonaka    382:                        reg = lapic_readreg(LAPIC_ID);
                     383:                        lapic_writereg(LAPIC_ID, (reg & ~LAPIC_ID_MASK) |
1.39      ad        384:                            (cpunum << LAPIC_ID_SHIFT));
1.2       ad        385:                }
1.47      ad        386:                if (cpunum != lapic_cpu_number()) {
                     387:                        aprint_error_dev(self, "unable to reset apic id\n");
                     388:                }
1.101     kiyohara  389: #endif
1.2       ad        390:        }
                     391:
                         /* Wire up basic cpu_info fields from the attach args. */
1.2       ad        392:        ci->ci_self = ci;
                     393:        sc->sc_info = ci;
                     394:        ci->ci_dev = self;
1.74      jruoho    395:        ci->ci_acpiid = caa->cpu_id;
1.42      ad        396:        ci->ci_cpuid = caa->cpu_number;
1.2       ad        397:        ci->ci_func = caa->cpu_func;
1.177     maxv      398:        ci->ci_kfpu_spl = -1;
1.112     msaitoh   399:        aprint_normal("\n");
1.2       ad        400:
1.55      ad        401:        /* Must be before mi_cpu_attach(). */
                     402:        cpu_vm_init(ci);
                     403:
1.2       ad        404:        if (caa->cpu_role == CPU_ROLE_AP) {
                     405:                int error;
                     406:
                     407:                error = mi_cpu_attach(ci);
                     408:                if (error != 0) {
1.47      ad        409:                        aprint_error_dev(self,
1.30      cegger    410:                            "mi_cpu_attach failed with %d\n", error);
1.2       ad        411:                        return;
                     412:                }
1.142     maxv      413: #ifdef __HAVE_PCPU_AREA
                     414:                cpu_pcpuarea_init(ci);
                     415: #endif
1.15      yamt      416:                cpu_init_tss(ci);
1.2       ad        417:        } else {
1.2       ad        418:                KASSERT(ci->ci_data.cpu_idlelwp != NULL);
1.179     ad        419: #if NACPICA > 0
                     420:                /* Parse out NUMA info for cpu_identify(). */
                     421:                acpisrat_init();
                     422: #endif
1.2       ad        423:        }
                     424:
1.146     maxv      425: #ifdef SVS
1.146     maxv      426:        cpu_svs_init(ci);
                     427: #endif
                     428:
1.2       ad        429:        pmap_reference(pmap_kernel());
                     430:        ci->ci_pmap = pmap_kernel();
                     431:        ci->ci_tlbstate = TLBSTATE_STALE;
                     432:
1.51      ad        433:        /*
                     434:         * Boot processor may not be attached first, but the below
                     435:         * must be done to allow booting other processors.
                     436:         */
                     437:        if (!again) {
1.190     ad        438:                /* Make sure DELAY() (likely i8254_delay()) is initialized. */
                     439:                DELAY(1);
                     440:
                     441:                /*
                     442:                 * Basic init.  Compute an approximate frequency for the TSC
                     443:                 * using the i8254.  If there's a HPET we'll redo it later.
                     444:                 */
1.188     ad        445:                atomic_or_32(&ci->ci_flags, CPUF_PRESENT | CPUF_PRIMARY);
1.2       ad        446:                cpu_intr_init(ci);
1.40      ad        447:                cpu_get_tsc_freq(ci);
1.2       ad        448:                cpu_init(ci);
1.134     maxv      449: #ifdef i386
1.2       ad        450:                cpu_set_tss_gates(ci);
1.134     maxv      451: #endif
1.2       ad        452:                pmap_cpu_init_late(ci);
1.101     kiyohara  453: #if NLAPIC > 0
1.51      ad        454:                if (caa->cpu_role != CPU_ROLE_SP) {
                     455:                        /* Enable lapic. */
                     456:                        lapic_enable();
                     457:                        lapic_set_lvt();
                         /* Xen PVH/PVHVM guests skip lapic timer calibration here. */
1.189     bouyer    458:                        if (!vm_guest_is_xenpvh_or_pvhvm())
1.187     bouyer    459:                                lapic_calibrate_timer(ci);
1.51      ad        460:                }
1.101     kiyohara  461: #endif
1.174     maxv      462:                kcsan_cpu_init(ci);
1.51      ad        463:                again = true;
                     464:        }
                     465:
                     466:        /* further PCB init done later. */
                     467:
                         /* Role-specific initialization. */
1.51      ad        468:        switch (caa->cpu_role) {
                     469:        case CPU_ROLE_SP:
                     470:                atomic_or_32(&ci->ci_flags, CPUF_SP);
                     471:                cpu_identify(ci);
1.53      ad        472:                x86_errata();
1.37      joerg     473:                x86_cpu_idle_init();
1.187     bouyer    474:                (*x86_cpu_initclock_func)();
                     475: #ifdef XENPVHVM
                     476:                xen_hvm_init_cpu(ci);
                     477: #endif
1.2       ad        478:                break;
                     479:
                     480:        case CPU_ROLE_BP:
1.51      ad        481:                atomic_or_32(&ci->ci_flags, CPUF_BSP);
1.40      ad        482:                cpu_identify(ci);
1.53      ad        483:                x86_errata();
1.37      joerg     484:                x86_cpu_idle_init();
1.187     bouyer    485: #ifdef XENPVHVM
                     486:                xen_hvm_init_cpu(ci);
                     487: #endif
                         /*
                          * NOTE(review): BP runs xen_hvm_init_cpu() before the clock
                          * init, while SP does the reverse — presumably intentional,
                          * but worth confirming against rev 1.187's change.
                          */
1.187     bouyer    488:                (*x86_cpu_initclock_func)();
1.2       ad        489:                break;
                     490:
1.101     kiyohara  491: #ifdef MULTIPROCESSOR
1.2       ad        492:        case CPU_ROLE_AP:
                     493:                /*
                     494:                 * report on an AP
                     495:                 */
                     496:                cpu_intr_init(ci);
                     497:                gdt_alloc_cpu(ci);
1.134     maxv      498: #ifdef i386
1.2       ad        499:                cpu_set_tss_gates(ci);
1.134     maxv      500: #endif
1.2       ad        501:                pmap_cpu_init_late(ci);
                     502:                cpu_start_secondary(ci);
                         /* If the AP came up, append it to the tail of cpu_info_list. */
1.2       ad        503:                if (ci->ci_flags & CPUF_PRESENT) {
1.59      cegger    504:                        struct cpu_info *tmp;
                     505:
1.40      ad        506:                        cpu_identify(ci);
1.59      cegger    507:                        tmp = cpu_info_list;
                     508:                        while (tmp->ci_next)
                     509:                                tmp = tmp->ci_next;
                     510:
                     511:                        tmp->ci_next = ci;
1.2       ad        512:                }
                     513:                break;
1.101     kiyohara  514: #endif
1.2       ad        515:
                     516:        default:
                     517:                panic("unknown processor type??\n");
                     518:        }
1.51      ad        519:
1.71      cegger    520:        pat_init(ci);
1.2       ad        521:
1.79      jruoho    522:        if (!pmf_device_register1(self, cpu_suspend, cpu_resume, cpu_shutdown))
1.12      jmcneill  523:                aprint_error_dev(self, "couldn't establish power handler\n");
                     524:
1.101     kiyohara  525: #ifdef MULTIPROCESSOR
1.2       ad        526:        if (mp_verbose) {
                     527:                struct lwp *l = ci->ci_data.cpu_idlelwp;
1.65      rmind     528:                struct pcb *pcb = lwp_getpcb(l);
1.2       ad        529:
1.47      ad        530:                aprint_verbose_dev(self,
1.28      cegger    531:                    "idle lwp at %p, idle sp at %p\n",
                     532:                    l,
1.2       ad        533: #ifdef i386
1.65      rmind     534:                    (void *)pcb->pcb_esp
1.2       ad        535: #else
1.65      rmind     536:                    (void *)pcb->pcb_rsp
1.2       ad        537: #endif
                     538:                );
                     539:        }
1.101     kiyohara  540: #endif
1.81      jmcneill  541:
1.89      jruoho    542:        /*
                     543:         * Postpone the "cpufeaturebus" scan.
                     544:         * It is safe to scan the pseudo-bus
                     545:         * only after all CPUs have attached.
                     546:         */
1.87      jruoho    547:        (void)config_defer(self, cpu_defer);
                     548: }
                    549:
                         /*
                          * config_defer() callback: run the postponed "cpufeaturebus"
                          * rescan once all CPUs have attached (see cpu_attach()).
                          */
                     550: static void
                     551: cpu_defer(device_t self)
                     552: {
1.81      jmcneill  553:        cpu_rescan(self, NULL, NULL);
                     554: }
                    555:
1.87      jruoho    556: static int
1.81      jmcneill  557: cpu_rescan(device_t self, const char *ifattr, const int *locators)
                    558: {
1.83      jruoho    559:        struct cpu_softc *sc = device_private(self);
1.81      jmcneill  560:        struct cpufeature_attach_args cfaa;
                    561:        struct cpu_info *ci = sc->sc_info;
                    562:
1.181     pgoyette  563:        /*
                    564:         * If we booted with RB_MD1 to disable multiprocessor, the
                    565:         * auto-configuration data still contains the additional
                    566:         * CPUs.   But their initialization was mostly bypassed
                    567:         * during attach, so we have to make sure we don't look at
                    568:         * their featurebus info, since it wasn't retrieved.
                    569:         */
                    570:        if (ci == NULL)
                    571:                return 0;
                    572:
1.81      jmcneill  573:        memset(&cfaa, 0, sizeof(cfaa));
                    574:        cfaa.ci = ci;
                    575:
                    576:        if (ifattr_match(ifattr, "cpufeaturebus")) {
1.83      jruoho    577:                if (ci->ci_frequency == NULL) {
1.86      jruoho    578:                        cfaa.name = "frequency";
1.84      jruoho    579:                        ci->ci_frequency = config_found_ia(self,
                    580:                            "cpufeaturebus", &cfaa, NULL);
                    581:                }
                    582:
1.81      jmcneill  583:                if (ci->ci_padlock == NULL) {
                    584:                        cfaa.name = "padlock";
                    585:                        ci->ci_padlock = config_found_ia(self,
                    586:                            "cpufeaturebus", &cfaa, NULL);
                    587:                }
1.82      jruoho    588:
1.86      jruoho    589:                if (ci->ci_temperature == NULL) {
                    590:                        cfaa.name = "temperature";
                    591:                        ci->ci_temperature = config_found_ia(self,
1.85      jruoho    592:                            "cpufeaturebus", &cfaa, NULL);
                    593:                }
1.95      jmcneill  594:
                    595:                if (ci->ci_vm == NULL) {
                    596:                        cfaa.name = "vm";
                    597:                        ci->ci_vm = config_found_ia(self,
                    598:                            "cpufeaturebus", &cfaa, NULL);
                    599:                }
1.81      jmcneill  600:        }
                    601:
                    602:        return 0;
                    603: }
                    604:
1.87      jruoho    605: static void
1.81      jmcneill  606: cpu_childdetached(device_t self, device_t child)
                    607: {
                    608:        struct cpu_softc *sc = device_private(self);
                    609:        struct cpu_info *ci = sc->sc_info;
                    610:
1.83      jruoho    611:        if (ci->ci_frequency == child)
                    612:                ci->ci_frequency = NULL;
1.82      jruoho    613:
1.81      jmcneill  614:        if (ci->ci_padlock == child)
                    615:                ci->ci_padlock = NULL;
1.83      jruoho    616:
1.86      jruoho    617:        if (ci->ci_temperature == child)
                    618:                ci->ci_temperature = NULL;
1.95      jmcneill  619:
                    620:        if (ci->ci_vm == child)
                    621:                ci->ci_vm = NULL;
1.2       ad        622: }
                    623:
/*
 * Initialize the processor appropriately.
 *
 * Runs on every CPU: on the boot processor during attach, and on each
 * application processor from cpu_hatch().  Sets up CR0/CR4 feature
 * bits, refreshes cached CPUID data, initializes the FPU save mask,
 * MTRRs (if configured), and finally marks the CPU running.
 */

void
cpu_init(struct cpu_info *ci)
{
	extern int x86_fpu_save;
	uint32_t cr4 = 0;

	/* CR0_WP: honor page-level write protection in supervisor mode. */
	lcr0(rcr0() | CR0_WP);

	/* If global TLB caching is supported, enable it */
	if (cpu_feature[0] & CPUID_PGE)
		cr4 |= CR4_PGE;

	/*
	 * If we have FXSAVE/FXRESTOR, use them.
	 */
	if (cpu_feature[0] & CPUID_FXSR) {
		cr4 |= CR4_OSFXSR;

		/*
		 * If we have SSE/SSE2, enable XMM exceptions.
		 */
		if (cpu_feature[0] & (CPUID_SSE|CPUID_SSE2))
			cr4 |= CR4_OSXMMEXCPT;
	}

	/* If xsave is supported, enable it */
	if (cpu_feature[1] & CPUID2_XSAVE)
		cr4 |= CR4_OSXSAVE;

	/* If SMEP is supported, enable it */
	if (cpu_feature[5] & CPUID_SEF_SMEP)
		cr4 |= CR4_SMEP;

	/* If SMAP is supported, enable it */
	if (cpu_feature[5] & CPUID_SEF_SMAP)
		cr4 |= CR4_SMAP;

#ifdef SVS
	/* If PCID is supported, enable it */
	if (svs_pcid)
		cr4 |= CR4_PCIDE;
#endif

	/* Only touch CR4 if there is actually a bit to set. */
	if (cr4) {
		cr4 |= rcr4();
		lcr4(cr4);
	}

	/*
	 * Changing CR4 register may change cpuid values. For example, setting
	 * CR4_OSXSAVE sets CPUID2_OSXSAVE. The CPUID2_OSXSAVE is in
	 * ci_feat_val[1], so update it.
	 * XXX Other than ci_feat_val[1] might be changed.
	 */
	if (cpuid_level >= 1) {
		u_int descs[4];

		x86_cpuid(1, descs);
		ci->ci_feat_val[1] = descs[2];
	}

	/* Determine the MXCSR mask when FXSAVE or better is in use. */
	if (x86_fpu_save >= FPU_SAVE_FXSAVE) {
		fpuinit_mxcsr_mask();
	}

	/* If xsave is enabled, enable all fpu features */
	if (cr4 & CR4_OSXSAVE)
		wrxcr(0, x86_xsave_features & XCR0_FPU);

#ifdef MTRR
	/*
	 * On a P6 or above, initialize MTRR's if the hardware supports them.
	 */
	if (cpu_feature[0] & CPUID_MTRR) {
		/* Only the BSP does the one-time global MTRR setup. */
		if ((ci->ci_flags & CPUF_AP) == 0)
			i686_mtrr_init_first();
		mtrr_init_cpu(ci);
	}

#ifdef i386
	if (strcmp((char *)(ci->ci_vendor), "AuthenticAMD") == 0) {
		/*
		 * Must be a K6-2 Step >= 7 or a K6-III.
		 */
		if (CPUID_TO_FAMILY(ci->ci_signature) == 5) {
			if (CPUID_TO_MODEL(ci->ci_signature) > 8 ||
			    (CPUID_TO_MODEL(ci->ci_signature) == 8 &&
			     CPUID_TO_STEPPING(ci->ci_signature) >= 7)) {
				mtrr_funcs = &k6_mtrr_funcs;
				k6_mtrr_init_first();
				mtrr_init_cpu(ci);
			}
		}
	}
#endif /* i386 */
#endif /* MTRR */

	if (ci != &cpu_info_primary) {
		/* Synchronize TSC */
		atomic_or_32(&ci->ci_flags, CPUF_RUNNING);
		tsc_sync_ap(ci);
	} else {
		atomic_or_32(&ci->ci_flags, CPUF_RUNNING);
	}
}
                    733:
1.101     kiyohara  734: #ifdef MULTIPROCESSOR
/*
 * Boot all attached application processors.  Also: switch the delay
 * source to the HPET if available, patch the kernel text now that the
 * final CPU count is known, and attach the TSC timecounter once every
 * CPU is marked running.
 */
void
cpu_boot_secondary_processors(void)
{
	struct cpu_info *ci;
	kcpuset_t *cpus;
	u_long i;

#if NHPET > 0
	/* Use HPET delay, and re-calibrate TSC on boot CPU using HPET. */
	if (hpet_delay_p() && x86_delay == i8254_delay) {
		delay_func = x86_delay = hpet_delay;
		cpu_get_tsc_freq(curcpu());
	}
#endif

	/* Now that we know the number of CPUs, patch the text segment. */
	x86_patch(false);

#if NACPICA > 0
	/* Finished with NUMA info for now. */
	acpisrat_exit();
#endif

	/*
	 * Boot each eligible AP in turn, tracking the set of CPUs we
	 * expect to come online in 'cpus'.
	 */
	kcpuset_create(&cpus, true);
	kcpuset_set(cpus, cpu_index(curcpu()));
	for (i = 0; i < maxcpus; i++) {
		ci = cpu_lookup(i);
		if (ci == NULL)
			continue;
		if (ci->ci_data.cpu_idlelwp == NULL)
			continue;
		if ((ci->ci_flags & CPUF_PRESENT) == 0)
			continue;
		/* Skip the boot/single/primary processor itself. */
		if (ci->ci_flags & (CPUF_BSP|CPUF_SP|CPUF_PRIMARY))
			continue;
		cpu_boot_secondary(ci);
		kcpuset_set(cpus, cpu_index(ci));
	}

	/* Busy-wait until every booted CPU has registered as running. */
	while (!kcpuset_match(cpus, kcpuset_running))
		;
	kcpuset_destroy(cpus);

	x86_mp_online = true;

	/* Now that we know about the TSC, attach the timecounter. */
	tsc_tc_init();

	/* Enable zeroing of pages in the idle loop if we have SSE2. */
	vm_page_zero_enable = false; /* ((cpu_feature[0] & CPUID_SSE2) != 0); */
}
1.101     kiyohara  785: #endif
1.2       ad        786:
                    787: static void
                    788: cpu_init_idle_lwp(struct cpu_info *ci)
                    789: {
                    790:        struct lwp *l = ci->ci_data.cpu_idlelwp;
1.65      rmind     791:        struct pcb *pcb = lwp_getpcb(l);
1.2       ad        792:
                    793:        pcb->pcb_cr0 = rcr0();
                    794: }
                    795:
                    796: void
1.12      jmcneill  797: cpu_init_idle_lwps(void)
1.2       ad        798: {
                    799:        struct cpu_info *ci;
                    800:        u_long i;
                    801:
1.54      ad        802:        for (i = 0; i < maxcpus; i++) {
1.57      ad        803:                ci = cpu_lookup(i);
1.2       ad        804:                if (ci == NULL)
                    805:                        continue;
                    806:                if (ci->ci_data.cpu_idlelwp == NULL)
                    807:                        continue;
                    808:                if ((ci->ci_flags & CPUF_PRESENT) == 0)
                    809:                        continue;
                    810:                cpu_init_idle_lwp(ci);
                    811:        }
                    812: }
                    813:
1.101     kiyohara  814: #ifdef MULTIPROCESSOR
/*
 * Start one application processor: set up the boot trampoline, fire
 * the startup sequence, wait for the AP to announce itself
 * (CPUF_PRESENT), then perform the first TSC synchronization pass
 * against it.
 */
void
cpu_start_secondary(struct cpu_info *ci)
{
	u_long psl;
	int i;

#if NLAPIC > 0
	/* Build temporary page tables and copy the boot trampoline. */
	paddr_t mp_pdirpa;
	mp_pdirpa = pmap_init_tmp_pgtbl(mp_trampoline_paddr);
	cpu_copy_trampoline(mp_pdirpa);
#endif

	atomic_or_32(&ci->ci_flags, CPUF_AP);
	ci->ci_curlwp = ci->ci_data.cpu_idlelwp;
	if (CPU_STARTUP(ci, mp_trampoline_paddr) != 0) {
		return;
	}

	/*
	 * Wait for it to become ready.   Setting cpu_starting opens the
	 * initial gate and allows the AP to start soft initialization.
	 */
	KASSERT(cpu_starting == NULL);
	cpu_starting = ci;
	/* Poll for CPUF_PRESENT: up to 100000 iterations of 10us each. */
	for (i = 100000; (!(ci->ci_flags & CPUF_PRESENT)) && i > 0; i--) {
		delay_func(10);
	}

	if ((ci->ci_flags & CPUF_PRESENT) == 0) {
		aprint_error_dev(ci->ci_dev, "failed to become ready\n");
#if defined(MPDEBUG) && defined(DDB)
		printf("dropping into debugger; continue from here to resume boot\n");
		Debugger();
#endif
	} else {
		/*
		 * Synchronize time stamp counters. Invalidate cache and do
		 * twice (in tsc_sync_bp) to minimize possible cache effects.
		 * Disable interrupts to try and rule out any external
		 * interference.
		 */
		psl = x86_read_psl();
		x86_disable_intr();
		tsc_sync_bp(ci);
		x86_write_psl(psl);
	}

	CPU_START_CLEANUP(ci);
	cpu_starting = NULL;
}
                    865:
/*
 * Release a previously started AP by setting CPUF_GO, wait for it to
 * mark itself running, then re-synchronize its TSC and report the
 * drift relative to the previously recorded skew.
 */
void
cpu_boot_secondary(struct cpu_info *ci)
{
	int64_t drift;
	u_long psl;
	int i;

	atomic_or_32(&ci->ci_flags, CPUF_GO);
	/* Poll for CPUF_RUNNING: up to 100000 iterations of 10us each. */
	for (i = 100000; (!(ci->ci_flags & CPUF_RUNNING)) && i > 0; i--) {
		delay_func(10);
	}
	if ((ci->ci_flags & CPUF_RUNNING) == 0) {
		aprint_error_dev(ci->ci_dev, "failed to start\n");
#if defined(MPDEBUG) && defined(DDB)
		printf("dropping into debugger; continue from here to resume boot\n");
		Debugger();
#endif
	} else {
		/* Synchronize TSC again, check for drift. */
		drift = ci->ci_data.cpu_cc_skew;
		psl = x86_read_psl();
		x86_disable_intr();
		tsc_sync_bp(ci);
		x86_write_psl(psl);
		/* drift = skew before this pass minus skew after it. */
		drift -= ci->ci_data.cpu_cc_skew;
		aprint_debug_dev(ci->ci_dev, "TSC skew=%lld drift=%lld\n",
		    (long long)ci->ci_data.cpu_cc_skew, (long long)drift);
		tsc_sync_drift(drift);
	}
}
                    896:
/*
 * The CPU ends up here when it's ready to run.
 * This is called from code in mptramp.s; at this point, we are running
 * in the idle pcb/idle stack of the new CPU.  When this function returns,
 * this processor will enter the idle loop and start looking for work.
 */
void
cpu_hatch(void *v)
{
	struct cpu_info *ci = (struct cpu_info *)v;
	struct pcb *pcb;
	int s, i;

	/* ------------------------------------------------------------- */

	/*
	 * This section of code must be compiled with SSP disabled, to
	 * prevent a race against cpu0. See sys/conf/ssp.mk.
	 */

	cpu_init_msrs(ci, true);
	cpu_probe(ci);
	cpu_speculation_init(ci);
#if NHYPERV > 0
	hyperv_init_cpu(ci);
#endif

	/* Inherit the primary CPU's cycle-counter frequency for now. */
	ci->ci_data.cpu_cc_freq = cpu_info_primary.ci_data.cpu_cc_freq;
	/* cpu_get_tsc_freq(ci); */

	KDASSERT((ci->ci_flags & CPUF_PRESENT) == 0);

	/*
	 * Synchronize the TSC for the first time. Note that interrupts are
	 * off at this point.
	 */
	atomic_or_32(&ci->ci_flags, CPUF_PRESENT);
	tsc_sync_ap(ci);

	/* ------------------------------------------------------------- */

	/*
	 * Wait to be brought online.
	 *
	 * Use MONITOR/MWAIT if available. These instructions put the CPU in
	 * a low consumption mode (C-state), and if the TSC is not invariant,
	 * this causes the TSC to drift. We want this to happen, so that we
	 * can later detect (in tsc_tc_init) any abnormal drift with invariant
	 * TSCs. That's just for safety; by definition such drifts should
	 * never occur with invariant TSCs.
	 *
	 * If not available, try PAUSE. We'd like to use HLT, but we have
	 * interrupts off.
	 */
	while ((ci->ci_flags & CPUF_GO) == 0) {
		if ((cpu_feature[1] & CPUID2_MONITOR) != 0) {
			x86_monitor(&ci->ci_flags, 0, 0);
			/* Re-check before sleeping to close the race. */
			if ((ci->ci_flags & CPUF_GO) != 0) {
				continue;
			}
			x86_mwait(0, 0);
		} else {
			/*
			 * XXX The loop repetition count could be a lot higher, but
			 * XXX currently qemu emulator takes a _very_long_time_ to
			 * XXX execute the pause instruction.  So for now, use a low
			 * XXX value to allow the cpu to hatch before timing out.
			 */
			for (i = 50; i != 0; i--) {
				x86_pause();
			}
		}
	}

	/* Because the text may have been patched in x86_patch(). */
	wbinvd();
	x86_flush();
	tlbflushg();

	KASSERT((ci->ci_flags & CPUF_RUNNING) == 0);

	/* Switch from the boot page tables to the kernel pmap. */
#ifdef PAE
	pd_entry_t * l3_pd = ci->ci_pae_l3_pdir;
	for (i = 0 ; i < PDP_SIZE; i++) {
		l3_pd[i] = pmap_kernel()->pm_pdirpa[i] | PTE_P;
	}
	lcr3(ci->ci_pae_l3_pdirpa);
#else
	lcr3(pmap_pdirpa(pmap_kernel(), 0));
#endif

	/* Record CR3 in the current lwp's PCB; load the idle lwp's CR0. */
	pcb = lwp_getpcb(curlwp);
	pcb->pcb_cr3 = rcr3();
	pcb = lwp_getpcb(ci->ci_data.cpu_idlelwp);
	lcr0(pcb->pcb_cr0);

	/* Per-CPU descriptor tables and local APIC. */
	cpu_init_idt();
	gdt_init_cpu(ci);
#if NLAPIC > 0
	lapic_enable();
	lapic_set_lvt();
#endif

	fpuinit(ci);
	lldt(GSYSSEL(GLDT_SEL, SEL_KPL));
	ltr(ci->ci_tss_sel);

	/*
	 * cpu_init will re-synchronize the TSC, and will detect any abnormal
	 * drift that would have been caused by the use of MONITOR/MWAIT
	 * above.
	 */
	cpu_init(ci);
#ifdef XENPVHVM
	xen_hvm_init_cpu(ci);
#endif
	(*x86_cpu_initclock_func)();
	cpu_get_tsc_freq(ci);

	/* Unmask interrupts for the first time on this CPU. */
	s = splhigh();
#if NLAPIC > 0
	lapic_write_tpri(0);
#endif
	x86_enable_intr();
	splx(s);
	x86_errata();

	aprint_debug_dev(ci->ci_dev, "running\n");

	kcsan_cpu_init(ci);

	/* Does not return: this CPU now runs its idle loop. */
	idle_loop(NULL);
	KASSERT(false);
}
1.101     kiyohara 1031: #endif
1.2       ad       1032:
                   1033: #if defined(DDB)
                   1034:
                   1035: #include <ddb/db_output.h>
                   1036: #include <machine/db_machdep.h>
                   1037:
/*
 * Dump CPU information from ddb.
 */
void
cpu_debug_dump(void)
{
	struct cpu_info *ci;
	CPU_INFO_ITERATOR cii;
	/* Extra header padding to line up 64-bit pointers on _LP64. */
	const char sixtyfour64space[] =
#ifdef _LP64
		   "        "
#endif
		   "";

	db_printf("addr         %sdev   id      flags   ipis    spl curlwp              "
		  "\n", sixtyfour64space);
	for (CPU_INFO_FOREACH(cii, ci)) {
		/* A CPU without an attached device shows as "BOOT". */
		db_printf("%p   %s      %ld     %x      %x      %d  %10p\n",
		    ci,
		    ci->ci_dev == NULL ? "BOOT" : device_xname(ci->ci_dev),
		    (long)ci->ci_cpuid,
		    ci->ci_flags, ci->ci_ipis, ci->ci_ilevel,
		    ci->ci_curlwp);
	}
}
                   1063: #endif
                   1064:
1.164     cherry   1065: #ifdef MULTIPROCESSOR
1.101     kiyohara 1066: #if NLAPIC > 0
/*
 * Copy the real-mode AP bootstrap code (cpu_spinup_trampoline) into
 * the low-memory page at mp_trampoline_paddr, and append a small
 * parameter block (large-page flag, NX flag, page directory PA) at the
 * very end of that page for the trampoline code to consume.  The
 * layout of smp_data is a contract with the assembly trampoline, hence
 * the CTASSERT on its size.
 */
static void
cpu_copy_trampoline(paddr_t pdir_pa)
{
	extern uint32_t nox_flag;
	extern u_char cpu_spinup_trampoline[];
	extern u_char cpu_spinup_trampoline_end[];
	vaddr_t mp_trampoline_vaddr;
	struct {
		uint32_t large;
		uint32_t nox;
		uint32_t pdir;
	} smp_data;
	CTASSERT(sizeof(smp_data) == 3 * 4);

	smp_data.large = (pmap_largepages != 0);
	smp_data.nox = nox_flag;
	smp_data.pdir = (uint32_t)(pdir_pa & 0xFFFFFFFF);

	/* Enter the physical address */
	mp_trampoline_vaddr = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
	    UVM_KMF_VAONLY);
	pmap_kenter_pa(mp_trampoline_vaddr, mp_trampoline_paddr,
	    VM_PROT_READ | VM_PROT_WRITE, 0);
	pmap_update(pmap_kernel());

	/* Copy boot code */
	memcpy((void *)mp_trampoline_vaddr,
	    cpu_spinup_trampoline,
	    cpu_spinup_trampoline_end - cpu_spinup_trampoline);

	/* Copy smp_data at the end */
	memcpy((void *)(mp_trampoline_vaddr + PAGE_SIZE - sizeof(smp_data)),
	    &smp_data, sizeof(smp_data));

	/* Tear down the temporary mapping; the physical page stays. */
	pmap_kremove(mp_trampoline_vaddr, PAGE_SIZE);
	pmap_update(pmap_kernel());
	uvm_km_free(kernel_map, mp_trampoline_vaddr, PAGE_SIZE, UVM_KMF_VAONLY);
}
1.101     kiyohara 1105: #endif
1.2       ad       1106:
/*
 * Kick an AP into life: program the CMOS warm-reset vector to point at
 * the real-mode bootstrap at 'target', then send an INIT IPI followed
 * by two STARTUP IPIs with delays in between (the classic MP-spec
 * startup sequence).
 *
 * => target: physical address of the bootstrap code; must be below
 *    64KB and page aligned (asserted below).
 * => Returns 0 on success, or an errno if an IPI was not taken.
 */
int
mp_cpu_start(struct cpu_info *ci, paddr_t target)
{
	int error;

	/*
	 * Bootstrap code must be addressable in real mode
	 * and it must be page aligned.
	 */
	KASSERT(target < 0x10000 && target % PAGE_SIZE == 0);

	/*
	 * "The BSP must initialize CMOS shutdown code to 0Ah ..."
	 */

	outb(IO_RTC, NVRAM_RESET);
	outb(IO_RTC+1, NVRAM_RESET_JUMP);

#if NLAPIC > 0
	/*
	 * "and the warm reset vector (DWORD based at 40:67) to point
	 * to the AP startup code ..."
	 */
	unsigned short dwordptr[2];
	dwordptr[0] = 0;
	dwordptr[1] = target >> 4;

	memcpy((uint8_t *)cmos_data_mapping + 0x467, dwordptr, 4);
#endif

	if ((cpu_feature[0] & CPUID_APIC) == 0) {
		aprint_error("mp_cpu_start: CPU does not have APIC\n");
		return ENODEV;
	}

	/*
	 * ... prior to executing the following sequence:".  We'll also add in
	 * local cache flush, in case the BIOS has left the AP with its cache
	 * disabled.  It may not be able to cope with MP coherency.
	 */
	wbinvd();

	if (ci->ci_flags & CPUF_AP) {
		/* INIT IPI first ... */
		error = x86_ipi_init(ci->ci_cpuid);
		if (error != 0) {
			aprint_error_dev(ci->ci_dev, "%s: IPI not taken (1)\n",
			    __func__);
			return error;
		}
		delay_func(10000);

		/* ... then two STARTUP IPIs carrying the target page. */
		error = x86_ipi_startup(ci->ci_cpuid, target / PAGE_SIZE);
		if (error != 0) {
			aprint_error_dev(ci->ci_dev, "%s: IPI not taken (2)\n",
			    __func__);
			return error;
		}
		delay_func(200);

		error = x86_ipi_startup(ci->ci_cpuid, target / PAGE_SIZE);
		if (error != 0) {
			aprint_error_dev(ci->ci_dev, "%s: IPI not taken (3)\n",
			    __func__);
			return error;
		}
		delay_func(200);
	}

	return 0;
}
                   1177:
void
mp_cpu_start_cleanup(struct cpu_info *ci)
{
	/*
	 * Ensure the NVRAM reset byte contains something vaguely sane.
	 * mp_cpu_start() programmed the CMOS shutdown code with
	 * NVRAM_RESET_JUMP to bootstrap the APs; restore the normal
	 * reset code now that startup is complete.
	 */

	outb(IO_RTC, NVRAM_RESET);	/* select the shutdown status byte */
	outb(IO_RTC+1, NVRAM_RESET_RST);
}
1.101     kiyohara 1188: #endif
1.2       ad       1189:
                   1190: #ifdef __x86_64__
                   1191: typedef void (vector)(void);
1.148     maxv     1192: extern vector Xsyscall, Xsyscall32, Xsyscall_svs;
1.70      jym      1193: #endif
1.2       ad       1194:
/*
 * Initialize this CPU's MSRs: the SYSCALL/SYSRET entry points on amd64,
 * and the no-execute (NXE) enable bit where supported.  With "full" set,
 * additionally initialize the segment base MSRs, pointing %gs at this
 * CPU's cpu_info.
 */
void
cpu_init_msrs(struct cpu_info *ci, bool full)
{
#ifdef __x86_64__
	/*
	 * STAR selects the kernel/user segment selectors used by
	 * SYSCALL/SYSRET; LSTAR and CSTAR are the 64-bit and 32-bit
	 * syscall entry points; SFMASK lists the RFLAGS bits cleared
	 * on kernel entry.
	 */
	wrmsr(MSR_STAR,
	    ((uint64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
	    ((uint64_t)LSEL(LSYSRETBASE_SEL, SEL_UPL) << 48));
	wrmsr(MSR_LSTAR, (uint64_t)Xsyscall);
	wrmsr(MSR_CSTAR, (uint64_t)Xsyscall32);
	wrmsr(MSR_SFMASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D|PSL_AC);

#ifdef SVS
	/* With page-table separation enabled, override the entry point. */
	if (svs_enabled)
		wrmsr(MSR_LSTAR, (uint64_t)Xsyscall_svs);
#endif

	if (full) {
		/* %gs-relative accesses resolve to this CPU's cpu_info. */
		wrmsr(MSR_FSBASE, 0);
		wrmsr(MSR_GSBASE, (uint64_t)ci);
		wrmsr(MSR_KERNELGSBASE, 0);
	}
#endif /* __x86_64__ */

	/* Enable no-execute page protection if the CPU advertises it. */
	if (cpu_feature[2] & CPUID_NOX)
		wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NXE);
}
1.7       ad       1221:
1.107     christos 1222: void
                   1223: cpu_offline_md(void)
                   1224: {
1.173     maxv     1225:        return;
1.107     christos 1226: }
                   1227:
1.12      jmcneill 1228: /* XXX joerg restructure and restart CPUs individually */
                   1229: static bool
1.96      jruoho   1230: cpu_stop(device_t dv)
1.12      jmcneill 1231: {
                   1232:        struct cpu_softc *sc = device_private(dv);
                   1233:        struct cpu_info *ci = sc->sc_info;
1.18      joerg    1234:        int err;
1.12      jmcneill 1235:
1.96      jruoho   1236:        KASSERT((ci->ci_flags & CPUF_PRESENT) != 0);
1.93      jruoho   1237:
                   1238:        if ((ci->ci_flags & CPUF_PRIMARY) != 0)
                   1239:                return true;
                   1240:
1.12      jmcneill 1241:        if (ci->ci_data.cpu_idlelwp == NULL)
                   1242:                return true;
                   1243:
1.20      jmcneill 1244:        sc->sc_wasonline = !(ci->ci_schedstate.spc_flags & SPCF_OFFLINE);
1.17      joerg    1245:
1.20      jmcneill 1246:        if (sc->sc_wasonline) {
                   1247:                mutex_enter(&cpu_lock);
1.58      rmind    1248:                err = cpu_setstate(ci, false);
1.20      jmcneill 1249:                mutex_exit(&cpu_lock);
1.79      jruoho   1250:
1.93      jruoho   1251:                if (err != 0)
1.20      jmcneill 1252:                        return false;
                   1253:        }
1.17      joerg    1254:
                   1255:        return true;
1.12      jmcneill 1256: }
                   1257:
                   1258: static bool
1.96      jruoho   1259: cpu_suspend(device_t dv, const pmf_qual_t *qual)
                   1260: {
                   1261:        struct cpu_softc *sc = device_private(dv);
                   1262:        struct cpu_info *ci = sc->sc_info;
                   1263:
                   1264:        if ((ci->ci_flags & CPUF_PRESENT) == 0)
                   1265:                return true;
                   1266:        else {
                   1267:                cpufreq_suspend(ci);
                   1268:        }
                   1269:
                   1270:        return cpu_stop(dv);
                   1271: }
                   1272:
                   1273: static bool
1.69      dyoung   1274: cpu_resume(device_t dv, const pmf_qual_t *qual)
1.12      jmcneill 1275: {
                   1276:        struct cpu_softc *sc = device_private(dv);
                   1277:        struct cpu_info *ci = sc->sc_info;
1.20      jmcneill 1278:        int err = 0;
1.12      jmcneill 1279:
1.93      jruoho   1280:        if ((ci->ci_flags & CPUF_PRESENT) == 0)
1.12      jmcneill 1281:                return true;
1.93      jruoho   1282:
                   1283:        if ((ci->ci_flags & CPUF_PRIMARY) != 0)
                   1284:                goto out;
                   1285:
1.12      jmcneill 1286:        if (ci->ci_data.cpu_idlelwp == NULL)
1.93      jruoho   1287:                goto out;
1.12      jmcneill 1288:
1.20      jmcneill 1289:        if (sc->sc_wasonline) {
                   1290:                mutex_enter(&cpu_lock);
1.58      rmind    1291:                err = cpu_setstate(ci, true);
1.20      jmcneill 1292:                mutex_exit(&cpu_lock);
                   1293:        }
1.13      joerg    1294:
1.93      jruoho   1295: out:
                   1296:        if (err != 0)
                   1297:                return false;
                   1298:
                   1299:        cpufreq_resume(ci);
                   1300:
                   1301:        return true;
1.12      jmcneill 1302: }
                   1303:
1.79      jruoho   1304: static bool
                   1305: cpu_shutdown(device_t dv, int how)
                   1306: {
1.90      dyoung   1307:        struct cpu_softc *sc = device_private(dv);
                   1308:        struct cpu_info *ci = sc->sc_info;
                   1309:
1.96      jruoho   1310:        if ((ci->ci_flags & CPUF_BSP) != 0)
1.90      dyoung   1311:                return false;
                   1312:
1.96      jruoho   1313:        if ((ci->ci_flags & CPUF_PRESENT) == 0)
                   1314:                return true;
                   1315:
                   1316:        return cpu_stop(dv);
1.79      jruoho   1317: }
                   1318:
1.185     msaitoh  1319: /* Get the TSC frequency and set it to ci->ci_data.cpu_cc_freq. */
1.7       ad       1320: void
                   1321: cpu_get_tsc_freq(struct cpu_info *ci)
                   1322: {
1.191   ! msaitoh  1323:        uint64_t freq = 0, freq_from_cpuid, t0, t1;
1.190     ad       1324:        int64_t overhead;
1.7       ad       1325:
1.190     ad       1326:        if ((ci->ci_flags & CPUF_PRIMARY) != 0 && cpu_hascounter()) {
1.191   ! msaitoh  1327:                /*
        !          1328:                 * If it's the first call of this function, try to get TSC
        !          1329:                 * freq from CPUID by calling cpu_tsc_freq_cpuid().
        !          1330:                 * The function also set lapic_per_second variable if it's
        !          1331:                 * known. This is required for Intel's Comet Lake and newer
        !          1332:                 * processors to set LAPIC timer correctly.
        !          1333:                 */
        !          1334:                if (ci->ci_data.cpu_cc_freq == 0)
        !          1335:                        freq = freq_from_cpuid = cpu_tsc_freq_cpuid(ci);
1.190     ad       1336: #if NHPET > 0
                   1337:                if (freq == 0)
                   1338:                        freq = hpet_tsc_freq();
                   1339: #endif
                   1340:                if (freq == 0) {
                   1341:                        /*
                   1342:                         * Work out the approximate overhead involved below.
                   1343:                         * Discard the result of the first go around the
                   1344:                         * loop.
                   1345:                         */
                   1346:                        overhead = 0;
                   1347:                        for (int i = 0; i <= 8; i++) {
                   1348:                                t0 = cpu_counter();
                   1349:                                x86_delay(0);
                   1350:                                t1 = cpu_counter();
                   1351:                                if (i > 0) {
                   1352:                                        overhead += (t1 - t0);
                   1353:                                }
                   1354:                        }
                   1355:                        overhead >>= 3;
1.185     msaitoh  1356:
1.190     ad       1357:                        /* Now do the calibration. */
                   1358:                        t0 = cpu_counter();
                   1359:                        x86_delay(100000);
                   1360:                        t1 = cpu_counter();
                   1361:                        freq = (t1 - t0 - overhead) * 10;
                   1362:                }
1.191   ! msaitoh  1363:                if (ci->ci_data.cpu_cc_freq != 0) {
        !          1364:                        freq_from_cpuid = cpu_tsc_freq_cpuid(ci);
        !          1365:                        if ((freq_from_cpuid != 0)
        !          1366:                            && (freq != freq_from_cpuid))
        !          1367:                                aprint_verbose_dev(ci->ci_dev, "TSC freq "
        !          1368:                                    "calibrated %" PRIu64 " Hz\n", freq);
        !          1369:                }
1.185     msaitoh  1370:        } else {
1.190     ad       1371:                freq = cpu_info_primary.ci_data.cpu_cc_freq;
1.7       ad       1372:        }
1.190     ad       1373:
                   1374:        ci->ci_data.cpu_cc_freq = freq;
1.7       ad       1375: }
1.37      joerg    1376:
/*
 * Idle using MONITOR/MWAIT: arm a hardware monitor on this CPU's
 * ci_want_resched word, re-check it, and sleep with MWAIT.  A store to
 * the monitored line (e.g. a remote CPU requesting a reschedule) ends
 * the MWAIT.
 */
void
x86_cpu_idle_mwait(void)
{
	struct cpu_info *ci = curcpu();

	KASSERT(ci->ci_ilevel == IPL_NONE);

	x86_monitor(&ci->ci_want_resched, 0, 0);
	/*
	 * Re-check after arming the monitor: a wakeup that landed before
	 * MONITOR executed would otherwise not be noticed by MWAIT.
	 */
	if (__predict_false(ci->ci_want_resched)) {
		return;
	}
	x86_mwait(0, 0);
}
                   1390:
/*
 * Idle using HLT.  Interrupts are disabled before checking
 * ci_want_resched so no wakeup can slip in between the check and the
 * halt: x86_stihlt() issues STI immediately followed by HLT, and STI's
 * effect is delayed until after the next instruction, so the
 * re-enable/halt pair cannot be separated by an interrupt.
 */
void
x86_cpu_idle_halt(void)
{
	struct cpu_info *ci = curcpu();

	KASSERT(ci->ci_ilevel == IPL_NONE);

	x86_disable_intr();
	if (!__predict_false(ci->ci_want_resched)) {
		x86_stihlt();
	} else {
		/* Work is pending; just restore interrupts and return. */
		x86_enable_intr();
	}
}
1.73      jym      1405:
                   1406: /*
                   1407:  * Loads pmap for the current CPU.
                   1408:  */
                   1409: void
1.97      bouyer   1410: cpu_load_pmap(struct pmap *pmap, struct pmap *oldpmap)
1.73      jym      1411: {
1.144     maxv     1412: #ifdef SVS
1.159     maxv     1413:        if (svs_enabled) {
                   1414:                svs_pdir_switch(pmap);
                   1415:        }
1.144     maxv     1416: #endif
                   1417:
1.73      jym      1418: #ifdef PAE
1.99      yamt     1419:        struct cpu_info *ci = curcpu();
1.116     nat      1420:        bool interrupts_enabled;
1.99      yamt     1421:        pd_entry_t *l3_pd = ci->ci_pae_l3_pdir;
                   1422:        int i;
1.73      jym      1423:
1.99      yamt     1424:        /*
                   1425:         * disable interrupts to block TLB shootdowns, which can reload cr3.
                   1426:         * while this doesn't block NMIs, it's probably ok as NMIs unlikely
                   1427:         * reload cr3.
                   1428:         */
1.116     nat      1429:        interrupts_enabled = (x86_read_flags() & PSL_I) != 0;
                   1430:        if (interrupts_enabled)
                   1431:                x86_disable_intr();
                   1432:
1.73      jym      1433:        for (i = 0 ; i < PDP_SIZE; i++) {
1.168     maxv     1434:                l3_pd[i] = pmap->pm_pdirpa[i] | PTE_P;
1.73      jym      1435:        }
1.134     maxv     1436:
1.116     nat      1437:        if (interrupts_enabled)
                   1438:                x86_enable_intr();
1.73      jym      1439:        tlbflush();
1.160     maxv     1440: #else
1.73      jym      1441:        lcr3(pmap_pdirpa(pmap, 0));
1.160     maxv     1442: #endif
1.73      jym      1443: }
1.91      cherry   1444:
                   1445: /*
                   1446:  * Notify all other cpus to halt.
                   1447:  */
                   1448:
                   1449: void
1.92      cherry   1450: cpu_broadcast_halt(void)
1.91      cherry   1451: {
                   1452:        x86_broadcast_ipi(X86_IPI_HALT);
                   1453: }
                   1454:
                   1455: /*
1.176     ad       1456:  * Send a dummy ipi to a cpu to force it to run splraise()/spllower(),
                   1457:  * and trigger an AST on the running LWP.
1.91      cherry   1458:  */
                   1459:
                   1460: void
                   1461: cpu_kick(struct cpu_info *ci)
                   1462: {
1.176     ad       1463:        x86_send_ipi(ci, X86_IPI_AST);
1.91      cherry   1464: }

CVSweb <webmaster@jp.NetBSD.org>