File: [cvs.NetBSD.org] / src / sys / arch / i386 / i386 / machdep.c (download)
Revision 1.586.2.1, Wed Feb 7 12:51:44 2007 UTC (6 years, 3 months ago) by tron
Branch: netbsd-4
Changes since 1.586: +32 -13
lines
Pull up following revision(s) (requested by pavel in ticket #397):
sys/arch/i386/i386/machdep.c: revision 1.589
sys/arch/i386/i386/pmap.c: revision 1.195
PR port-i386/34186 by Wolfgang Stukenbrock:
mapping of msgbuf during startup may map invalid physical addresses
"If the last available physical memory segment on a system is smaller than
16k, then the startup code that maps the kernel message buffer will fail
and map physical pages behind the last segment. This may either only lead
to a message buffer without physical memory behind it, or to an
overlapping message buffer with something else."
Fix by allowing multiple physical memory segments to be used for msgbuf.
Also remove some leftover msgbuf manipulation from pmap.c.
Fix supplied by Wolfgang Stukenbrock in the PR, with some modifications
from me, mainly to use the already existing constant VM_PHYSSEG_MAX as the
static limit of number of msgbuf segments.
|
/* $NetBSD: machdep.c,v 1.586.2.1 2007/02/07 12:51:44 tron Exp $ */
/*-
* Copyright (c) 1996, 1997, 1998, 2000, 2004, 2006 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Charles M. Hannum, by Jason R. Thorpe of the Numerical Aerospace
* Simulation Facility, NASA Ames Research Center and by Julio M. Merino Vidal.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the NetBSD
* Foundation, Inc. and its contributors.
* 4. Neither the name of The NetBSD Foundation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*-
* Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* William Jolitz.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)machdep.c 7.4 (Berkeley) 6/3/91
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.586.2.1 2007/02/07 12:51:44 tron Exp $");
#include "opt_beep.h"
#include "opt_compat_ibcs2.h"
#include "opt_compat_mach.h" /* need to get the right segment def */
#include "opt_compat_netbsd.h"
#include "opt_compat_svr4.h"
#include "opt_cpureset_delay.h"
#include "opt_cputype.h"
#include "opt_ddb.h"
#include "opt_ipkdb.h"
#include "opt_kgdb.h"
#include "opt_mtrr.h"
#include "opt_multiprocessor.h"
#include "opt_realmem.h"
#include "opt_user_ldt.h"
#include "opt_vm86.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/signal.h>
#include <sys/signalvar.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/exec.h>
#include <sys/buf.h>
#include <sys/reboot.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/msgbuf.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/extent.h>
#include <sys/syscallargs.h>
#include <sys/core.h>
#include <sys/kcore.h>
#include <sys/ucontext.h>
#include <sys/ras.h>
#include <sys/sa.h>
#include <sys/savar.h>
#include <sys/ksyms.h>
#ifdef IPKDB
#include <ipkdb/ipkdb.h>
#endif
#ifdef KGDB
#include <sys/kgdb.h>
#endif
#include <dev/cons.h>
#include <uvm/uvm_extern.h>
#include <uvm/uvm_page.h>
#include <sys/sysctl.h>
#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/cpuvar.h>
#include <machine/gdt.h>
#include <machine/kcore.h>
#include <machine/pio.h>
#include <machine/psl.h>
#include <machine/reg.h>
#include <machine/specialreg.h>
#include <machine/bootinfo.h>
#include <machine/mtrr.h>
#include <x86/x86/tsc.h>
#include <machine/multiboot.h>
#include <dev/isa/isareg.h>
#include <machine/isa_machdep.h>
#include <dev/ic/i8042reg.h>
#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_extern.h>
#endif
#ifdef VM86
#include <machine/vm86.h>
#endif
#include "acpi.h"
#include "apmbios.h"
#include "bioscall.h"
#if NBIOSCALL > 0
#include <machine/bioscall.h>
#endif
#if NACPI > 0
#include <dev/acpi/acpivar.h>
#define ACPI_MACHDEP_PRIVATE
#include <machine/acpi_machdep.h>
#endif
#if NAPMBIOS > 0
#include <machine/apmvar.h>
#endif
#include "isa.h"
#include "isadma.h"
#include "npx.h"
#include "ksyms.h"
#include "mca.h"
#if NMCA > 0
#include <machine/mca_machdep.h> /* for mca_busprobe() */
#endif
#ifdef MULTIPROCESSOR /* XXX */
#include <machine/mpbiosvar.h> /* XXX */
#endif /* XXX */
#ifndef BEEP_ONHALT_COUNT
#define BEEP_ONHALT_COUNT 3
#endif
#ifndef BEEP_ONHALT_PITCH
#define BEEP_ONHALT_PITCH 1500
#endif
#ifndef BEEP_ONHALT_PERIOD
#define BEEP_ONHALT_PERIOD 250
#endif
/* the following is used externally (sysctl_hw) */
char machine[] = "i386"; /* CPU "architecture" */
char machine_arch[] = "i386"; /* machine == machine_arch */
extern struct bi_devmatch *x86_alldisks;
extern int x86_ndisks;
#ifdef CPURESET_DELAY
int cpureset_delay = CPURESET_DELAY;
#else
int cpureset_delay = 2000; /* default to 2s */
#endif
#ifdef MTRR
struct mtrr_funcs *mtrr_funcs;
#endif
#ifdef COMPAT_NOMID
static int exec_nomid(struct lwp *, struct exec_package *);
#endif
int physmem;
int dumpmem_low;
int dumpmem_high;
unsigned int cpu_feature;
unsigned int cpu_feature2;
int cpu_class;
int i386_fpu_present;
int i386_fpu_exception;
int i386_fpu_fdivbug;
int i386_use_fxsave;
int i386_has_sse;
int i386_has_sse2;
int tmx86_has_longrun;
/* Kernel virtual address of the message buffer; assigned in cpu_startup(). */
vaddr_t msgbuf_vaddr;
/*
 * Physical pieces backing the message buffer: start address and size of
 * each piece. At most VM_PHYSSEG_MAX pieces can be recorded.
 */
struct {
paddr_t paddr;
psize_t sz;
} msgbuf_p_seg[VM_PHYSSEG_MAX];
/* Number of valid entries in msgbuf_p_seg[]; cpu_startup() panics if it is 0. */
unsigned int msgbuf_p_cnt = 0;
vaddr_t idt_vaddr;
paddr_t idt_paddr;
#ifdef I586_CPU
vaddr_t pentium_idt_vaddr;
#endif
struct vm_map *exec_map = NULL;
struct vm_map *mb_map = NULL;
struct vm_map *phys_map = NULL;
extern paddr_t avail_start, avail_end;
void (*delay_func)(int) = i8254_delay;
void (*initclock_func)(void) = i8254_initclocks;
/*
* Size of memory segments, before any memory is stolen.
*/
phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX];
int mem_cluster_cnt;
int cpu_dump(void);
int cpu_dumpsize(void);
u_long cpu_dump_mempagecnt(void);
void dumpsys(void);
void init386(paddr_t);
void initgdt(union descriptor *);
void add_mem_cluster(uint64_t, uint64_t, uint32_t);
extern int time_adjusted;
struct bootinfo bootinfo;
int *esym;
extern int boothowto;
/* Base memory reported by BIOS. */
#ifndef REALBASEMEM
int biosbasemem = 0;
#else
int biosbasemem = REALBASEMEM;
#endif
/* Extended memory reported by BIOS. */
#ifndef REALEXTMEM
int biosextmem = 0;
#else
int biosextmem = REALEXTMEM;
#endif
/* Set if any boot-loader set biosbasemem/biosextmem. */
int biosmem_implicit;
/*
 * Representation of the bootinfo structure constructed by a NetBSD native
 * boot loader. Only used by native_loader().
 */
struct bootinfo_source {
uint32_t bs_naddrs; /* number of entries in bs_addrs[] */
paddr_t bs_addrs[1]; /* Actually longer. */
};
/* Only called by locore.h; no need to be in a header file. */
void native_loader(int, int, struct bootinfo_source *, paddr_t, int, int);
/*
* Called as one of the very first things during system startup (just after
* the boot loader gave control to the kernel image), this routine is in
* charge of retrieving the parameters passed in by the boot loader and
* storing them in the appropriate kernel variables.
*
* WARNING: Because the kernel has not yet relocated itself to KERNBASE,
* special care has to be taken when accessing memory because absolute
* addresses (referring to kernel symbols) do not work. So:
*
* 1) Avoid jumps to absolute addresses (such as gotos and switches).
* 2) To access global variables use their physical address, which
* can be obtained using the RELOC macro.
*/
/*
 * Store the boot loader's arguments into kernel globals.
 *
 * bl_boothowto    -> boothowto
 * bl_bootdev      -> bootdev (COMPAT_OLDBOOT kernels only)
 * bl_bootinfo     -> entries are copied into bootinfo.bi_data
 * bl_esym         -> esym, converted from physical to virtual
 * bl_biosextmem,
 * bl_biosbasemem  -> biosextmem/biosbasemem, unless already non-zero
 */
void
native_loader(int bl_boothowto, int bl_bootdev,
struct bootinfo_source *bl_bootinfo, paddr_t bl_esym,
int bl_biosextmem, int bl_biosbasemem)
{
/*
 * Turn the address of a kernel symbol into the address it can be
 * reached at right now, by subtracting KERNBASE.
 */
#define RELOC(type, x) ((type)((vaddr_t)(x) - KERNBASE))
*RELOC(int *, &boothowto) = bl_boothowto;
#ifdef COMPAT_OLDBOOT
/*
* Pre-1.3 boot loaders gave the boot device as a parameter
* (instead of a bootinfo entry).
*/
*RELOC(int *, &bootdev) = bl_bootdev;
#endif
/*
* The boot loader provides a physical, non-relocated address
* for the symbols table's end. We need to convert it to a
* virtual address.
*/
if (bl_esym != 0)
*RELOC(int **, &esym) = (int *)((vaddr_t)bl_esym + KERNBASE);
else
*RELOC(int **, &esym) = 0;
/*
* Copy bootinfo entries (if any) from the boot loader's
* representation to the kernel's bootinfo space.
*/
if (bl_bootinfo != NULL) {
size_t i;
uint8_t *data;
struct bootinfo *bidest;
bidest = RELOC(struct bootinfo *, &bootinfo);
/* 'data' is the write cursor inside bi_data. */
data = &bidest->bi_data[0];
for (i = 0; i < bl_bootinfo->bs_naddrs; i++) {
struct btinfo_common *bc;
bc = (struct btinfo_common *)(bl_bootinfo->bs_addrs[i]);
/*
 * Stop as soon as the next entry would run past
 * BOOTINFO_MAXSIZE bytes of bi_data; this entry and all
 * following ones are not copied.
 */
if ((paddr_t)(data + bc->len) >
(paddr_t)(&bidest->bi_data[0] + BOOTINFO_MAXSIZE))
break;
memcpy(data, bc, bc->len);
data += bc->len;
}
/* Number of entries that were actually copied. */
bidest->bi_nentries = i;
}
/*
* Configure biosbasemem and biosextmem only if they were not
* explicitly given during the kernel's build.
*/
if (*RELOC(int *, &biosbasemem) == 0) {
*RELOC(int *, &biosbasemem) = bl_biosbasemem;
*RELOC(int *, &biosmem_implicit) = 1;
}
if (*RELOC(int *, &biosextmem) == 0) {
*RELOC(int *, &biosextmem) = bl_biosextmem;
*RELOC(int *, &biosmem_implicit) = 1;
}
#undef RELOC
}
/*
* Machine-dependent startup code
*/
void
cpu_startup()
{
int x, y;
vaddr_t minaddr, maxaddr;
psize_t sz;
char pbuf[9];
/*
* Initialize error message buffer (et end of core).
*/
if (msgbuf_p_cnt == 0)
panic("msgbuf paddr map has not been set up");
for (x = 0, sz = 0; x < msgbuf_p_cnt; sz += msgbuf_p_seg[x++].sz)
continue;
msgbuf_vaddr = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_VAONLY);
if (msgbuf_vaddr == 0)
panic("failed to valloc msgbuf_vaddr");
/* msgbuf_paddr was init'd in pmap */
for (y = 0, sz = 0; y < msgbuf_p_cnt; y++) {
for (x = 0; x < btoc(msgbuf_p_seg[y].sz); x++, sz += PAGE_SIZE)
pmap_kenter_pa((vaddr_t)msgbuf_vaddr + sz,
msgbuf_p_seg[y].paddr + x * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE);
}
pmap_update(pmap_kernel());
initmsgbuf((caddr_t)msgbuf_vaddr, sz);
printf("%s%s", copyright, version);
#ifdef MULTIBOOT
multiboot_print_info();
#endif
#ifdef TRAPLOG
/*
* Enable recording of branch from/to in MSR's
*/
wrmsr(MSR_DEBUGCTLMSR, 0x1);
#endif
format_bytes(pbuf, sizeof(pbuf), ptoa(physmem));
printf("total memory = %s\n", pbuf);
minaddr = 0;
/*
* Allocate a submap for exec arguments. This map effectively
* limits the number of processes exec'ing at any time.
*/
exec_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
16*NCARGS, VM_MAP_PAGEABLE, FALSE, NULL);
/*
* Allocate a submap for physio
*/
phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
VM_PHYS_SIZE, 0, FALSE, NULL);
/*
* Finally, allocate mbuf cluster submap.
*/
mb_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
nmbclusters * mclbytes, VM_MAP_INTRSAFE, FALSE, NULL);
format_bytes(pbuf, sizeof(pbuf), ptoa(uvmexp.free));
printf("avail memory = %s\n", pbuf);
/* Safe for i/o port / memory space allocation to use malloc now. */
x86_bus_space_mallocok();
}
/*
* Set up proc0's TSS and LDT.
*/
void
i386_proc0_tss_ldt_init()
{
struct pcb *pcb;
int x;
gdt_init();
cpu_info_primary.ci_curpcb = pcb = &lwp0.l_addr->u_pcb;
pcb->pcb_tss.tss_ioopt =
((caddr_t)pcb->pcb_iomap - (caddr_t)&pcb->pcb_tss) << 16;
for (x = 0; x < sizeof(pcb->pcb_iomap) / 4; x++)
pcb->pcb_iomap[x] = 0xffffffff;
pcb->pcb_ldt_sel = pmap_kernel()->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
pcb->pcb_cr0 = rcr0();
pcb->pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
pcb->pcb_tss.tss_esp0 = USER_TO_UAREA(lwp0.l_addr) + KSTACK_SIZE - 16;
lwp0.l_md.md_regs = (struct trapframe *)pcb->pcb_tss.tss_esp0 - 1;
lwp0.l_md.md_tss_sel = tss_alloc(pcb);
ltr(lwp0.l_md.md_tss_sel);
lldt(pcb->pcb_ldt_sel);
}
/*
* Set up TSS and LDT for a new PCB.
*/
void
i386_init_pcb_tss_ldt(struct cpu_info *ci)
{
int x;
struct pcb *pcb = ci->ci_idle_pcb;
pcb->pcb_tss.tss_ioopt =
((caddr_t)pcb->pcb_iomap - (caddr_t)&pcb->pcb_tss) << 16;
for (x = 0; x < sizeof(pcb->pcb_iomap) / 4; x++)
pcb->pcb_iomap[x] = 0xffffffff;
pcb->pcb_ldt_sel = pmap_kernel()->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
pcb->pcb_cr0 = rcr0();
ci->ci_idle_tss_sel = tss_alloc(pcb);
}
/*
* sysctl helper routine for machdep.tm* nodes.
*/
static int
sysctl_machdep_tm_longrun(SYSCTLFN_ARGS)
{
struct sysctlnode node;
int io, error;
if (!tmx86_has_longrun)
return (EOPNOTSUPP);
node = *rnode;
node.sysctl_data = &io;
switch (rnode->sysctl_num) {
case CPU_TMLR_MODE:
io = (int)(crusoe_longrun = tmx86_get_longrun_mode());
break;
case CPU_TMLR_FREQUENCY:
tmx86_get_longrun_status_all();
io = crusoe_frequency;
break;
case CPU_TMLR_VOLTAGE:
tmx86_get_longrun_status_all();
io = crusoe_voltage;
break;
case CPU_TMLR_PERCENTAGE:
tmx86_get_longrun_status_all();
io = crusoe_percentage;
break;
default:
return (EOPNOTSUPP);
}
error = sysctl_lookup(SYSCTLFN_CALL(&node));
if (error || newp == NULL)
return (error);
if (rnode->sysctl_num == CPU_TMLR_MODE) {
if (tmx86_set_longrun_mode(io))
crusoe_longrun = (u_int)io;
else
return (EINVAL);
}
return (0);
}
/*
* sysctl helper routine for machdep.booted_kernel
*/
static int
sysctl_machdep_booted_kernel(SYSCTLFN_ARGS)
{
struct btinfo_bootpath *bibp;
struct sysctlnode node;
bibp = lookup_bootinfo(BTINFO_BOOTPATH);
if(!bibp)
return(ENOENT); /* ??? */
node = *rnode;
node.sysctl_data = bibp->bootpath;
node.sysctl_size = sizeof(bibp->bootpath);
return (sysctl_lookup(SYSCTLFN_CALL(&node)));
}
/*
 * sysctl helper routine for machdep.diskinfo: exposes x86_alldisks,
 * sized as one struct disklist plus (x86_ndisks - 1) additional
 * struct nativedisk_info entries. Fails with EOPNOTSUPP when no disk
 * list is available.
 */
static int
sysctl_machdep_diskinfo(SYSCTLFN_ARGS)
{
	struct sysctlnode node;

	if (x86_alldisks == NULL)
		return (EOPNOTSUPP);

	node = *rnode;
	node.sysctl_size = sizeof(struct disklist) +
	    (x86_ndisks - 1) * sizeof(struct nativedisk_info);
	node.sysctl_data = x86_alldisks;

	return (sysctl_lookup(SYSCTLFN_CALL(&node)));
}
/*
 * machine dependent system variables.
 *
 * Registers the "machdep" node and the entries below it. Entries are
 * either backed directly by a kernel variable (data pointer argument)
 * or served by one of the helper routines defined above.
 */
SYSCTL_SETUP(sysctl_machdep_setup, "sysctl machdep subtree setup")
{
/* The "machdep" node itself. */
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT,
CTLTYPE_NODE, "machdep", NULL,
NULL, 0, NULL, 0,
CTL_MACHDEP, CTL_EOL);
/* Console device, served by sysctl_consdev. */
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT,
CTLTYPE_STRUCT, "console_device", NULL,
sysctl_consdev, 0, NULL, sizeof(dev_t),
CTL_MACHDEP, CPU_CONSDEV, CTL_EOL);
/* BIOS memory sizes and nkpde, read straight from the variables. */
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT,
CTLTYPE_INT, "biosbasemem", NULL,
NULL, 0, &biosbasemem, 0,
CTL_MACHDEP, CPU_BIOSBASEMEM, CTL_EOL);
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT,
CTLTYPE_INT, "biosextmem", NULL,
NULL, 0, &biosextmem, 0,
CTL_MACHDEP, CPU_BIOSEXTMEM, CTL_EOL);
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT,
CTLTYPE_INT, "nkpde", NULL,
NULL, 0, &nkpde, 0,
CTL_MACHDEP, CPU_NKPDE, CTL_EOL);
/* Boot path and disk list, served by the helpers above. */
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT,
CTLTYPE_STRING, "booted_kernel", NULL,
sysctl_machdep_booted_kernel, 0, NULL, 0,
CTL_MACHDEP, CPU_BOOTED_KERNEL, CTL_EOL);
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT,
CTLTYPE_STRUCT, "diskinfo", NULL,
sysctl_machdep_diskinfo, 0, NULL, 0,
CTL_MACHDEP, CPU_DISKINFO, CTL_EOL);
/* FPU / SSE capability flags, read straight from the variables. */
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT,
CTLTYPE_INT, "fpu_present", NULL,
NULL, 0, &i386_fpu_present, 0,
CTL_MACHDEP, CPU_FPU_PRESENT, CTL_EOL);
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT,
CTLTYPE_INT, "osfxsr", NULL,
NULL, 0, &i386_use_fxsave, 0,
CTL_MACHDEP, CPU_OSFXSR, CTL_EOL);
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT,
CTLTYPE_INT, "sse", NULL,
NULL, 0, &i386_has_sse, 0,
CTL_MACHDEP, CPU_SSE, CTL_EOL);
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT,
CTLTYPE_INT, "sse2", NULL,
NULL, 0, &i386_has_sse2, 0,
CTL_MACHDEP, CPU_SSE2, CTL_EOL);
/*
 * Unlike the other entries this one passes CTL_CREATE instead of a
 * fixed CPU_* number (presumably so the number is assigned
 * dynamically -- confirm against sysctl(9)).
 */
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT,
CTLTYPE_STRING, "cpu_brand", NULL,
NULL, 0, &cpu_brand_string, 0,
CTL_MACHDEP, CTL_CREATE, CTL_EOL);
/*
 * LongRun entries, all served by sysctl_machdep_tm_longrun. Only
 * the mode entry is created with CTLFLAG_READWRITE.
 */
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
CTLTYPE_INT, "tm_longrun_mode", NULL,
sysctl_machdep_tm_longrun, 0, NULL, 0,
CTL_MACHDEP, CPU_TMLR_MODE, CTL_EOL);
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT,
CTLTYPE_INT, "tm_longrun_frequency", NULL,
sysctl_machdep_tm_longrun, 0, NULL, 0,
CTL_MACHDEP, CPU_TMLR_FREQUENCY, CTL_EOL);
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT,
CTLTYPE_INT, "tm_longrun_voltage", NULL,
sysctl_machdep_tm_longrun, 0, NULL, 0,
CTL_MACHDEP, CPU_TMLR_VOLTAGE, CTL_EOL);
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT,
CTLTYPE_INT, "tm_longrun_percentage", NULL,
sysctl_machdep_tm_longrun, 0, NULL, 0,
CTL_MACHDEP, CPU_TMLR_PERCENTAGE, CTL_EOL);
}
/*
 * Return the stack address at which a signal frame for 'sig' should be
 * built for lwp 'l', and report through *onstack whether that is the
 * alternate signal stack.
 */
void *
getframe(struct lwp *l, int sig, int *onstack)
{
	struct proc *p = l->l_proc;
	struct sigctx *ctx = &p->p_sigctx;
	struct trapframe *tf = l->l_md.md_regs;

	/*
	 * Switch to the signal stack only if it is neither disabled nor
	 * already in use, and the handler asked for it.
	 */
	if ((ctx->ps_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) != 0)
		*onstack = 0;
	else
		*onstack = (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;

	if (*onstack) {
		/* Start at the end (base + size) of the signal stack. */
		return (char *)ctx->ps_sigstk.ss_sp + ctx->ps_sigstk.ss_size;
	}

#ifdef VM86
	/* In VM86 mode the stack address is ss * 16 + esp. */
	if (tf->tf_eflags & PSL_VM)
		return (void *)(tf->tf_esp + (tf->tf_ss << 4));
#endif

	return (void *)tf->tf_esp;
}
/*
 * Build context to run handler in. We invoke the handler
 * directly, only returning via the trampoline. Note the
 * trampoline version numbers are coordinated with machine-
 * dependent code in libc.
 *
 * The lwp's trapframe is rewritten so that it resumes at 'catcher'
 * with stack pointer 'fp', code selector 'sel' and user data
 * selectors in all data segment registers.
 */
void
buildcontext(struct lwp *l, int sel, void *catcher, void *fp)
{
	struct trapframe *tf = l->l_md.md_regs;
	const int udata_sel = GSEL(GUDATA_SEL, SEL_UPL);

	/* Data segment registers. */
	tf->tf_gs = udata_sel;
	tf->tf_fs = udata_sel;
	tf->tf_es = udata_sel;
	tf->tf_ds = udata_sel;

	/* Code: enter the handler. */
	tf->tf_eip = (int)catcher;
	tf->tf_cs = GSEL(sel, SEL_UPL);
	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);

	/* Stack: the frame becomes the new top of stack. */
	tf->tf_esp = (int)fp;
	tf->tf_ss = udata_sel;
}
/*
 * Deliver a signal using the siginfo-style frame (trampoline version 2).
 *
 * A struct sigframe_siginfo is assembled in kernel memory, copied to
 * the user stack chosen by getframe(), and the lwp's trapframe is then
 * redirected to the handler by buildcontext(). If the trampoline
 * version is not 2, or the frame cannot be copied out, the process is
 * terminated with SIGILL.
 */
static void
sendsig_siginfo(const ksiginfo_t *ksi, const sigset_t *mask)
{
	struct lwp *l = curlwp;
	struct proc *p = l->l_proc;
	struct pmap *pmap = vm_map_pmap(&p->p_vmspace->vm_map);
	int sel = pmap->pm_hiexec > I386_MAX_EXE_ADDR ?
	    GUCODEBIG_SEL : GUCODE_SEL;
	struct sigacts *ps = p->p_sigacts;
	int onstack;
	int sig = ksi->ksi_signo;
	struct sigframe_siginfo *fp = getframe(l, sig, &onstack), frame;
	sig_t catcher = SIGACTION(p, sig).sa_handler;
	struct trapframe *tf = l->l_md.md_regs;

	/* Make room for one frame below the address getframe() returned. */
	fp--;

	/* Build stack frame for signal trampoline. */
	switch (ps->sa_sigdesc[sig].sd_vers) {
	case 0:		/* handled by sendsig_sigcontext */
	case 1:		/* handled by sendsig_sigcontext */
	default:	/* unknown version */
		printf("nsendsig: bad version %d\n",
		    ps->sa_sigdesc[sig].sd_vers);
		sigexit(l, SIGILL);
		/* NOTREACHED */
	case 2:
		break;
	}

	/*
	 * The whole frame is copied out to userland below. Clear it
	 * first so that no byte that is not explicitly filled in (unused
	 * fields, padding) carries stale kernel stack contents across
	 * the user/kernel boundary. This also leaves uc_stack zeroed.
	 */
	memset(&frame, 0, sizeof(frame));

	frame.sf_ra = (int)ps->sa_sigdesc[sig].sd_tramp;
	frame.sf_signum = sig;
	/* These point at the user-space copy of the frame, not at 'frame'. */
	frame.sf_sip = &fp->sf_si;
	frame.sf_ucp = &fp->sf_uc;
	frame.sf_si._info = ksi->ksi_info;
	frame.sf_uc.uc_flags = _UC_SIGMASK|_UC_VM;
	frame.sf_uc.uc_sigmask = *mask;
	frame.sf_uc.uc_link = NULL;
	frame.sf_uc.uc_flags |= (p->p_sigctx.ps_sigstk.ss_flags & SS_ONSTACK)
	    ? _UC_SETSTACK : _UC_CLRSTACK;
	cpu_getmcontext(l, &frame.sf_uc.uc_mcontext, &frame.sf_uc.uc_flags);

	if (tf->tf_eflags & PSL_VM)
		(*p->p_emul->e_syscall_intern)(p);

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		sigexit(l, SIGILL);
		/* NOTREACHED */
	}

	buildcontext(l, sel, catcher, fp);

	/* Remember that we're now on the signal stack. */
	if (onstack)
		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
}
/*
 * Deliver a signal to the current process, picking the frame builder
 * that matches the trampoline version registered for the signal.
 */
void
sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
{
#ifdef COMPAT_16
	/* Trampoline versions below 2 use the old sigcontext frame. */
	if (curproc->p_sigacts->sa_sigdesc[ksi->ksi_signo].sd_vers < 2) {
		sendsig_sigcontext(ksi, mask);
		return;
	}
#endif
	sendsig_siginfo(ksi, mask);
}
/*
 * Arrange for lwp 'l' to run the scheduler-activations upcall handler
 * 'upcall' on user stack 'sp'.
 *
 * A struct saframe holding the upcall arguments is copied to the user
 * stack just below 'sp'; the trapframe is then pointed at the handler.
 * If the copy fails the process is terminated with SIGILL.
 */
void
cpu_upcall(struct lwp *l, int type, int nevents, int ninterrupted, void *sas,
    void *ap, void *sp, sa_upcall_t upcall)
{
	struct pmap *pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
	struct trapframe *tf = l->l_md.md_regs;
	struct saframe frame;
	struct saframe *usf;
	const int udata_sel = GSEL(GUDATA_SEL, SEL_UPL);

	/* Assemble the frame in kernel memory. */
	frame.sa_ra = 0;
	frame.sa_type = type;
	frame.sa_sas = sas;
	frame.sa_events = nevents;
	frame.sa_interrupted = ninterrupted;
	frame.sa_arg = ap;

	/* Its user-space home is one frame below the supplied stack top. */
	usf = (struct saframe *)sp - 1;
	if (copyout(&frame, usf, sizeof(frame)) != 0) {
		/* Copying onto the stack didn't work. Die. */
		sigexit(l, SIGILL);
		/* NOTREACHED */
	}

	tf->tf_eip = (int) upcall;
	tf->tf_esp = (int) usf;
	tf->tf_ebp = 0; /* indicate call-frame-top to debuggers */

	tf->tf_gs = udata_sel;
	tf->tf_fs = udata_sel;
	tf->tf_es = udata_sel;
	tf->tf_ds = udata_sel;
	if (pmap->pm_hiexec > I386_MAX_EXE_ADDR)
		tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
	else
		tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
	tf->tf_ss = udata_sel;

	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
}
int waittime = -1;
struct pcb dumppcb;
/*
 * Halt, power down or reboot the machine according to the RB_* bits
 * in 'howto'. Does not return: the function ends in cpu_reset()
 * followed by an endless loop.
 *
 * 'bootstr' is not used by this implementation.
 */
void
cpu_reboot(int howto, char *bootstr)
{
/*
 * If 'cold' is set, skip syncing and dumping and force a halt.
 * (NOTE(review): 'cold' presumably means autoconfiguration has not
 * finished yet -- confirm.)
 */
if (cold) {
howto |= RB_HALT;
goto haltsys;
}
boothowto = howto;
/*
 * waittime starts out as -1 and is set to 0 here, so the file
 * systems are shut down at most once even if we get here again.
 */
if ((howto & RB_NOSYNC) == 0 && waittime < 0) {
waittime = 0;
vfs_shutdown();
/*
* If we've been adjusting the clock, the todr
* will be out of synch; adjust it now.
*/
if (time_adjusted != 0)
resettodr();
}
/* Disable interrupts. */
splhigh();
/* Do a dump if requested. */
/* (Only when RB_DUMP is set and RB_HALT is not.) */
if ((howto & (RB_DUMP | RB_HALT)) == RB_DUMP)
dumpsys();
haltsys:
doshutdownhooks();
#ifdef MULTIPROCESSOR
x86_broadcast_ipi(X86_IPI_HALT);
#endif
if ((howto & RB_POWERDOWN) == RB_POWERDOWN) {
#if NACPI > 0
/* Try ACPI first; reaching the printf means it did not power off. */
if (acpi_softc != NULL) {
delay(500000);
acpi_enter_sleep_state(acpi_softc, ACPI_STATE_S5);
printf("WARNING: ACPI powerdown failed!\n");
}
#endif
#if NAPMBIOS > 0 && !defined(APM_NO_POWEROFF)
/* turn off, if we can. But try to turn disk off and
* wait a bit first--some disk drives are slow to clean up
* and users have reported disk corruption.
*/
delay(500000);
apm_set_powstate(NULL, APM_DEV_DISK(APM_DEV_ALLUNITS), APM_SYS_OFF);
delay(500000);
apm_set_powstate(NULL, APM_DEV_ALLDEVS, APM_SYS_OFF);
printf("WARNING: APM powerdown failed!\n");
/*
* RB_POWERDOWN implies RB_HALT... fall into it...
*/
#endif
}
if (howto & RB_HALT) {
printf("\n");
printf("The operating system has halted.\n");
printf("Please press any key to reboot.\n\n");
#ifdef BEEP_ONHALT
{
/* Beep BEEP_ONHALT_COUNT times, with a pause after each beep. */
int c;
for (c = BEEP_ONHALT_COUNT; c > 0; c--) {
sysbeep(BEEP_ONHALT_PITCH,
BEEP_ONHALT_PERIOD * hz / 1000);
delay(BEEP_ONHALT_PERIOD * 1000);
sysbeep(0, BEEP_ONHALT_PERIOD * hz / 1000);
delay(BEEP_ONHALT_PERIOD * 1000);
}
}
#endif
cnpollc(1); /* for proper keyboard command handling */
if (cngetc() == 0) {
/* no console attached, so just hlt */
for(;;) {
__asm volatile("hlt");
}
}
cnpollc(0);
}
printf("rebooting...\n");
/* cpureset_delay is in milliseconds; delay() is called with microseconds. */
if (cpureset_delay > 0)
delay(cpureset_delay * 1000);
cpu_reset();
for(;;) ;
/*NOTREACHED*/
}
/*
* These variables are needed by /sbin/savecore
*/
uint32_t dumpmag = 0x8fca0101; /* magic number */
int dumpsize = 0; /* pages */
long dumplo = 0; /* blocks */
/*
 * cpu_dumpsize: calculate size of machine-dependent kernel core dump headers.
 *
 * Returns 1 (disk block) when the headers round up to exactly one
 * block, and -1 otherwise.
 */
int
cpu_dumpsize()
{
	int hdrbytes;

	/* Segment header + CPU header + one descriptor per memory cluster. */
	hdrbytes = ALIGN(sizeof(kcore_seg_t)) +
	    ALIGN(sizeof(cpu_kcore_hdr_t)) +
	    ALIGN(mem_cluster_cnt * sizeof(phys_ram_seg_t));

	return (roundup(hdrbytes, dbtob(1)) == dbtob(1)) ? (1) : (-1);
}
/*
 * cpu_dump_mempagecnt: calculate the size of RAM (in pages) to be dumped.
 *
 * This is the sum, over all entries of mem_clusters[], of the cluster
 * size converted to pages.
 */
u_long
cpu_dump_mempagecnt()
{
	u_long npages = 0;
	u_long cluster = 0;

	while (cluster < mem_cluster_cnt) {
		npages += atop(mem_clusters[cluster].size);
		cluster++;
	}

	return (npages);
}
/*
* cpu_dump: dump the machine-dependent kernel core dump headers.
*/
int
cpu_dump()
{
int (*dump)(dev_t, daddr_t, caddr_t, size_t);
char bf[dbtob(1)];
kcore_seg_t *segp;
cpu_kcore_hdr_t *cpuhdrp;
phys_ram_seg_t *memsegp;
const struct bdevsw *bdev;
int i;
bdev = bdevsw_lookup(dumpdev);
if (bdev == NULL)
return (ENXIO);
dump = bdev->d_dump;
memset(bf, 0, sizeof bf);
segp = (kcore_seg_t *)bf;
cpuhdrp = (cpu_kcore_hdr_t *)&bf[ALIGN(sizeof(*segp))];
memsegp = (phys_ram_seg_t *)&bf[ ALIGN(sizeof(*segp)) +
ALIGN(sizeof(*cpuhdrp))];
/*
* Generate a segment header.
*/
CORE_SETMAGIC(*segp, KCORE_MAGIC, MID_MACHINE, CORE_CPU);
segp->c_size = dbtob(1) - ALIGN(sizeof(*segp));
/*
* Add the machine-dependent header info.
*/
cpuhdrp->pdppaddr = PDPpaddr;
cpuhdrp->nmemsegs = mem_cluster_cnt;
/*
* Fill in the memory segment descriptors.
*/
for (i = 0; i < mem_cluster_cnt; i++) {
memsegp[i].start = mem_clusters[i].start;
memsegp[i].size = mem_clusters[i].size;
}
return (dump(dumpdev, dumplo, (caddr_t)bf, dbtob(1)));
}
/*
 * Locate the dump area on the dump device. On success sets dumplo and
 * dumpsize and returns 1; returns 0 if no usable area exists. May
 * reset dumpdev to NODEV when the device has no block switch entry.
 */
static int
cpu_dumpconf_probe(void)
{
	const struct bdevsw *bdev;
	int nblks, dumpblks;	/* size of dump area */

	if (dumpdev == NODEV)
		return (0);

	bdev = bdevsw_lookup(dumpdev);
	if (bdev == NULL) {
		dumpdev = NODEV;
		return (0);
	}
	if (bdev->d_psize == NULL)
		return (0);

	/* The partition must hold more than the leading reserved page. */
	nblks = (*bdev->d_psize)(dumpdev);
	if (nblks <= ctod(1))
		return (0);

	dumpblks = cpu_dumpsize();
	if (dumpblks < 0)
		return (0);
	dumpblks += ctod(cpu_dump_mempagecnt());

	/* If dump won't fit (incl. room for possible label), punt. */
	if (dumpblks > (nblks - ctod(1)))
		return (0);

	/* Put dump at end of partition */
	dumplo = nblks - dumpblks;

	/* dumpsize is in page units, and doesn't include headers. */
	dumpsize = cpu_dump_mempagecnt();
	return (1);
}

/*
 * This is called by main to set dumplo and dumpsize.
 * Dumps always skip the first PAGE_SIZE of disk space
 * in case there might be a disk label stored there.
 * If there is extra space, put dump at the end to
 * reduce the chance that swapping trashes it.
 *
 * When no usable dump area is found, dumpsize is set to 0.
 */
void
cpu_dumpconf()
{
	if (!cpu_dumpconf_probe())
		dumpsize = 0;
}
/*
* Doadump comes here after turning off memory management and
* getting on the dump stack, either when called above, or by
* the auto-restart code.
*/
#define BYTES_PER_DUMP PAGE_SIZE /* must be a multiple of pagesize XXX small */
static vaddr_t dumpspace;
/*
 * Record 'p' as the virtual address of the dump window (dumpspace) and
 * return the first address after the BYTES_PER_DUMP bytes reserved
 * for it.
 */
vaddr_t
reserve_dumppages(vaddr_t p)
{
	const vaddr_t next_free = p + BYTES_PER_DUMP;

	dumpspace = p;
	return (next_free);
}
/*
 * Write a crash dump to the dump device: first the machine-dependent
 * headers (cpu_dump()), then every memory cluster listed in
 * mem_clusters[], BYTES_PER_DUMP bytes at a time through the
 * 'dumpspace' mapping window. Progress and the final status are
 * printed on the console.
 */
void
dumpsys()
{
u_long totalbytesleft, bytes, i, n, m, memseg;
u_long maddr;
int psize;
daddr_t blkno;
const struct bdevsw *bdev;
int (*dump)(dev_t, daddr_t, caddr_t, size_t);
int error;
/* Save registers. */
savectx(&dumppcb);
if (dumpdev == NODEV)
return;
bdev = bdevsw_lookup(dumpdev);
if (bdev == NULL || bdev->d_psize == NULL)
return;
/*
* For dumps during autoconfiguration,
* if dump device has already configured...
*/
if (dumpsize == 0)
cpu_dumpconf();
/* Still no dump area after (re)configuration: give up. */
if (dumplo <= 0 || dumpsize == 0) {
printf("\ndump to dev %u,%u not possible\n", major(dumpdev),
minor(dumpdev));
return;
}
printf("\ndumping to dev %u,%u offset %ld\n", major(dumpdev),
minor(dumpdev), dumplo);
psize = (*bdev->d_psize)(dumpdev);
printf("dump ");
if (psize == -1) {
printf("area unavailable\n");
return;
}
#if 0 /* XXX this doesn't work. grr. */
/* toss any characters present prior to dump */
while (sget() != NULL); /*syscons and pccons differ */
#endif
/* Headers first. */
if ((error = cpu_dump()) != 0)
goto err;
totalbytesleft = ptoa(cpu_dump_mempagecnt());
/* Memory contents start right after the header block(s). */
blkno = dumplo + cpu_dumpsize();
dump = bdev->d_dump;
error = 0;
for (memseg = 0; memseg < mem_cluster_cnt; memseg++) {
maddr = mem_clusters[memseg].start;
bytes = mem_clusters[memseg].size;
/* 'n' is set inside the body before the loop increment uses it. */
for (i = 0; i < bytes; i += n, totalbytesleft -= n) {
/* Print out how many MBs we have left to go. */
if ((totalbytesleft % (1024*1024)) == 0)
printf("%ld ", totalbytesleft / (1024 * 1024));
/* Limit size for next transfer. */
n = bytes - i;
if (n > BYTES_PER_DUMP)
n = BYTES_PER_DUMP;
/* Map the next chunk of physical memory read-only at dumpspace. */
for (m = 0; m < n; m += NBPG)
pmap_kenter_pa(dumpspace + m, maddr + m,
VM_PROT_READ);
error = (*dump)(dumpdev, blkno, (caddr_t)dumpspace, n);
if (error)
goto err;
maddr += n;
blkno += btodb(n); /* XXX? */
#if 0 /* XXX this doesn't work. grr. */
/* operator aborting dump? */
if (sget() != NULL) {
error = EINTR;
break;
}
#endif
}
}
/* Reached on success (error == 0) as well as on failure. */
err:
switch (error) {
case ENXIO:
printf("device bad\n");
break;
case EFAULT:
printf("device not ready\n");
break;
case EINVAL:
printf("area improper\n");
break;
case EIO:
printf("i/o error\n");
break;
case EINTR:
printf("aborted from console\n");
break;
case 0:
printf("succeeded\n");
break;
default:
printf("error %d\n", error);
break;
}
printf("\n\n");
delay(5000000); /* 5 seconds */
}
/*
 * Clear registers on exec.
 *
 * Resets the lwp's FPU state and trapframe so that the new image
 * starts at pack->ep_entry with stack pointer 'stack', user segment
 * selectors, and all general registers zero except %ebx, which
 * receives the process's p_psstr.
 */
void
setregs(struct lwp *l, struct exec_package *pack, u_long stack)
{
	struct pmap *pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
	struct pcb *pcb = &l->l_addr->u_pcb;
	struct trapframe *tf;
	int udata_sel;

#if NNPX > 0
	/* If we were using the FPU, forget about it. */
	if (pcb->pcb_fpcpu != NULL)
		npxsave_lwp(l, 0);
#endif

#ifdef USER_LDT
	pmap_ldt_cleanup(l);
#endif

	/* Fresh FPU state: control word (and MXCSR when using fxsave). */
	l->l_md.md_flags &= ~MDL_USEDFPU;
	if (!i386_use_fxsave) {
		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __NetBSD_NPXCW__;
	} else {
		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __NetBSD_NPXCW__;
		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
	}

	tf = l->l_md.md_regs;
	udata_sel = LSEL(LUDATA_SEL, SEL_UPL);

	/* Segment registers. */
	tf->tf_gs = udata_sel;
	tf->tf_fs = udata_sel;
	tf->tf_es = udata_sel;
	tf->tf_ds = udata_sel;

	/* General registers. */
	tf->tf_edi = 0;
	tf->tf_esi = 0;
	tf->tf_ebp = 0;
	tf->tf_ebx = (int)l->l_proc->p_psstr;
	tf->tf_edx = 0;
	tf->tf_ecx = 0;
	tf->tf_eax = 0;

	/* Entry point, flags and stack. */
	tf->tf_eip = pack->ep_entry;
	if (pmap->pm_hiexec > I386_MAX_EXE_ADDR)
		tf->tf_cs = LSEL(LUCODEBIG_SEL, SEL_UPL);
	else
		tf->tf_cs = LSEL(LUCODE_SEL, SEL_UPL);
	tf->tf_eflags = PSL_USERSET;
	tf->tf_esp = stack;
	tf->tf_ss = udata_sel;
}
/*
* Initialize segments and descriptor tables
*/
union descriptor *gdt, *ldt;
struct gate_descriptor *idt;
char idt_allocmap[NIDT];
struct simplelock idt_lock = SIMPLELOCK_INITIALIZER;
#ifdef I586_CPU
union descriptor *pentium_idt;
#endif
struct user *proc0paddr;
extern vaddr_t proc0uarea;
/*
 * Fill in gate descriptor 'gd' so that it is present and refers to
 * handler 'func' in code segment 'sel', with the given argument count,
 * gate type and descriptor privilege level.
 */
void
setgate(struct gate_descriptor *gd, void *func, int args, int type, int dpl,
    int sel)
{
	const int entry = (int)func;

	/* The handler address is split into two halves. */
	gd->gd_looffset = entry;
	gd->gd_hioffset = entry >> 16;

	gd->gd_selector = sel;
	gd->gd_stkcpy = args;
	gd->gd_xx = 0;
	gd->gd_type = type;
	gd->gd_dpl = dpl;
	gd->gd_p = 1;
}
/*
 * unsetgate:
 *
 *	Invalidate gate descriptor `gd': clear the present bit and zero
 *	every other field that setgate() fills in.
 */
void
unsetgate(struct gate_descriptor *gd)
{
	/* The present bit goes first, as in the historical code. */
	gd->gd_p = 0;

	gd->gd_looffset = 0;
	gd->gd_hioffset = 0;
	gd->gd_selector = 0;
	gd->gd_stkcpy = 0;
	gd->gd_xx = 0;
	gd->gd_type = 0;
	gd->gd_dpl = 0;
}
/*
 * setregion:
 *
 *	Store `base' and `limit' in region descriptor `rd', the operand
 *	format passed to lgdt()/lidt() elsewhere in this file.
 */
void
setregion(struct region_descriptor *rd, void *base, size_t limit)
{
	rd->rd_base = (int)base;
	rd->rd_limit = (int)limit;
}
/*
 * setsegment:
 *
 *	Fill in segment descriptor `sd' and mark it present.
 *
 *	base	segment base; split across sd_lobase and sd_hibase
 *		(the latter receives base >> 24).
 *	limit	segment limit; split across sd_lolimit and sd_hilimit
 *		(the latter receives limit >> 16).
 *	type	segment type (SDT_*).
 *	dpl	descriptor privilege level.
 *	def32	value for the sd_def32 field.
 *	gran	value for the sd_gran field.
 */
void
setsegment(struct segment_descriptor *sd, void *base, size_t limit, int type,
    int dpl, int def32, int gran)
{
	const int ibase = (int)base;
	const int ilimit = (int)limit;

	sd->sd_lobase = ibase;
	sd->sd_hibase = ibase >> 24;

	sd->sd_lolimit = ilimit;
	sd->sd_hilimit = ilimit >> 16;

	sd->sd_type = type;
	sd->sd_dpl = dpl;
	sd->sd_p = 1;
	sd->sd_xx = 0;
	sd->sd_def32 = def32;
	sd->sd_gran = gran;
}
/* Name of an externally defined vector entry: IDTVEC(foo) expands to Xfoo. */
#define IDTVEC(name) __CONCAT(X, name)
/* Function type used for the vector entry points declared below. */
typedef void (vector)(void);
extern vector IDTVEC(syscall);
extern vector IDTVEC(osyscall);
/* Table of entry points indexed by vector number; init386() installs 0..31. */
extern vector *IDTVEC(exceptions)[];
#ifdef COMPAT_SVR4
extern vector IDTVEC(svr4_fasttrap);
#endif /* COMPAT_SVR4 */
#ifdef COMPAT_MACH
extern vector IDTVEC(mach_trap);
#endif
/* Convert a count of kilobytes to bytes, computed as size_t. */
#define KBTOB(x) ((size_t)(x) * 1024UL)
/*
 * cpu_init_idt:
 *
 *	Load the IDT register.  The region covers NIDT gate descriptors;
 *	with I586_CPU the table is addressed through pentium_idt,
 *	otherwise through idt.
 */
void
cpu_init_idt(void)
{
	struct region_descriptor region;

#ifdef I586_CPU
	setregion(&region, pentium_idt, NIDT * sizeof(idt[0]) - 1);
#else
	setregion(&region, idt, NIDT * sizeof(idt[0]) - 1);
#endif
	lidt(&region);
}
/*
 * add_mem_cluster:
 *
 *	Register the physical range [seg_start, seg_end) taken from the
 *	boot-time memory map.
 *
 *	Ranges extending beyond 4GB, empty ranges and ranges that
 *	duplicate an existing mem_clusters[] entry are ignored.  Every
 *	other range is reserved in the iomem extent map.  If `type' is
 *	BIM_Memory the page-aligned interior of the range is then
 *	appended to mem_clusters[], and physmem and avail_end are
 *	updated to match.
 */
void
add_mem_cluster(uint64_t seg_start, uint64_t seg_end, uint32_t type)
{
	extern struct extent *iomem_ex;
	const uint64_t limit_4g = 0x100000000ULL;
	uint64_t pg_start, pg_end;
	int n;

	if (seg_end > limit_4g) {
		printf("WARNING: skipping large "
		    "memory map entry: "
		    "0x%qx/0x%qx/0x%x\n",
		    seg_start,
		    (seg_end - seg_start),
		    type);
		return;
	}

	/*
	 * XXX Chop the last page off the size so that
	 * XXX it can fit in avail_end.
	 */
	if (seg_end == limit_4g)
		seg_end -= PAGE_SIZE;

	if (seg_end <= seg_start)
		return;

	/* Page-aligned interior of the range; the raw bounds stay intact. */
	pg_start = round_page(seg_start);
	pg_end = trunc_page(seg_end);

	for (n = 0; n < mem_cluster_cnt; n++) {
		if (mem_clusters[n].start != pg_start)
			continue;
		if (mem_clusters[n].size != pg_end - mem_clusters[n].start)
			continue;
#ifdef DEBUG_MEMLOAD
		printf("WARNING: skipping duplicate segment entry\n");
#endif
		return;
	}

	/*
	 * Allocate the physical addresses used by RAM
	 * from the iomem extent map.  This is done with
	 * the unrounded addresses just to make
	 * sure we get them all.
	 */
	if (extent_alloc_region(iomem_ex, seg_start,
	    seg_end - seg_start, EX_NOWAIT)) {
		/* XXX What should we do? */
		printf("WARNING: CAN'T ALLOCATE "
		    "MEMORY SEGMENT "
		    "(0x%qx/0x%qx/0x%x) FROM "
		    "IOMEM EXTENT MAP!\n",
		    seg_start, seg_end - seg_start, type);
		return;
	}

	/*
	 * If it's not free memory, skip it.
	 */
	if (type != BIM_Memory)
		return;

	/* XXX XXX XXX */
	if (mem_cluster_cnt >= VM_PHYSSEG_MAX)
		panic("init386: too many memory segments "
		    "(increase VM_PHYSSEG_MAX)");

	/* Nothing left once the range is trimmed to whole pages. */
	if (pg_start == pg_end)
		return;

	mem_clusters[mem_cluster_cnt].start = pg_start;
	mem_clusters[mem_cluster_cnt].size = pg_end - pg_start;

	if (avail_end < pg_end)
		avail_end = pg_end;
	physmem += atop(mem_clusters[mem_cluster_cnt].size);
	mem_cluster_cnt++;
}
/*
 * initgdt:
 *
 *	Build the initial global descriptor table in the caller-supplied
 *	storage `tgdt' (which must hold NGDT descriptors), make it the
 *	current `gdt', and load it into the CPU with lgdt().
 *
 *	init386() later copies this table to its final location.
 */
void
initgdt(union descriptor *tgdt)
{
	struct region_descriptor region;

	gdt = tgdt;
	memset(gdt, 0, NGDT*sizeof(*gdt));

	/* make gdt gates and memory segments */
	setsegment(&gdt[GCODE_SEL].sd, 0, 0xfffff, SDT_MEMERA, SEL_KPL, 1, 1);
	setsegment(&gdt[GDATA_SEL].sd, 0, 0xfffff, SDT_MEMRWA, SEL_KPL, 1, 1);
	setsegment(&gdt[GUCODE_SEL].sd, 0, x86_btop(I386_MAX_EXE_ADDR) - 1,
	    SDT_MEMERA, SEL_UPL, 1, 1);
	setsegment(&gdt[GUCODEBIG_SEL].sd, 0, x86_btop(VM_MAXUSER_ADDRESS) - 1,
	    SDT_MEMERA, SEL_UPL, 1, 1);
	setsegment(&gdt[GUDATA_SEL].sd, 0, x86_btop(VM_MAXUSER_ADDRESS) - 1,
	    SDT_MEMRWA, SEL_UPL, 1, 1);
#ifdef COMPAT_MACH
	setgate(&gdt[GMACHCALLS_SEL].gd, &IDTVEC(mach_trap), 1,
	    SDT_SYS386CGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
#endif
#if NBIOSCALL > 0
	/* bios trampoline GDT entries */
	setsegment(&gdt[GBIOSCODE_SEL].sd, 0, 0xfffff, SDT_MEMERA, SEL_KPL, 0,
	    0);
	setsegment(&gdt[GBIOSDATA_SEL].sd, 0, 0xfffff, SDT_MEMRWA, SEL_KPL, 0,
	    0);
#endif
	/*
	 * Segment based at the primary CPU's cpu_info.
	 * NOTE(review): the limit is given in bytes while gran is 1 --
	 * confirm this is intended.
	 */
	setsegment(&gdt[GCPU_SEL].sd, &cpu_info_primary,
	    sizeof(struct cpu_info)-1, SDT_MEMRWA, SEL_KPL, 1, 1);

	setregion(&region, gdt, NGDT * sizeof(gdt[0]) - 1);
	lgdt(&region);
}
/*
 * init386:
 *
 *	Machine-dependent bootstrap for the primary CPU.
 *
 *	first_avail	first physical address past the space used by
 *			the kernel; rounded up to a page boundary here.
 *
 *	In order, this routine:
 *	 - probes CPU features and sets up lwp0's PCB pointer, the bus
 *	   spaces and the console;
 *	 - reserves low physical pages for real-mode users (BIOS call
 *	   trampoline, MP trampoline, ACPI wake code) plus a page table
 *	   page to map them;
 *	 - bootstraps the pmap;
 *	 - builds mem_clusters[] from the BIOS memory map, or from
 *	   biosbasemem/biosextmem when no usable map exists;
 *	 - loads the clusters into UVM, splitting around the kernel and
 *	   at the 16MB line;
 *	 - steals pages from the end of memory for the message buffer;
 *	 - installs the reserved low-memory mappings;
 *	 - builds the final IDT/GDT/LDT and loads them;
 *	 - initializes symbols, debuggers and interrupts.
 */
void
init386(paddr_t first_avail)
{
	union descriptor *tgdt;
	extern void consinit(void);
	extern struct extent *iomem_ex;
	struct btinfo_memmap *bim;
	struct region_descriptor region;
	int x, first16q;
	uint64_t seg_start, seg_end;
	uint64_t seg_start1, seg_end1;
	paddr_t realmode_reserved_start;
	psize_t realmode_reserved_size;
	int needs_earlier_install_pte0;
#if NBIOSCALL > 0
	extern int biostramp_image_size;
	extern u_char biostramp_image[];
#endif

	cpu_probe_features(&cpu_info_primary);
	cpu_feature = cpu_info_primary.ci_feature_flags;
	cpu_feature2 = cpu_info_primary.ci_feature2_flags;

	proc0paddr = UAREA_TO_USER(proc0uarea);
	lwp0.l_addr = proc0paddr;
	cpu_info_primary.ci_curpcb = &lwp0.l_addr->u_pcb;

	x86_bus_space_init();
	consinit();	/* XXX SHOULD NOT BE DONE HERE */

	/*
	 * Initialize PAGE_SIZE-dependent variables.
	 */
	uvm_setpagesize();

	/*
	 * Saving SSE registers won't work if the save area isn't
	 * 16-byte aligned.
	 */
	if (offsetof(struct user, u_pcb.pcb_savefpu) & 0xf)
		panic("init386: pcb_savefpu not 16-byte aligned");

	/*
	 * Start with 2 color bins -- this is just a guess to get us
	 * started.  We'll recolor when we determine the largest cache
	 * sizes on the system.
	 */
	uvmexp.ncolors = 2;

	/*
	 * BIOS leaves data in physical page 0
	 * Even if it didn't, our VM system doesn't like using zero as a
	 * physical page number.
	 * We may also need pages in low memory (one each) for secondary CPU
	 * startup, for BIOS calls, and for ACPI, plus a page table page to map
	 * them into the first few pages of the kernel's pmap.
	 */
	avail_start = PAGE_SIZE;

	/*
	 * reserve memory for real-mode call
	 */
	needs_earlier_install_pte0 = 0;
	realmode_reserved_start = 0;
	realmode_reserved_size = 0;
#if NBIOSCALL > 0
	/* save us a page for trampoline code */
	realmode_reserved_size += PAGE_SIZE;
	needs_earlier_install_pte0 = 1;
#endif
#ifdef MULTIPROCESSOR						 /* XXX */
	KASSERT(avail_start == PAGE_SIZE);			 /* XXX */
	if (realmode_reserved_size < MP_TRAMPOLINE)		 /* XXX */
		realmode_reserved_size = MP_TRAMPOLINE;		 /* XXX */
	needs_earlier_install_pte0 = 1;				 /* XXX */
#endif								 /* XXX */
#if NACPI > 0
	/* trampoline code for wake handler */
	realmode_reserved_size += ptoa(acpi_md_get_npages_of_wakecode()+1);
	needs_earlier_install_pte0 = 1;
#endif
	if (needs_earlier_install_pte0) {
		/* page table for directory entry 0 */
		realmode_reserved_size += PAGE_SIZE;
	}
	if (realmode_reserved_size>0) {
		realmode_reserved_start = avail_start;
		avail_start += realmode_reserved_size;
	}

#ifdef DEBUG_MEMLOAD
	printf("mem_cluster_count: %d\n", mem_cluster_cnt);
#endif

	/*
	 * Call pmap initialization to make new kernel address space.
	 * We must do this before loading pages into the VM system.
	 */
	pmap_bootstrap((vaddr_t)atdevbase + IOM_SIZE);

	/*
	 * Check to see if we have a memory map from the BIOS (passed
	 * to us by the boot program).
	 */
	if ((biosmem_implicit || (biosbasemem == 0 && biosextmem == 0)) &&
	    (bim = lookup_bootinfo(BTINFO_MEMMAP)) != NULL && bim->num > 0) {
#ifdef DEBUG_MEMLOAD
		printf("BIOS MEMORY MAP (%d ENTRIES):\n", bim->num);
#endif
		for (x = 0; x < bim->num; x++) {
#ifdef DEBUG_MEMLOAD
			printf("    addr 0x%qx  size 0x%qx  type 0x%x\n",
			    bim->entry[x].addr,
			    bim->entry[x].size,
			    bim->entry[x].type);
#endif

			/*
			 * If the segment is not memory, skip it.
			 */
			switch (bim->entry[x].type) {
			case BIM_Memory:
			case BIM_ACPI:
			case BIM_NVS:
				break;
			default:
				continue;
			}

			/*
			 * Sanity check the entry.
			 * XXX Need to handle uint64_t in extent code
			 * XXX and 64-bit physical addresses in i386
			 * XXX port.
			 */
			seg_start = bim->entry[x].addr;
			seg_end = bim->entry[x].addr + bim->entry[x].size;

			/*
			 *   Avoid Compatibility Holes.
			 * XXX  Holes within memory space that allow access
			 * XXX to be directed to the PC-compatible frame buffer
			 * XXX (0xa0000-0xbffff),to adapter ROM space
			 * XXX (0xc0000-0xdffff), and to system BIOS space
			 * XXX (0xe0000-0xfffff).
			 * XXX  Some laptop(for example,Toshiba Satellite2550X)
			 * XXX report this area and occurred problems,
			 * XXX so we avoid this area.
			 */
			if (seg_start < 0x100000 && seg_end > 0xa0000) {
				printf("WARNING: memory map entry overlaps "
				    "with ``Compatibility Holes'': "
				    "0x%qx/0x%qx/0x%x\n", seg_start,
				    seg_end - seg_start, bim->entry[x].type);
				add_mem_cluster(seg_start, 0xa0000,
				    bim->entry[x].type);
				add_mem_cluster(0x100000, seg_end,
				    bim->entry[x].type);
			} else
				add_mem_cluster(seg_start, seg_end,
				    bim->entry[x].type);
		}
	}

	/*
	 * If the loop above didn't find any valid segment, fall back to
	 * former code.
	 */
	if (mem_cluster_cnt == 0) {
		/*
		 * Allocate the physical addresses used by RAM from the iomem
		 * extent map.  This is done before the addresses are
		 * page rounded just to make sure we get them all.
		 */
		if (extent_alloc_region(iomem_ex, 0, KBTOB(biosbasemem),
		    EX_NOWAIT)) {
			/* XXX What should we do? */
			printf("WARNING: CAN'T ALLOCATE BASE MEMORY FROM "
			    "IOMEM EXTENT MAP!\n");
		}
		mem_clusters[0].start = 0;
		mem_clusters[0].size = trunc_page(KBTOB(biosbasemem));
		physmem += atop(mem_clusters[0].size);
		if (extent_alloc_region(iomem_ex, IOM_END, KBTOB(biosextmem),
		    EX_NOWAIT)) {
			/* XXX What should we do? */
			printf("WARNING: CAN'T ALLOCATE EXTENDED MEMORY FROM "
			    "IOMEM EXTENT MAP!\n");
		}
#if NISADMA > 0
		/*
		 * Some motherboards/BIOSes remap the 384K of RAM that would
		 * normally be covered by the ISA hole to the end of memory
		 * so that it can be used.  However, on a 16M system, this
		 * would cause bounce buffers to be allocated and used.
		 * This is not desirable behaviour, as more than 384K of
		 * bounce buffers might be allocated.  As a work-around,
		 * we round memory down to the nearest 1M boundary if
		 * we're using any isadma devices and the remapped memory
		 * is what puts us over 16M.
		 */
		if (biosextmem > (15*1024) && biosextmem < (16*1024)) {
			char pbuf[9];

			format_bytes(pbuf, sizeof(pbuf),
			    biosextmem - (15*1024));
			printf("Warning: ignoring %s of remapped memory\n",
			    pbuf);
			biosextmem = (15*1024);
		}
#endif
		mem_clusters[1].start = IOM_END;
		mem_clusters[1].size = trunc_page(KBTOB(biosextmem));
		physmem += atop(mem_clusters[1].size);

		mem_cluster_cnt = 2;

		avail_end = IOM_END + trunc_page(KBTOB(biosextmem));
	}

	/*
	 * If we have 16M of RAM or less, just put it all on
	 * the default free list.  Otherwise, put the first
	 * 16M of RAM on a lower priority free list (so that
	 * all of the ISA DMA'able memory won't be eaten up
	 * first-off).
	 */
	if (avail_end <= (16 * 1024 * 1024))
		first16q = VM_FREELIST_DEFAULT;
	else
		first16q = VM_FREELIST_FIRST16;

	/* Make sure the end of the space used by the kernel is rounded. */
	first_avail = round_page(first_avail);

	/*
	 * Now, load the memory clusters (which have already been
	 * rounded and truncated) into the VM system.
	 *
	 * NOTE: WE ASSUME THAT MEMORY STARTS AT 0 AND THAT THE KERNEL
	 * IS LOADED AT IOM_END (1M).
	 */
	for (x = 0; x < mem_cluster_cnt; x++) {
		seg_start = mem_clusters[x].start;
		seg_end = mem_clusters[x].start + mem_clusters[x].size;
		seg_start1 = 0;
		seg_end1 = 0;

		/*
		 * Skip memory before our available starting point.
		 */
		if (seg_end <= avail_start)
			continue;

		if (avail_start >= seg_start && avail_start < seg_end) {
			if (seg_start != 0)
				panic("init386: memory doesn't start at 0");
			seg_start = avail_start;
			if (seg_start == seg_end)
				continue;
		}

		/*
		 * If this segment contains the kernel, split it
		 * in two, around the kernel.
		 */
		if (seg_start <= IOM_END && first_avail <= seg_end) {
			seg_start1 = first_avail;
			seg_end1 = seg_end;
			seg_end = IOM_END;
		}

		/* First hunk */
		if (seg_start != seg_end) {
			if (seg_start < (16 * 1024 * 1024) &&
			    first16q != VM_FREELIST_DEFAULT) {
				uint64_t tmp;

				if (seg_end > (16 * 1024 * 1024))
					tmp = (16 * 1024 * 1024);
				else
					tmp = seg_end;

				if (tmp != seg_start) {
#ifdef DEBUG_MEMLOAD
					printf("loading 0x%qx-0x%qx "
					    "(0x%lx-0x%lx)\n",
					    seg_start, tmp,
					    atop(seg_start), atop(tmp));
#endif
					uvm_page_physload(atop(seg_start),
					    atop(tmp), atop(seg_start),
					    atop(tmp), first16q);
				}
				seg_start = tmp;
			}

			if (seg_start != seg_end) {
#ifdef DEBUG_MEMLOAD
				printf("loading 0x%qx-0x%qx (0x%lx-0x%lx)\n",
				    seg_start, seg_end,
				    atop(seg_start), atop(seg_end));
#endif
				uvm_page_physload(atop(seg_start),
				    atop(seg_end), atop(seg_start),
				    atop(seg_end), VM_FREELIST_DEFAULT);
			}
		}

		/* Second hunk */
		if (seg_start1 != seg_end1) {
			if (seg_start1 < (16 * 1024 * 1024) &&
			    first16q != VM_FREELIST_DEFAULT) {
				uint64_t tmp;

				if (seg_end1 > (16 * 1024 * 1024))
					tmp = (16 * 1024 * 1024);
				else
					tmp = seg_end1;

				if (tmp != seg_start1) {
#ifdef DEBUG_MEMLOAD
					printf("loading 0x%qx-0x%qx "
					    "(0x%lx-0x%lx)\n",
					    seg_start1, tmp,
					    atop(seg_start1), atop(tmp));
#endif
					uvm_page_physload(atop(seg_start1),
					    atop(tmp), atop(seg_start1),
					    atop(tmp), first16q);
				}
				seg_start1 = tmp;
			}

			if (seg_start1 != seg_end1) {
#ifdef DEBUG_MEMLOAD
				printf("loading 0x%qx-0x%qx (0x%lx-0x%lx)\n",
				    seg_start1, seg_end1,
				    atop(seg_start1), atop(seg_end1));
#endif
				uvm_page_physload(atop(seg_start1),
				    atop(seg_end1), atop(seg_start1),
				    atop(seg_end1), VM_FREELIST_DEFAULT);
			}
		}
	}

	/*
	 * Steal memory for the message buffer (at end of core).
	 *
	 * The buffer may be assembled from several physical segments:
	 * each pass takes as much as it can from the segment that
	 * currently ends at avail_end and records it in msgbuf_p_seg[].
	 */
	{
		struct vm_physseg *vps;
		psize_t sz = round_page(MSGBUFSIZE);
		psize_t reqsz = sz;

	search_again:
		for (x = 0; x < vm_nphysseg; x++) {
			vps = &vm_physmem[x];
			if (ptoa(vps->avail_end) == avail_end)
				goto found;
		}
		panic("init386: can't find end of memory");

	found:
		/* Shrink so it'll fit in the last segment. */
		if ((vps->avail_end - vps->avail_start) < atop(sz))
			sz = ptoa(vps->avail_end - vps->avail_start);

		vps->avail_end -= atop(sz);
		vps->end -= atop(sz);
		msgbuf_p_seg[msgbuf_p_cnt].sz = sz;
		msgbuf_p_seg[msgbuf_p_cnt++].paddr = ptoa(vps->avail_end);

		/* Remove the last segment if it now has no pages. */
		if (vps->start == vps->end) {
			for (vm_nphysseg--; x < vm_nphysseg; x++)
				vm_physmem[x] = vm_physmem[x + 1];
		}

		/* Now find where the new avail_end is. */
		for (avail_end = 0, x = 0; x < vm_nphysseg; x++)
			if (vm_physmem[x].avail_end > avail_end)
				avail_end = vm_physmem[x].avail_end;
		avail_end = ptoa(avail_end);

		if (sz != reqsz) {
			reqsz -= sz;
			if (msgbuf_p_cnt != VM_PHYSSEG_MAX) {
				/* if still segments available, get memory from next one ... */
				sz = reqsz;
				goto search_again;
			}
			/* Warn if the message buffer had to be shrunk. */
			printf("WARNING: %ld bytes not available for msgbuf "
			    "in last cluster (%ld used)\n",
			    (long)MSGBUFSIZE, (long)(MSGBUFSIZE - reqsz));
		}
	}

	/*
	 * install PT page for the first 4M if needed.
	 * It is the last page of the real-mode reservation.
	 */
	if (needs_earlier_install_pte0) {
		paddr_t paddr;
#ifdef DIAGNOSTIC
		if (realmode_reserved_size < PAGE_SIZE) {
			panic("cannot steal memory for first 4M PT page.");
		}
#endif
		paddr=realmode_reserved_start+realmode_reserved_size-PAGE_SIZE;
		pmap_kenter_pa((vaddr_t)vtopte(0), paddr,
		    VM_PROT_READ|VM_PROT_WRITE);
		pmap_update(pmap_kernel());
		/* make sure it is clean before using */
		memset(vtopte(0), 0, PAGE_SIZE);
		realmode_reserved_size -= PAGE_SIZE;
	}

#if NBIOSCALL > 0
	/*
	 * this should be caught at kernel build time, but put it here
	 * in case someone tries to fake it out...
	 */
#ifdef DIAGNOSTIC
	if (realmode_reserved_start > BIOSTRAMP_BASE ||
	    (realmode_reserved_start+realmode_reserved_size) < (BIOSTRAMP_BASE+
	    PAGE_SIZE)) {
		panic("cannot steal memory for PT page of bioscall.");
	}
	if (biostramp_image_size > PAGE_SIZE)
		panic("biostramp_image_size too big: %x vs. %x",
		    biostramp_image_size, PAGE_SIZE);
#endif
	pmap_kenter_pa((vaddr_t)BIOSTRAMP_BASE,	/* virtual */
	    (paddr_t)BIOSTRAMP_BASE,		/* physical */
	    VM_PROT_ALL);			/* protection */
	pmap_update(pmap_kernel());
	memcpy((caddr_t)BIOSTRAMP_BASE, biostramp_image, biostramp_image_size);
#ifdef DEBUG_BIOSCALL
	printf("biostramp installed @ %x\n", BIOSTRAMP_BASE);
#endif
	realmode_reserved_size  -= PAGE_SIZE;
	realmode_reserved_start += PAGE_SIZE;
#endif

#if NACPI > 0
	/*
	 * Steal memory for the acpi wake code
	 */
	{
		paddr_t paddr, p;
		psize_t sz;
		int npg;

		paddr = realmode_reserved_start;
		npg = acpi_md_get_npages_of_wakecode();
		sz = ptoa(npg);
#ifdef DIAGNOSTIC
		if (realmode_reserved_size < sz) {
			panic("cannot steal memory for ACPI wake code.");
		}
#endif

		/* identical mapping */
		p = paddr;
		for (x=0; x<npg; x++) {
			printf("kenter: 0x%08X\n", (unsigned)p);
			pmap_kenter_pa((vaddr_t)p, p, VM_PROT_ALL);
			p += PAGE_SIZE;
		}
		pmap_update(pmap_kernel());

		acpi_md_install_wakecode(paddr);

		realmode_reserved_size  -= sz;
		realmode_reserved_start += sz;
	}
#endif

	/* Map and clear the page that holds the IDT, then the GDT and LDT. */
	pmap_kenter_pa(idt_vaddr, idt_paddr, VM_PROT_READ|VM_PROT_WRITE);
	pmap_update(pmap_kernel());
	memset((void *)idt_vaddr, 0, PAGE_SIZE);

	idt = (struct gate_descriptor *)idt_vaddr;
#ifdef I586_CPU
	/* Read-only second mapping of the same physical page. */
	pmap_kenter_pa(pentium_idt_vaddr, idt_paddr, VM_PROT_READ);
	pentium_idt = (union descriptor *)pentium_idt_vaddr;
#endif
	pmap_update(pmap_kernel());

	/* Move the GDT built by initgdt() to its final place after the IDT. */
	tgdt = gdt;
	gdt = (union descriptor *)
	    ((char *)idt + NIDT * sizeof (struct gate_descriptor));
	ldt = gdt + NGDT;

	memcpy(gdt, tgdt, NGDT*sizeof(*gdt));

	setsegment(&gdt[GLDT_SEL].sd, ldt, NLDT * sizeof(ldt[0]) - 1,
	    SDT_SYSLDT, SEL_KPL, 0, 0);

	/* make ldt gates and memory segments */
	setgate(&ldt[LSYS5CALLS_SEL].gd, &IDTVEC(osyscall), 1,
	    SDT_SYS386CGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));

	ldt[LUCODE_SEL] = gdt[GUCODE_SEL];
	ldt[LUCODEBIG_SEL] = gdt[GUCODEBIG_SEL];
	ldt[LUDATA_SEL] = gdt[GUDATA_SEL];
	ldt[LSOL26CALLS_SEL] = ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL];

	/* exceptions; only vectors 3 and 4 get a user-level DPL */
	for (x = 0; x < 32; x++) {
		setgate(&idt[x], IDTVEC(exceptions)[x], 0, SDT_SYS386TGT,
		    (x == 3 || x == 4) ? SEL_UPL : SEL_KPL,
		    GSEL(GCODE_SEL, SEL_KPL));
		idt_allocmap[x] = 1;
	}

	/* new-style interrupt gate for syscalls */
	setgate(&idt[128], &IDTVEC(syscall), 0, SDT_SYS386TGT, SEL_UPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	idt_allocmap[128] = 1;
#ifdef COMPAT_SVR4
	setgate(&idt[0xd2], &IDTVEC(svr4_fasttrap), 0, SDT_SYS386TGT,
	    SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
	idt_allocmap[0xd2] = 1;
#endif /* COMPAT_SVR4 */

	/* Switch the CPU over to the relocated GDT and the new IDT. */
	setregion(&region, gdt, NGDT * sizeof(gdt[0]) - 1);
	lgdt(&region);

	cpu_init_idt();

#if NKSYMS || defined(DDB) || defined(LKM)
	{
		extern int end;
		boolean_t loaded;
		struct btinfo_symtab *symtab;

#ifdef DDB
		db_machine_init();
#endif

#if defined(MULTIBOOT)
		loaded = multiboot_ksyms_init();
#else
		loaded = FALSE;
#endif
		if (!loaded) {
			symtab = lookup_bootinfo(BTINFO_SYMTAB);
			if (symtab) {
				symtab->ssym += KERNBASE;
				symtab->esym += KERNBASE;
				ksyms_init(symtab->nsym, (int *)symtab->ssym,
				    (int *)symtab->esym);
			} else
				ksyms_init(*(int *)&end, ((int *)&end) + 1, esym);
		}
	}
#endif
#ifdef DDB
	if (boothowto & RB_KDB)
		Debugger();
#endif
#ifdef IPKDB
	ipkdb_init();
	if (boothowto & RB_KDB)
		ipkdb_connect(0);
#endif
#ifdef KGDB
	kgdb_port_init();
	if (boothowto & RB_KDB) {
		kgdb_debug_init = 1;
		kgdb_connect(1);
	}
#endif

#if NMCA > 0
	/* check for MCA bus, needed to be done before ISA stuff - if
	 * MCA is detected, ISA needs to use level triggered interrupts
	 * by default */
	mca_busprobe();
#endif

	intr_default_setup();

	/* Initialize software interrupts. */
	softintr_init();

	splraise(IPL_IPI);
	enable_intr();

	if (physmem < btoc(2 * 1024 * 1024)) {
		printf("warning: too little memory available; "
		    "have %lu bytes, want %lu bytes\n"
		    "running in degraded mode\n"
		    "press a key to confirm\n\n",
		    ptoa(physmem), 2*1024*1024UL);
		cngetc();
	}

#ifdef __HAVE_CPU_MAXPROC
	/* Make sure maxproc is sane */
	if (maxproc > cpu_maxproc())
		maxproc = cpu_maxproc();
#endif
}
#ifdef COMPAT_NOMID
/*
 * exec_nomid:
 *
 *	Recognize a.out headers that carry no machine ID and dispatch to
 *	the matching exec_aout_prep_*() routine.
 *
 *	The header word is first read in network byte order.  If that
 *	yields a zero magic, the low 16 bits of the raw word are used as
 *	the magic and the machine ID is taken to be MID_ZERO.
 *
 *	Returns the result of the prep routine, or ENOEXEC when the
 *	(mid, magic) pair is not one handled here.
 */
static int
exec_nomid(struct lwp *l, struct exec_package *epp)
{
	int error;
	u_long midmag, magic;
	u_short mid;
	struct exec *execp = epp->ep_hdr;

	/* check on validity of epp->ep_hdr performed by exec_out_makecmds */

	midmag = ntohl(execp->a_midmag);
	mid = (midmag >> 16) & 0xffff;
	magic = midmag & 0xffff;

	if (magic == 0) {
		magic = (execp->a_midmag & 0xffff);
		mid = MID_ZERO;
	}

	/*
	 * Widen before shifting: `mid' comes from the file and would be
	 * promoted to a signed int, where a shift of 0x8000 or more by
	 * 16 bits overflows.
	 */
	midmag = ((u_long)mid << 16) | magic;

	switch (midmag) {
	case (MID_ZERO << 16) | ZMAGIC:
		/*
		 * 386BSD's ZMAGIC format:
		 */
		error = exec_aout_prep_oldzmagic(l, epp);
		break;

	case (MID_ZERO << 16) | QMAGIC:
		/*
		 * BSDI's QMAGIC format:
		 * same as new ZMAGIC format, but with different magic number
		 */
		error = exec_aout_prep_zmagic(l, epp);
		break;

	case (MID_ZERO << 16) | NMAGIC:
		/*
		 * BSDI's NMAGIC format:
		 * same as NMAGIC format, but with different magic number
		 * and with text starting at 0.
		 */
		error = exec_aout_prep_oldnmagic(l, epp);
		break;

	case (MID_ZERO << 16) | OMAGIC:
		/*
		 * BSDI's OMAGIC format:
		 * same as OMAGIC format, but with different magic number
		 * and with text starting at 0.
		 */
		error = exec_aout_prep_oldomagic(l, epp);
		break;

	default:
		error = ENOEXEC;
		break;
	}

	return error;
}
#endif
/*
 * cpu_exec_aout_makecmds():
 *	CPU-dependent a.out format hook for execve().
 *
 * Determine if the given exec package refers to something which we
 * understand and, if so, set up the vmcmds for it.
 *
 * On the i386 the only formats handled here are those recognized by
 * exec_nomid() (old 386BSD ZMAGIC and the BSDI magics), and only when
 * COMPAT_NOMID is given as a kernel option.  Without that option the
 * result is always ENOEXEC.
 */
int
cpu_exec_aout_makecmds(struct lwp *l, struct exec_package *epp)
{
#ifdef COMPAT_NOMID
	return exec_nomid(l, epp);
#else
	(void) l;
	(void) epp;

	return ENOEXEC;
#endif /* COMPAT_NOMID */
}
#include <dev/ic/mc146818reg.h> /* for NVRAM POST */
#include <i386/isa/nvram.h> /* for NVRAM POST */
/*
 * cpu_reset:
 *
 *	Reset the machine.  Several methods are tried in turn, each one
 *	only mattering if the previous one did not take effect:
 *	the AMD Geode SC1100 reset register (on that CPU only), the
 *	keyboard controller reset pulse, and finally a deliberate fault
 *	with an empty IDT.  The function then spins forever.
 */
void
cpu_reset(void)
{
	struct region_descriptor region;

	disable_intr();

	/*
	 * Ensure the NVRAM reset byte contains something vaguely sane.
	 */

	outb(IO_RTC, NVRAM_RESET);
	outb(IO_RTC+1, NVRAM_RESET_RST);

	/*
	 * Reset AMD Geode SC1100.
	 *
	 * 1) Write PCI Configuration Address Register (0xcf8) to
	 *    select Function 0, Register 0x44: Bridge Configuration,
	 *    GPIO and LPC Configuration Register Space, Reset
	 *    Control Register.
	 *
	 * 2) Write 0xf to PCI Configuration Data Register (0xcfc)
	 *    to reset IDE controller, IDE bus, and PCI bus, and
	 *    to trigger a system-wide reset.
	 *
	 * See AMD Geode SC1100 Processor Data Book, Revision 2.0,
	 * sections 6.3.1, 6.3.2, and 6.4.1.
	 */
	if (cpu_info_primary.ci_signature == 0x540) {
		outl(0xcf8, 0x80009044ul);
		outl(0xcfc, 0xf);
	}

	/*
	 * The keyboard controller has 4 random output pins, one of which is
	 * connected to the RESET pin on the CPU in many PCs.  We tell the
	 * keyboard controller to pulse this line a couple of times.
	 */
	outb(IO_KBD + KBCMDP, KBC_PULSE0);
	delay(100000);
	outb(IO_KBD + KBCMDP, KBC_PULSE0);
	delay(100000);

	/*
	 * Try to cause a triple fault and watchdog reset by making the IDT
	 * invalid and causing a fault.
	 */
	memset((caddr_t)idt, 0, NIDT * sizeof(idt[0]));
	setregion(&region, idt, NIDT * sizeof(idt[0]) - 1);
	lidt(&region);
	__asm volatile("divl %0,%1" : : "q" (0), "a" (0));

#if 0
	/*
	 * Try to cause a triple fault and watchdog reset by unmapping the
	 * entire address space and doing a TLB flush.
	 */
	memset((caddr_t)PTD, 0, PAGE_SIZE);
	tlbflush();
#endif

	for (;;);
}
/*
 * cpu_getmcontext:
 *
 *	Export the machine context of LWP `l' into `*mcp'.
 *
 *	The general registers are copied from the LWP's trapframe and
 *	_UC_CPU is OR-ed into `*flags'.  If the LWP has used the FPU
 *	(MDL_USEDFPU), its saved FPU state is copied from the PCB as
 *	well and _UC_FPU (plus _UC_FXSAVE for the fxsave layout) is
 *	OR-ed in.  `*flags' is only ever added to, never cleared.
 */
void
cpu_getmcontext(struct lwp *l, mcontext_t *mcp, unsigned int *flags)
{
	const struct trapframe *frame = l->l_md.md_regs;
	__greg_t *regs = mcp->__gregs;
	__greg_t restart;

	/*
	 * Save register context.  With PSL_VM set the segment registers
	 * and flags are taken from the vm86 fields instead.
	 */
#ifdef VM86
	if (frame->tf_eflags & PSL_VM) {
		regs[_REG_GS]  = frame->tf_vm86_gs;
		regs[_REG_FS]  = frame->tf_vm86_fs;
		regs[_REG_ES]  = frame->tf_vm86_es;
		regs[_REG_DS]  = frame->tf_vm86_ds;
		regs[_REG_EFL] = get_vflags(l);
	} else
#endif
	{
		regs[_REG_GS]  = frame->tf_gs;
		regs[_REG_FS]  = frame->tf_fs;
		regs[_REG_ES]  = frame->tf_es;
		regs[_REG_DS]  = frame->tf_ds;
		regs[_REG_EFL] = frame->tf_eflags;
	}

	regs[_REG_EDI]    = frame->tf_edi;
	regs[_REG_ESI]    = frame->tf_esi;
	regs[_REG_EBP]    = frame->tf_ebp;
	regs[_REG_EBX]    = frame->tf_ebx;
	regs[_REG_EDX]    = frame->tf_edx;
	regs[_REG_ECX]    = frame->tf_ecx;
	regs[_REG_EAX]    = frame->tf_eax;
	regs[_REG_EIP]    = frame->tf_eip;
	regs[_REG_CS]     = frame->tf_cs;
	/* Both stack pointer slots receive the trapframe's %esp. */
	regs[_REG_ESP]    = frame->tf_esp;
	regs[_REG_UESP]   = frame->tf_esp;
	regs[_REG_SS]     = frame->tf_ss;
	regs[_REG_TRAPNO] = frame->tf_trapno;
	regs[_REG_ERR]    = frame->tf_err;

	/*
	 * ras_lookup() yields -1 when it has no replacement for the
	 * saved EIP; any other value is reported in its place.
	 */
	restart = (__greg_t)ras_lookup(l->l_proc,
	    (caddr_t) regs[_REG_EIP]);
	if (restart != -1)
		regs[_REG_EIP] = restart;

	*flags |= _UC_CPU;

	/* No floating point register context to save. */
	if ((l->l_md.md_flags & MDL_USEDFPU) == 0)
		return;

#if NNPX > 0
	/*
	 * If this process is the current FP owner, dump its
	 * context to the PCB first.
	 * XXX npxsave() also clears the FPU state; depending on the
	 * XXX application this might be a penalty.
	 */
	if (l->l_addr->u_pcb.pcb_fpcpu) {
		npxsave_lwp(l, 1);
	}
#endif
	if (!i386_use_fxsave) {
		memcpy(&mcp->__fpregs.__fp_reg_set.__fpchip_state.__fp_state,
		    &l->l_addr->u_pcb.pcb_savefpu.sv_87,
		    sizeof (mcp->__fpregs.__fp_reg_set.__fpchip_state.__fp_state));
	} else {
		memcpy(&mcp->__fpregs.__fp_reg_set.__fp_xmm_state.__fp_xmm,
		    &l->l_addr->u_pcb.pcb_savefpu.sv_xmm,
		    sizeof (mcp->__fpregs.__fp_reg_set.__fp_xmm_state.__fp_xmm));
		*flags |= _UC_FXSAVE;
	}
#if 0
	/* Apparently nothing ever touches this. */
	ucp->mcp.mc_fp.fp_emcsts = l->l_addr->u_pcb.pcb_saveemc;
#endif
	*flags |= _UC_FPU;
}
/*
 * cpu_setmcontext:
 *
 *	Install the machine context `*mcp' into LWP `l'.
 *
 *	flags	selects what is restored:
 *		_UC_CPU		general registers;
 *		_UC_FPU		floating point state (_UC_FXSAVE says
 *				the supplied state is in fxsave layout);
 *		_UC_VM		with VM86, switch the process to the
 *				vm86 system call handler;
 *		_UC_SETSTACK /	set or clear SS_ONSTACK in the signal
 *		_UC_CLRSTACK	stack flags.
 *
 *	Returns 0 on success, or EINVAL when the supplied EFLAGS or %cs
 *	would violate the user-mode restrictions checked below; in that
 *	case nothing has been modified.
 */
int
cpu_setmcontext(struct lwp *l, const mcontext_t *mcp, unsigned int flags)
{
	struct trapframe *tf = l->l_md.md_regs;
	const __greg_t *gr = mcp->__gregs;

	/* Restore register context, if any. */
	if ((flags & _UC_CPU) != 0) {
#ifdef VM86
		if (gr[_REG_EFL] & PSL_VM) {
			tf->tf_vm86_gs = gr[_REG_GS];
			tf->tf_vm86_fs = gr[_REG_FS];
			tf->tf_vm86_es = gr[_REG_ES];
			tf->tf_vm86_ds = gr[_REG_DS];
			set_vflags(l, gr[_REG_EFL]);
			if (flags & _UC_VM) {
				void syscall_vm86(struct trapframe *);
				l->l_proc->p_md.md_syscall = syscall_vm86;
			}
		} else
#endif
		{
			/*
			 * Check for security violations.  If we're returning
			 * to protected mode, the CPU will validate the segment
			 * registers automatically and generate a trap on
			 * violations.  We handle the trap, rather than doing
			 * all of the checking here.
			 */
			if (((gr[_REG_EFL] ^ tf->tf_eflags) & PSL_USERSTATIC) ||
			    !USERMODE(gr[_REG_CS], gr[_REG_EFL])) {
				printf("cpu_setmcontext error: uc EFL: 0x%08x"
				    " tf EFL: 0x%08x uc CS: 0x%x\n",
				    gr[_REG_EFL], tf->tf_eflags, gr[_REG_CS]);
				return (EINVAL);
			}
			tf->tf_gs = gr[_REG_GS];
			tf->tf_fs = gr[_REG_FS];
			tf->tf_es = gr[_REG_ES];
			tf->tf_ds = gr[_REG_DS];
			/* Only change the user-alterable part of eflags */
			tf->tf_eflags &= ~PSL_USER;
			tf->tf_eflags |= (gr[_REG_EFL] & PSL_USER);
		}
		tf->tf_edi    = gr[_REG_EDI];
		tf->tf_esi    = gr[_REG_ESI];
		tf->tf_ebp    = gr[_REG_EBP];
		tf->tf_ebx    = gr[_REG_EBX];
		tf->tf_edx    = gr[_REG_EDX];
		tf->tf_ecx    = gr[_REG_ECX];
		tf->tf_eax    = gr[_REG_EAX];
		tf->tf_eip    = gr[_REG_EIP];
		tf->tf_cs     = gr[_REG_CS];
		/* The user stack pointer is taken from the UESP slot. */
		tf->tf_esp    = gr[_REG_UESP];
		tf->tf_ss     = gr[_REG_SS];
	}

	/* Restore floating point register context, if any. */
	if ((flags & _UC_FPU) != 0) {
#if NNPX > 0
		/*
		 * If we were using the FPU, forget that we were.
		 */
		if (l->l_addr->u_pcb.pcb_fpcpu != NULL)
			npxsave_lwp(l, 0);
#endif
		if (flags & _UC_FXSAVE) {
			if (i386_use_fxsave) {
				/*
				 * Copy the whole save area: the size must be
				 * that of the object, not of a pointer to it.
				 */
				memcpy(
				    &l->l_addr->u_pcb.pcb_savefpu.sv_xmm,
				    &mcp->__fpregs.__fp_reg_set.__fp_xmm_state.__fp_xmm,
				    sizeof (l->l_addr->u_pcb.pcb_savefpu.sv_xmm));
			} else {
				/* This is a weird corner case */
				process_xmm_to_s87((struct savexmm *)
				    &mcp->__fpregs.__fp_reg_set.__fp_xmm_state.__fp_xmm,
				    &l->l_addr->u_pcb.pcb_savefpu.sv_87);
			}
		} else {
			if (i386_use_fxsave) {
				process_s87_to_xmm((struct save87 *)
				    &mcp->__fpregs.__fp_reg_set.__fpchip_state.__fp_state,
				    &l->l_addr->u_pcb.pcb_savefpu.sv_xmm);
			} else {
				memcpy(&l->l_addr->u_pcb.pcb_savefpu.sv_87,
				    &mcp->__fpregs.__fp_reg_set.__fpchip_state.__fp_state,
				    sizeof (l->l_addr->u_pcb.pcb_savefpu.sv_87));
			}
		}
		/* If not set already. */
		l->l_md.md_flags |= MDL_USEDFPU;
#if 0
		/* Apparently unused. */
		l->l_addr->u_pcb.pcb_saveemc = mcp->mc_fp.fp_emcsts;
#endif
	}
	if (flags & _UC_SETSTACK)
		l->l_proc->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
	if (flags & _UC_CLRSTACK)
		l->l_proc->p_sigctx.ps_sigstk.ss_flags &= ~SS_ONSTACK;
	return (0);
}
/*
 * cpu_initclocks:
 *
 *	Start the clocks by calling through the initclock_func pointer.
 */
void
cpu_initclocks()
{
	initclock_func();
}
#ifdef MULTIPROCESSOR
/*
 * need_resched:
 *
 *	Flag CPU `ci' as wanting a reschedule.  Nothing is done if the
 *	flag is already set.  Otherwise the flag is set and either the
 *	CPU's current LWP is notified through aston(), or, when the CPU
 *	has no current LWP and is not the calling CPU, it is sent an IPI.
 */
void
need_resched(struct cpu_info *ci)
{
	if (!ci->ci_want_resched) {
		ci->ci_want_resched = 1;

		if ((ci)->ci_curlwp == NULL) {
			if (ci != curcpu())
				x86_send_ipi(ci, 0);
		} else {
			aston((ci)->ci_curlwp->l_proc);
		}
	}
}
#endif
/*
 * idt_vec_alloc:
 *
 *	Allocate an IDT vector slot within the inclusive range
 *	[low, high].  The lowest free slot is marked used in
 *	idt_allocmap[] and its number returned; 0 is returned when the
 *	range has no free slot.  The search runs under idt_lock.
 */
int
idt_vec_alloc(int low, int high)
{
	int slot;
	int result = 0;

	simple_lock(&idt_lock);
	for (slot = low; slot <= high; slot++) {
		if (idt_allocmap[slot] != 0)
			continue;

		idt_allocmap[slot] = 1;
		result = slot;
		break;
	}
	simple_unlock(&idt_lock);

	return result;
}
/*
 * idt_vec_set:
 *
 *	Point IDT vector `vec' at `function' using a kernel-privilege
 *	SDT_SYS386IGT gate in the kernel code segment.  The vector must
 *	already be marked allocated in idt_allocmap[].
 */
void
idt_vec_set(int vec, void (*function)(void))
{
	struct gate_descriptor *gate;

	/*
	 * Vector should be allocated, so no locking needed.
	 */
	KASSERT(idt_allocmap[vec] == 1);

	gate = &idt[vec];
	setgate(gate, function, 0, SDT_SYS386IGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
}
/*
 * idt_vec_free:
 *
 *	Release IDT vector `vec': invalidate its gate and mark the slot
 *	free in idt_allocmap[], both under idt_lock.
 */
void
idt_vec_free(int vec)
{
	struct gate_descriptor *gate = &idt[vec];

	simple_lock(&idt_lock);
	unsetgate(gate);
	idt_allocmap[vec] = 0;
	simple_unlock(&idt_lock);
}
/*
 * cpu_maxproc:
 *
 *	Number of processes is limited by number of available GDT slots.
 *	Returns MAXGDTSIZ - NGDT, halved when USER_LDT is configured.
 */
int
cpu_maxproc(void)
{
	int slots = MAXGDTSIZ - NGDT;

#ifdef USER_LDT
	slots /= 2;
#endif
	return (slots);
}