Annotation of src/sys/arch/amd64/amd64/locore.S, Revision 1.123.6.1
1.123.6.1! snj 1: /* $NetBSD: locore.S,v 1.123 2017/03/25 15:07:21 maxv Exp $ */
1.1 fvdl 2:
3: /*
4: * Copyright-o-rama!
5: */
6:
7: /*
1.84 maxv 8: * Copyright (c) 1998, 2000, 2007, 2008, 2016 The NetBSD Foundation, Inc.
9: * All rights reserved.
10: *
11: * This code is derived from software contributed to The NetBSD Foundation
1.100 maxv 12: * by Charles M. Hannum and by Maxime Villard.
1.84 maxv 13: *
14: * Redistribution and use in source and binary forms, with or without
15: * modification, are permitted provided that the following conditions
16: * are met:
17: * 1. Redistributions of source code must retain the above copyright
18: * notice, this list of conditions and the following disclaimer.
19: * 2. Redistributions in binary form must reproduce the above copyright
20: * notice, this list of conditions and the following disclaimer in the
21: * documentation and/or other materials provided with the distribution.
22: *
23: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
24: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
27: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33: * POSSIBILITY OF SUCH DAMAGE.
34: */
35:
36: /*
1.32 bouyer 37: * Copyright (c) 2007 Manuel Bouyer.
38: *
39: * Redistribution and use in source and binary forms, with or without
40: * modification, are permitted provided that the following conditions
41: * are met:
42: * 1. Redistributions of source code must retain the above copyright
43: * notice, this list of conditions and the following disclaimer.
44: * 2. Redistributions in binary form must reproduce the above copyright
45: * notice, this list of conditions and the following disclaimer in the
46: * documentation and/or other materials provided with the distribution.
47: *
48: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
49: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
50: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
51: * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
52: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
53: * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
54: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
55: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
56: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
57: * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
58: *
59: */
60:
61: /*
62: * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr>
63: *
64: * Permission to use, copy, modify, and distribute this software for any
65: * purpose with or without fee is hereby granted, provided that the above
66: * copyright notice and this permission notice appear in all copies.
67: *
68: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
69: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
70: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
71: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
72: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
73: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
74: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
75: */
76:
77: /*
1.1 fvdl 78: * Copyright (c) 2001 Wasabi Systems, Inc.
79: * All rights reserved.
80: *
81: * Written by Frank van der Linden for Wasabi Systems, Inc.
82: *
83: * Redistribution and use in source and binary forms, with or without
84: * modification, are permitted provided that the following conditions
85: * are met:
86: * 1. Redistributions of source code must retain the above copyright
87: * notice, this list of conditions and the following disclaimer.
88: * 2. Redistributions in binary form must reproduce the above copyright
89: * notice, this list of conditions and the following disclaimer in the
90: * documentation and/or other materials provided with the distribution.
91: * 3. All advertising materials mentioning features or use of this software
92: * must display the following acknowledgement:
93: * This product includes software developed for the NetBSD Project by
94: * Wasabi Systems, Inc.
95: * 4. The name of Wasabi Systems, Inc. may not be used to endorse
96: * or promote products derived from this software without specific prior
97: * written permission.
98: *
99: * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
100: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
101: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
102: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
103: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
104: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
105: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
106: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
107: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
108: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
109: * POSSIBILITY OF SUCH DAMAGE.
110: */
111:
112: /*-
113: * Copyright (c) 1990 The Regents of the University of California.
114: * All rights reserved.
115: *
116: * This code is derived from software contributed to Berkeley by
117: * William Jolitz.
118: *
119: * Redistribution and use in source and binary forms, with or without
120: * modification, are permitted provided that the following conditions
121: * are met:
122: * 1. Redistributions of source code must retain the above copyright
123: * notice, this list of conditions and the following disclaimer.
124: * 2. Redistributions in binary form must reproduce the above copyright
125: * notice, this list of conditions and the following disclaimer in the
126: * documentation and/or other materials provided with the distribution.
1.5 agc 127: * 3. Neither the name of the University nor the names of its contributors
1.1 fvdl 128: * may be used to endorse or promote products derived from this software
129: * without specific prior written permission.
130: *
131: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
132: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
133: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
134: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
135: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
136: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
137: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
138: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
139: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
140: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
141: * SUCH DAMAGE.
142: *
143: * @(#)locore.s 7.3 (Berkeley) 5/13/91
144: */
145:
1.84 maxv 146: /* Override user-land alignment before including asm.h: data on 8 bytes, text on 16 bytes padded with 0x90 */
1.34 dsl 147: #define ALIGN_DATA .align 8
148: #define ALIGN_TEXT .align 16,0x90
149: #define _ALIGN_TEXT ALIGN_TEXT
150:
151: #include <machine/asm.h>
152:
1.78 uebayasi 153: #include "opt_copy_symtab.h"
1.1 fvdl 154: #include "opt_ddb.h"
155: #include "opt_ddbparam.h"
1.51 apb 156: #include "opt_modular.h"
1.1 fvdl 157: #include "opt_realmem.h"
158:
1.12 drochner 159: #include "opt_compat_netbsd.h"
160: #include "opt_compat_netbsd32.h"
161: #include "opt_compat_ibcs2.h"
1.32 bouyer 162: #include "opt_xen.h"
1.12 drochner 163:
1.1 fvdl 164: #include "assym.h"
165: #include "lapic.h"
166: #include "ioapic.h"
1.2 fvdl 167: #include "ksyms.h"
1.1 fvdl 168:
169: #include <sys/errno.h>
170: #include <sys/syscall.h>
171:
172: #include <machine/pte.h>
173: #include <machine/segments.h>
174: #include <machine/specialreg.h>
175: #include <machine/trap.h>
176: #include <machine/bootinfo.h>
177: #include <machine/frameasm.h>
1.44 ad 178: #include <machine/cputypes.h>
1.1 fvdl 179:
180: #if NLAPIC > 0
181: #include <machine/i82489reg.h>
182: #endif
183:
184: /* Get definitions for IOM_BEGIN, IOM_END, and IOM_SIZE */
185: #include <dev/isa/isareg.h>
186:
1.71 uebayasi 187: #define _RELOC(x) ((x) - KERNBASE) /* address of x minus KERNBASE */
188: #define RELOC(x) _RELOC(_C_LABEL(x)) /* same, applied to the C symbol x */
189:
1.86 maxv 190: /* 32bit version of PG_NX: the value stored in the upper 32 bits of a PTE/PDE by fillkpt_nox */
191: #define PG_NX32 0x80000000
192:
1.83 maxv 193: #if L2_SLOT_KERNBASE > 0
194: #define TABLE_L2_ENTRIES (2 * (NKL2_KIMG_ENTRIES + 1))
195: #else
196: #define TABLE_L2_ENTRIES (NKL2_KIMG_ENTRIES + 1)
197: #endif
198:
199: #if L3_SLOT_KERNBASE > 0
200: #define TABLE_L3_ENTRIES (2 * NKL3_KIMG_ENTRIES)
201: #else
202: #define TABLE_L3_ENTRIES NKL3_KIMG_ENTRIES
203: #endif
204:
205: #define PROC0_PML4_OFF 0 /* byte offsets into the BOOTSTRAP TABLES, in order: L4, proc0 stack, L3, L2, L1 */
1.97 maxv 206: #define PROC0_STK_OFF (PROC0_PML4_OFF + 1 * PAGE_SIZE)
207: #define PROC0_PTP3_OFF (PROC0_STK_OFF + UPAGES * PAGE_SIZE)
1.83 maxv 208: #define PROC0_PTP2_OFF (PROC0_PTP3_OFF + NKL4_KIMG_ENTRIES * PAGE_SIZE)
209: #define PROC0_PTP1_OFF (PROC0_PTP2_OFF + TABLE_L3_ENTRIES * PAGE_SIZE)
210: #define TABLESIZE \
1.97 maxv 211: ((NKL4_KIMG_ENTRIES + TABLE_L3_ENTRIES + TABLE_L2_ENTRIES + 1 + UPAGES) \
1.83 maxv 212: * PAGE_SIZE)
213:
1.121 maxv 214: /* Amount of VA used to map the kernel, the syms and the preloaded modules: the span covered by NKL2_KIMG_ENTRIES L2 entries, minus the BOOTSTRAP TABLES and ISA I/O MEM */
215: #define BOOTMAP_VA_SIZE \
216: (NKL2_KIMG_ENTRIES * (1 << L2_SHIFT) - TABLESIZE - IOM_SIZE)
217:
1.83 maxv 218: /*
219: * fillkpt - Fill in a kernel page table (32bit code; uses local labels 1 and 2)
220: * eax = pte (page frame | control | status); advanced by PAGE_SIZE for each entry written
221: * ebx = page table address; on exit, points just past the last entry written
222: * ecx = number of pages to map (0 is accepted and writes nothing); 0 on exit
223: *
224: * Each entry is 8 (PDE_SIZE) bytes long: we must set the 4 upper bytes to 0.
225: */
226: #define fillkpt \
1.94 maxv 227: cmpl $0,%ecx ; /* zero-sized? */ \
228: je 2f ; \
1.91 maxv 229: 1: movl $0,(PDE_SIZE-4)(%ebx) ; /* upper 32 bits: 0 */ \
230: movl %eax,(%ebx) ; /* store phys addr */ \
231: addl $PDE_SIZE,%ebx ; /* next PTE/PDE */ \
232: addl $PAGE_SIZE,%eax ; /* next phys page */ \
1.94 maxv 233: loop 1b ; \
234: 2: ;
1.83 maxv 235:
1.84 maxv 236: /*
1.89 maxv 237: * fillkpt_nox - Same as fillkpt (same eax/ebx/ecx contract), but stores nox_flag in the upper 32 bits of each entry, i.e. sets the NX/XD bit when nox_flag holds it. %ebp is used as scratch and is saved/restored on the stack, so a valid stack is required.
238: */
239: #define fillkpt_nox \
1.94 maxv 240: cmpl $0,%ecx ; /* zero-sized? */ \
241: je 2f ; \
1.91 maxv 242: pushl %ebp ; \
243: movl RELOC(nox_flag),%ebp ; \
244: 1: movl %ebp,(PDE_SIZE-4)(%ebx) ; /* upper 32 bits: NX */ \
245: movl %eax,(%ebx) ; /* store phys addr */ \
1.89 maxv 246: addl $PDE_SIZE,%ebx ; /* next PTE/PDE */ \
247: addl $PAGE_SIZE,%eax ; /* next phys page */ \
1.91 maxv 248: loop 1b ; \
1.94 maxv 249: popl %ebp ; \
250: 2: ;
1.89 maxv 251:
252: /*
1.96 maxv 253: * fillkpt_blank - Fill in a kernel page table with blank (all-zero) entries
254: * ebx = page table address; on exit, points just past the last entry written
255: * ecx = number of entries to blank (0 is accepted and writes nothing); 0 on exit
256: */
257: #define fillkpt_blank \
258: cmpl $0,%ecx ; /* zero-sized? */ \
259: je 2f ; \
260: 1: movl $0,(PDE_SIZE-4)(%ebx) ; /* upper 32 bits: 0 */ \
261: movl $0,(%ebx) ; /* lower 32 bits: 0 */ \
262: addl $PDE_SIZE,%ebx ; /* next PTE/PDE */ \
263: loop 1b ; \
264: 2: ;
265:
266: /*
1.84 maxv 267: * killkpt - Destroy a kernel page table (long mode) by zeroing its entries
268: * rbx = page table address; on exit, points just past the last entry cleared
269: * rcx = number of entries to destroy; must be non-zero (there is no zero-size check); 0 on exit
270: */
271: #define killkpt \
272: 1: movq $0,(%rbx) ; \
273: addq $PDE_SIZE,%rbx ; \
274: loop 1b ;
275:
1.83 maxv 276:
1.32 bouyer 277: #ifdef XEN
1.99 bouyer 278: #define __ASSEMBLY__
279: #include <xen/xen-public/elfnote.h>
280: #include <xen/xen-public/xen.h>
281: #define ELFNOTE(name, type, desctype, descdata...) \
282: .pushsection .note.name ; \
283: .align 4 ; \
284: .long 2f - 1f /* namesz */ ; \
285: .long 4f - 3f /* descsz */ ; \
286: .long type ; \
287: 1:.asciz #name ; \
288: 2:.align 4 ; \
289: 3:desctype descdata ; \
290: 4:.align 4 ; \
291: .popsection
292:
1.32 bouyer 293: /*
1.73 uebayasi 294: * Xen guest identifier and loader selection. Each ELFNOTE(name, type, desctype, descdata...) line emits one note into section .note.<name>: namesz, descsz, type, the NUL-terminated name, then the descriptor (directive 'desctype' applied to 'descdata'), each 4-byte aligned. One invocation per line: the macro body does not end with a statement separator.
1.32 bouyer 295: */
296: .section __xen_guest
1.99 bouyer 297: ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz, "NetBSD")
298: ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz, "4.99")
299: ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz, "xen-3.0")
300: ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .quad, KERNBASE)
301: ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .quad, KERNBASE)
302: ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .quad, start)
303: ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .quad, hypercall_page)
304: ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .quad, HYPERVISOR_VIRT_START)
305: ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, "")
306: ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "yes")
307: ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .long, PG_V, PG_V)
308: ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz, "generic")
309: ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long, 0)
1.49 ad 310: #if NKSYMS > 0 || defined(DDB) || defined(MODULAR)
1.99 bouyer 311: ELFNOTE(Xen, XEN_ELFNOTE_BSD_SYMTAB, .asciz, "yes")
1.32 bouyer 312: #endif
313: #endif /* XEN */
1.1 fvdl 314:
315: /*
316: * Initialization data: variables written by the start code in this file, or preset at build time (REALBASEMEM/REALEXTMEM)
317: */
318: .data
319:
1.98 maxv 320: .globl _C_LABEL(tablesize)
1.86 maxv 321: .globl _C_LABEL(nox_flag)
1.100 maxv 322: .globl _C_LABEL(cputype)
1.57 jym 323: .globl _C_LABEL(cpuid_level)
1.71 uebayasi 324: .globl _C_LABEL(esym)
325: .globl _C_LABEL(eblob)
1.100 maxv 326: .globl _C_LABEL(atdevbase)
327: .globl _C_LABEL(PDPpaddr)
1.71 uebayasi 328: .globl _C_LABEL(boothowto)
329: .globl _C_LABEL(bootinfo)
330: .globl _C_LABEL(biosbasemem)
331: .globl _C_LABEL(biosextmem)
1.43 ad 332:
1.98 maxv 333: .type _C_LABEL(tablesize), @object
334: _C_LABEL(tablesize): .long TABLESIZE /* size in bytes of the BOOTSTRAP TABLES */
335: END(tablesize)
1.86 maxv 336: .type _C_LABEL(nox_flag), @object
337: LABEL(nox_flag) .long 0 /* 32bit NOX flag: PG_NX32 if NX/XD is supported, else 0 */
338: END(nox_flag)
1.71 uebayasi 339: .type _C_LABEL(cputype), @object
1.84 maxv 340: LABEL(cputype) .long 0 /* are we 80486, Pentium, or.. */
1.71 uebayasi 341: END(cputype)
342: .type _C_LABEL(cpuid_level), @object
1.84 maxv 343: LABEL(cpuid_level) .long -1 /* max. level accepted by cpuid instr (-1 until cpuid has been run) */
1.71 uebayasi 344: END(cpuid_level)
345: .type _C_LABEL(esym), @object
1.84 maxv 346: LABEL(esym) .quad 0 /* ptr to end of syms (kernel virtual address) */
1.71 uebayasi 347: END(esym)
348: .type _C_LABEL(eblob), @object
1.84 maxv 349: LABEL(eblob) .quad 0 /* ptr to end of modules (kernel virtual address), 0 if none */
1.71 uebayasi 350: END(eblob)
351: .type _C_LABEL(atdevbase), @object
1.84 maxv 352: LABEL(atdevbase) .quad 0 /* location of start of iomem in virt */
1.71 uebayasi 353: END(atdevbase)
354: .type _C_LABEL(PDPpaddr), @object
1.84 maxv 355: LABEL(PDPpaddr) .quad 0 /* physical address of the L4 page of the BOOTSTRAP TABLES, for libkvm */
1.71 uebayasi 356: END(PDPpaddr)
357: .type _C_LABEL(biosbasemem), @object
1.1 fvdl 358: #ifndef REALBASEMEM
1.84 maxv 359: LABEL(biosbasemem) .long 0 /* base memory reported by BIOS (taken from the boot parameters if left 0) */
1.1 fvdl 360: #else
1.71 uebayasi 361: LABEL(biosbasemem) .long REALBASEMEM
1.1 fvdl 362: #endif
1.71 uebayasi 363: END(biosbasemem)
364: .type _C_LABEL(biosextmem), @object
1.1 fvdl 365: #ifndef REALEXTMEM
1.84 maxv 366: LABEL(biosextmem) .long 0 /* extended memory reported by BIOS (taken from the boot parameters if left 0) */
1.1 fvdl 367: #else
1.71 uebayasi 368: LABEL(biosextmem) .long REALEXTMEM
1.1 fvdl 369: #endif
1.71 uebayasi 370: END(biosextmem)
1.1 fvdl 371:
1.32 bouyer 372: #ifndef XEN
1.56 jym 373: .globl gdt64_lo
374: .globl gdt64_hi
1.1 fvdl 375:
1.56 jym 376: #define GDT64_LIMIT gdt64_end-gdt64_start-1
377: /* Temporary gdt64 pseudo-descriptor (16bit limit, 64bit base) for lgdt, with base address in low memory */
1.71 uebayasi 378: .type _C_LABEL(gdt64_lo), @object
379: LABEL(gdt64_lo)
1.56 jym 380: .word GDT64_LIMIT
1.1 fvdl 381: .quad _RELOC(gdt64_start)
1.71 uebayasi 382: END(gdt64_lo)
1.1 fvdl 383: .align 64
384:
1.56 jym 385: /* Temporary gdt64 pseudo-descriptor, same table but with base address in high memory */
1.71 uebayasi 386: .type _C_LABEL(gdt64_hi), @object
387: LABEL(gdt64_hi)
1.56 jym 388: .word GDT64_LIMIT
389: .quad gdt64_start
1.71 uebayasi 390: END(gdt64_hi)
1.56 jym 391: .align 64
392: #undef GDT64_LIMIT
393:
1.71 uebayasi 394: .type _C_LABEL(gdt64_start), @object
395: _C_LABEL(gdt64_start):
1.1 fvdl 396: .quad 0x0000000000000000 /* always empty */
397: .quad 0x00af9a000000ffff /* kernel CS */
398: .quad 0x00cf92000000ffff /* kernel DS */
1.71 uebayasi 399: END(gdt64_start)
1.1 fvdl 400: gdt64_end:
401:
1.71 uebayasi 402: .type _C_LABEL(farjmp64), @object
403: _C_LABEL(farjmp64):
1.57 jym 404: .long _RELOC(longmode) /* 32bit offset: longmode minus KERNBASE */
1.1 fvdl 405: .word GSEL(GCODE_SEL, SEL_KPL) /* 16bit selector; operand of the indirect ljmp in start */
1.71 uebayasi 406: END(farjmp64)
1.84 maxv 407:
1.32 bouyer 408: #endif /* !XEN */
1.71 uebayasi 409:
1.84 maxv 410: /* Space for the temporary stack: 512 bytes, with tmpstk labelling the end of the area (loaded into the stack pointer by the start code) */
1.71 uebayasi 411: .size tmpstk, tmpstk - .
1.84 maxv 412: .space 512
1.1 fvdl 413: tmpstk:
414:
415: .globl _C_LABEL(cpu_private)
1.73 uebayasi 416: .comm _C_LABEL(cpu_private),PAGE_SIZE,PAGE_SIZE
1.1 fvdl 417:
418: /*
419: * Some hackage to deal with 64bit symbols in 32 bit mode.
1.79 maxv 420: * This may not be needed if things are cleaned up a little.
1.1 fvdl 421: */
422:
423: .text
424: .globl _C_LABEL(kernel_text)
425: .set _C_LABEL(kernel_text),KERNTEXTOFF /* kernel_text is a global symbol whose value is KERNTEXTOFF */
426:
1.71 uebayasi 427: ENTRY(start)
1.32 bouyer 428: #ifndef XEN
1.71 uebayasi 429: .code32
1.81 maxv 430:
431: /* Warm boot */
432: movw $0x1234,0x472
433:
1.1 fvdl 434: /*
1.84 maxv 435: * Load parameters from the stack (32 bits):
1.91 maxv 436: * boothowto, [bootdev], bootinfo, esym, biosextmem, biosbasemem
1.81 maxv 437: * We are not interested in 'bootdev'.
1.1 fvdl 438: */
1.81 maxv 439:
440: /* Load 'boothowto' */
1.1 fvdl 441: movl 4(%esp),%eax
442: movl %eax,RELOC(boothowto)
1.81 maxv 443:
444: /* Load 'bootinfo': copy the boot loader's entries into our own 'bootinfo' buffer (entry count first, then the entries back to back), stopping if BOOTINFO_MAXSIZE would be exceeded */
1.1 fvdl 445: movl 12(%esp),%eax
1.81 maxv 446: testl %eax,%eax /* bootinfo = NULL? */
447: jz bootinfo_finished
448:
1.121 maxv 449: movl (%eax),%ebx /* bootinfo::bi_nentries; ebx = entries left to process */
1.1 fvdl 450: movl $RELOC(bootinfo),%ebp
1.81 maxv 451: movl %ebp,%edx /* edx = dest pointer */
1.1 fvdl 452: addl $BOOTINFO_MAXSIZE,%ebp /* ebp = end of the dest buffer */
1.81 maxv 453: movl %ebx,(%edx) /* store the number of entries */
454: addl $4,%edx
455:
456: bootinfo_entryloop:
457: testl %ebx,%ebx /* no remaining entries? */
458: jz bootinfo_finished
459:
460: addl $4,%eax
461: movl (%eax),%ecx /* address of entry */
1.1 fvdl 462: pushl %edi
463: pushl %esi
464: pushl %eax
465:
1.84 maxv 466: movl (%ecx),%eax /* btinfo_common::len (size of entry) */
1.1 fvdl 467: movl %edx,%edi
1.121 maxv 468: addl %eax,%edx /* update dest pointer */
1.81 maxv 469: cmpl %ebp,%edx /* beyond bootinfo+BOOTINFO_MAXSIZE? */
470: ja bootinfo_overflow /* unsigned: these are addresses, a result >= 2GB must not look negative */
471:
1.1 fvdl 472: movl %ecx,%esi
473: movl %eax,%ecx /* ecx = size of entry, byte count for the copy */
1.81 maxv 474:
1.43 ad 475: /*
1.121 maxv 476: * If any modules were loaded, record where they end. 'eblob' is used
477: * later to compute the initial bootstrap tables.
1.43 ad 478: */
1.84 maxv 479: cmpl $BTINFO_MODULELIST,4(%esi) /* btinfo_common::type */
1.121 maxv 480: jne bootinfo_copy
1.81 maxv 481:
1.121 maxv 482: /* Skip the modules if we won't have enough VA to map them */
483: movl 12(%esi),%eax /* btinfo_modulelist::endpa */
484: addl $PGOFSET,%eax /* roundup to a page */
485: andl $~PGOFSET,%eax
486: cmpl $BOOTMAP_VA_SIZE,%eax
487: ja bootinfo_skip /* unsigned: endpa is a physical address */
488: movl %eax,RELOC(eblob)
1.81 maxv 489: addl $KERNBASE_LO,RELOC(eblob) /* eblob = KERNBASE + endpa, as a 64bit add */
490: adcl $KERNBASE_HI,RELOC(eblob)+4
491:
1.121 maxv 492: bootinfo_copy:
1.1 fvdl 493: rep
1.81 maxv 494: movsb /* copy esi -> edi */
1.121 maxv 495: jmp bootinfo_next
496:
497: bootinfo_skip:
498: subl %ecx,%edx /* revert dest pointer */
499:
500: bootinfo_next:
1.1 fvdl 501: popl %eax
502: popl %esi
503: popl %edi
1.81 maxv 504: subl $1,%ebx /* decrement the # of entries */
505: jmp bootinfo_entryloop
506:
507: bootinfo_overflow:
508: /*
509: * Cleanup for overflow case. Pop the registers, and correct the number
510: * of entries.
511: */
1.1 fvdl 512: popl %eax
513: popl %esi
514: popl %edi
515: movl $RELOC(bootinfo),%ebp
1.81 maxv 516: movl %ebp,%edx
517: subl %ebx,(%edx) /* correct the number of entries */
1.121 maxv 518: bootinfo_finished:
1.1 fvdl 519:
1.81 maxv 520: /* Load 'esym' */
1.72 uebayasi 521: movl 16(%esp),%eax
1.81 maxv 522: testl %eax,%eax /* esym = NULL? */
1.1 fvdl 523: jz 1f
1.81 maxv 524:
1.1 fvdl 525: addl $KERNBASE_LO,%eax
1.81 maxv 526:
527: 1:
528: movl $RELOC(esym),%ebp
1.1 fvdl 529: movl %eax,(%ebp)
530: movl $KERNBASE_HI,4(%ebp)
531:
1.83 maxv 532: /* Load 'biosextmem' */
1.1 fvdl 533: movl $RELOC(biosextmem),%ebp
534: movl (%ebp),%eax
1.83 maxv 535: testl %eax,%eax /* already set? */
536: jnz biosextmem_finished
1.81 maxv 537:
1.1 fvdl 538: movl 20(%esp),%eax
539: movl %eax,(%ebp)
1.81 maxv 540:
1.83 maxv 541: biosextmem_finished:
542: /* Load 'biosbasemem' */
1.1 fvdl 543: movl $RELOC(biosbasemem),%ebp
544: movl (%ebp),%eax
1.83 maxv 545: testl %eax,%eax /* already set? */
546: jnz biosbasemem_finished
547:
1.1 fvdl 548: movl 24(%esp),%eax
549: movl %eax,(%ebp)
550:
1.83 maxv 551: biosbasemem_finished:
552: /*
1.84 maxv 553: * Done with the parameters!
1.83 maxv 554: */
1.84 maxv 555:
556: /* First, reset the PSL. */
1.1 fvdl 557: pushl $PSL_MBO
558: popfl
559:
560: xorl %eax,%eax
561: cpuid
562: movl %eax,RELOC(cpuid_level)
563:
564: /*
565: * Finished with old stack; load new %esp now instead of later so we
566: * can trace this code without having to worry about the trace trap
567: * clobbering the memory test or the zeroing of the bss+bootstrap page
568: * tables.
569: *
570: * The boot program should check:
571: * text+data <= &stack_variable - more_space_for_stack
572: * text+data+bss+pad+space_for_page_tables <= end_of_memory
1.100 maxv 573: *
1.84 maxv 574: * XXX: the gdt is in the carcass of the boot program so clearing
1.1 fvdl 575: * the rest of memory is still not possible.
576: */
577: movl $RELOC(tmpstk),%esp
578:
1.86 maxv 579: /*
580: * Retrieve the NX/XD flag. We use the 32bit version of PG_NX.
581: */
582: movl $0x80000001,%eax
583: cpuid
584: andl $CPUID_NOX,%edx
585: jz no_NOX
586: movl $PG_NX32,RELOC(nox_flag)
587: no_NOX:
588:
1.1 fvdl 589: /*
1.84 maxv 590: * There are four levels of pages in amd64: PML4 -> PDP -> PD -> PT. They will
591: * be referred to as: L4 -> L3 -> L2 -> L1.
592: *
593: * Virtual address space of the kernel:
1.97 maxv 594: * +------+--------+------+-----+--------+---------------------+----------
595: * | TEXT | RODATA | DATA | BSS | [SYMS] | [PRELOADED MODULES] | L4 ->
596: * +------+--------+------+-----+--------+---------------------+----------
597: * (1) (2) (3)
598: *
1.101 maxv 599: * --------------+-----+-----+----+-------------+
600: * -> PROC0 STK -> L3 -> L2 -> L1 | ISA I/O MEM |
601: * --------------+-----+-----+----+-------------+
1.97 maxv 602: * (4)
1.84 maxv 603: *
1.97 maxv 604: * PROC0 STK is obviously not linked as a page level. It just happens to be
605: * caught between L4 and L3.
606: *
607: * (PROC0 STK + L4 + L3 + L2 + L1) is later referred to as BOOTSTRAP TABLES.
1.1 fvdl 608: *
1.116 maxv 609: * ISA I/O MEM has no physical page allocated here, just virtual addresses.
610: *
1.88 maxv 611: * Important note: the kernel segments are properly 4k-aligned
612: * (see kern.ldscript), so there's no need to enforce alignment.
1.1 fvdl 613: */
614:
1.84 maxv 615: /* Find end of kernel image; brings us on (1). */
1.115 maxv 616: movl $RELOC(__kernel_end),%edi
1.83 maxv 617:
1.78 uebayasi 618: #if (NKSYMS || defined(DDB) || defined(MODULAR)) && !defined(makeoptions_COPY_SYMTAB)
1.84 maxv 619: /* Save the symbols (if loaded); brings us on (2). */
1.1 fvdl 620: movl RELOC(esym),%eax
621: testl %eax,%eax
622: jz 1f
623: subl $KERNBASE_LO,%eax /* XXX */
624: movl %eax,%edi
625: 1:
626: #endif
1.84 maxv 627: /* Skip over any modules/blobs; brings us on (3). */
1.43 ad 628: movl RELOC(eblob),%eax
629: testl %eax,%eax
630: jz 1f
631: subl $KERNBASE_LO,%eax /* XXX */
632: movl %eax,%edi
633: 1:
1.81 maxv 634:
1.97 maxv 635: /* We are on (3). Align up for BOOTSTRAP TABLES. */
1.1 fvdl 636: movl %edi,%esi
1.84 maxv 637: addl $PGOFSET,%esi
1.1 fvdl 638: andl $~PGOFSET,%esi
639:
1.93 maxv 640: /* We are on the BOOTSTRAP TABLES. Save L4's physical address. */
641: movl $RELOC(PDPpaddr),%ebp
642: movl %esi,(%ebp)
643: movl $0,4(%ebp)
644:
1.84 maxv 645: /* Now, zero out the BOOTSTRAP TABLES (before filling them in). */
1.91 maxv 646: movl %esi,%edi
1.1 fvdl 647: xorl %eax,%eax
648: cld
649: movl $TABLESIZE,%ecx
650: shrl $2,%ecx
651: rep
1.83 maxv 652: stosl /* copy eax -> edi */
1.1 fvdl 653:
1.73 uebayasi 654: /*
1.84 maxv 655: * Build the page tables and levels. We go from L1 to L4, and link the levels
656: * together. Note: RELOC computes &addr - KERNBASE in 32 bits; the value can't
657: * be > 4G, or we can't deal with it anyway, since we are in 32bit mode.
1.73 uebayasi 658: */
1.84 maxv 659: /*
660: * Build L1.
661: */
1.83 maxv 662: leal (PROC0_PTP1_OFF)(%esi),%ebx
1.81 maxv 663:
1.119 maxv 664: /* Skip the area below the kernel text. */
1.96 maxv 665: movl $(KERNTEXTOFF_LO - KERNBASE_LO),%ecx
666: shrl $PGSHIFT,%ecx
667: fillkpt_blank
1.81 maxv 668:
1.88 maxv 669: /* Map the kernel text RX. */
1.96 maxv 670: movl $(KERNTEXTOFF_LO - KERNBASE_LO),%eax /* start of TEXT */
1.88 maxv 671: movl $RELOC(__rodata_start),%ecx
672: subl %eax,%ecx
673: shrl $PGSHIFT,%ecx
674: orl $(PG_V|PG_KR),%eax
675: fillkpt
1.1 fvdl 676:
1.91 maxv 677: /* Map the kernel rodata R. */
1.88 maxv 678: movl $RELOC(__rodata_start),%eax
679: movl $RELOC(__data_start),%ecx
1.1 fvdl 680: subl %eax,%ecx
681: shrl $PGSHIFT,%ecx
1.73 uebayasi 682: orl $(PG_V|PG_KR),%eax
1.89 maxv 683: fillkpt_nox
1.1 fvdl 684:
1.90 maxv 685: /* Map the kernel data+bss RW. */
686: movl $RELOC(__data_start),%eax
687: movl $RELOC(__kernel_end),%ecx
688: subl %eax,%ecx
689: shrl $PGSHIFT,%ecx
690: orl $(PG_V|PG_KW),%eax
691: fillkpt_nox
692:
1.105 maxv 693: /* Map [SYMS]+[PRELOADED MODULES] RW. */
1.90 maxv 694: movl $RELOC(__kernel_end),%eax
1.97 maxv 695: movl %esi,%ecx /* start of BOOTSTRAP TABLES */
1.95 maxv 696: subl %eax,%ecx
697: shrl $PGSHIFT,%ecx
698: orl $(PG_V|PG_KW),%eax
1.105 maxv 699: fillkpt_nox
1.95 maxv 700:
1.92 maxv 701: /* Map the BOOTSTRAP TABLES RW. */
702: movl %esi,%eax /* start of BOOTSTRAP TABLES */
703: movl $TABLESIZE,%ecx /* length of BOOTSTRAP TABLES */
704: shrl $PGSHIFT,%ecx
705: orl $(PG_V|PG_KW),%eax
706: fillkpt_nox
707:
1.102 maxv 708: /* We are on (4). Map ISA I/O MEM RW. */
1.101 maxv 709: movl $IOM_BEGIN,%eax
710: movl $IOM_SIZE,%ecx /* size of ISA I/O MEM */
711: shrl $PGSHIFT,%ecx
712: orl $(PG_V|PG_KW/*|PG_N*/),%eax
1.102 maxv 713: fillkpt_nox
1.1 fvdl 714:
1.84 maxv 715: /*
716: * Build L2. Linked to L1.
717: */
1.73 uebayasi 718: leal (PROC0_PTP2_OFF)(%esi),%ebx
1.1 fvdl 719: leal (PROC0_PTP1_OFF)(%esi),%eax
1.81 maxv 720: orl $(PG_V|PG_KW),%eax
1.1 fvdl 721: movl $(NKL2_KIMG_ENTRIES+1),%ecx
722: fillkpt
723:
724: #if L2_SLOT_KERNBASE > 0
725: /* If needed, set up level 2 entries for actual kernel mapping */
1.84 maxv 726: leal (PROC0_PTP2_OFF + L2_SLOT_KERNBASE * PDE_SIZE)(%esi),%ebx
1.73 uebayasi 727: leal (PROC0_PTP1_OFF)(%esi),%eax
1.81 maxv 728: orl $(PG_V|PG_KW),%eax
1.73 uebayasi 729: movl $(NKL2_KIMG_ENTRIES+1),%ecx
1.1 fvdl 730: fillkpt
731: #endif
732:
1.84 maxv 733: /*
734: * Build L3. Linked to L2.
735: */
1.73 uebayasi 736: leal (PROC0_PTP3_OFF)(%esi),%ebx
1.1 fvdl 737: leal (PROC0_PTP2_OFF)(%esi),%eax
1.81 maxv 738: orl $(PG_V|PG_KW),%eax
1.1 fvdl 739: movl $NKL3_KIMG_ENTRIES,%ecx
740: fillkpt
741:
742: #if L3_SLOT_KERNBASE > 0
743: /* If needed, set up level 3 entries for actual kernel mapping */
1.84 maxv 744: leal (PROC0_PTP3_OFF + L3_SLOT_KERNBASE * PDE_SIZE)(%esi),%ebx
1.73 uebayasi 745: leal (PROC0_PTP2_OFF)(%esi),%eax
1.83 maxv 746: orl $(PG_V|PG_KW),%eax
1.73 uebayasi 747: movl $NKL3_KIMG_ENTRIES,%ecx
1.1 fvdl 748: fillkpt
749: #endif
750:
1.84 maxv 751: /*
752: * Build L4 for identity mapping. Linked to L3.
753: */
1.73 uebayasi 754: leal (PROC0_PML4_OFF)(%esi),%ebx
1.1 fvdl 755: leal (PROC0_PTP3_OFF)(%esi),%eax
1.81 maxv 756: orl $(PG_V|PG_KW),%eax
1.1 fvdl 757: movl $NKL4_KIMG_ENTRIES,%ecx
758: fillkpt
759:
1.84 maxv 760: /* Set up L4 entries for actual kernel mapping */
761: leal (PROC0_PML4_OFF + L4_SLOT_KERNBASE * PDE_SIZE)(%esi),%ebx
1.1 fvdl 762: leal (PROC0_PTP3_OFF)(%esi),%eax
1.83 maxv 763: orl $(PG_V|PG_KW),%eax
1.1 fvdl 764: movl $NKL4_KIMG_ENTRIES,%ecx
765: fillkpt
766:
1.106 maxv 767: /* Install recursive top level PDE (one entry) */
1.84 maxv 768: leal (PROC0_PML4_OFF + PDIR_SLOT_PTE * PDE_SIZE)(%esi),%ebx
1.73 uebayasi 769: leal (PROC0_PML4_OFF)(%esi),%eax
1.1 fvdl 770: orl $(PG_V|PG_KW),%eax
1.106 maxv 771: movl $1,%ecx
772: fillkpt_nox
1.1 fvdl 773:
774: /*
775: * Startup checklist:
776: * 1. Enable PAE (and SSE while here).
777: */
778: movl %cr4,%eax
779: orl $(CR4_PAE|CR4_OSFXSR|CR4_OSXMMEXCPT),%eax
780: movl %eax,%cr4
781:
782: /*
1.86 maxv 783: * 2. Set Long Mode Enable in EFER. Also enable the syscall extensions,
784: * and NOX if available.
1.1 fvdl 785: */
1.73 uebayasi 786: movl $MSR_EFER,%ecx
1.1 fvdl 787: rdmsr
788: xorl %eax,%eax /* XXX */
789: orl $(EFER_LME|EFER_SCE),%eax
1.86 maxv 790: movl RELOC(nox_flag),%ebx
791: cmpl $0,%ebx
792: je skip_NOX
793: orl $(EFER_NXE),%eax
794: skip_NOX:
1.1 fvdl 795: wrmsr
796:
797: /*
798: * 3. Load %cr3 with pointer to PML4.
799: */
800: movl %esi,%eax
801: movl %eax,%cr3
802:
803: /*
804: * 4. Enable paging and the rest of it.
805: */
806: movl %cr0,%eax
1.68 jym 807: orl $(CR0_PE|CR0_PG|CR0_NE|CR0_TS|CR0_MP|CR0_WP|CR0_AM),%eax
1.1 fvdl 808: movl %eax,%cr0
809: jmp compat
810: compat:
811:
812: /*
1.83 maxv 813: * 5. Not quite done yet, we're now in a compatibility segment, in
814: * legacy mode. We must jump to a long mode segment. Need to set up
815: * a temporary GDT with a long mode segment in it to do that.
1.1 fvdl 816: */
1.56 jym 817: movl $RELOC(gdt64_lo),%eax
1.1 fvdl 818: lgdt (%eax)
819: movl $RELOC(farjmp64),%eax
820: ljmp *(%eax)
821:
1.83 maxv 822: .code64
1.1 fvdl 823: longmode:
824: /*
1.83 maxv 825: * 6. Finally, we're in long mode. However, we're still in the identity
826: * mapped area (could not jump out of that earlier because it would
827: * have been a > 32bit jump). We can do that now, so here we go.
1.1 fvdl 828: */
829: movabsq $longmode_hi,%rax
830: jmp *%rax
1.56 jym 831:
1.1 fvdl 832: longmode_hi:
1.56 jym 833:
834: /*
835: * We left the identity mapped area. Base address of
836: * the temporary gdt64 should now be in high memory.
837: */
838: movq $RELOC(gdt64_hi),%rax
839: lgdt (%rax)
840:
1.1 fvdl 841: /*
1.83 maxv 842: * We have arrived. There's no need anymore for the identity mapping in
843: * low memory, remove it.
1.1 fvdl 844: */
845: movq $KERNBASE,%r8
846:
847: #if L2_SLOT_KERNBASE > 0
848: movq $(NKL2_KIMG_ENTRIES+1),%rcx
1.84 maxv 849: leaq (PROC0_PTP2_OFF)(%rsi),%rbx /* old, phys address */
850: addq %r8,%rbx /* new, virt address */
851: killkpt
1.1 fvdl 852: #endif
853:
854: #if L3_SLOT_KERNBASE > 0
855: movq $NKL3_KIMG_ENTRIES,%rcx
1.84 maxv 856: leaq (PROC0_PTP3_OFF)(%rsi),%rbx /* old, phys address */
857: addq %r8,%rbx /* new, virt address */
858: killkpt
1.1 fvdl 859: #endif
860:
861: movq $NKL4_KIMG_ENTRIES,%rcx
1.84 maxv 862: leaq (PROC0_PML4_OFF)(%rsi),%rbx /* old, phys address of PML4 */
863: addq %r8,%rbx /* new, virt address of PML4 */
1.85 maxv 864: killkpt
1.1 fvdl 865:
866: /* Relocate atdevbase. */
867: movq $(TABLESIZE+KERNBASE),%rdx
868: addq %rsi,%rdx
869: movq %rdx,_C_LABEL(atdevbase)(%rip)
870:
871: /* Set up bootstrap stack. */
1.97 maxv 872: leaq (PROC0_STK_OFF)(%rsi),%rax
1.1 fvdl 873: addq %r8,%rax
1.55 rmind 874: movq %rax,(_C_LABEL(lwp0)+L_PCB)(%rip) /* XXX L_PCB != uarea */
1.1 fvdl 875: leaq (USPACE-FRAMESIZE)(%rax),%rsp
1.84 maxv 876: movq %rsi,PCB_CR3(%rax) /* pcb->pcb_cr3 */
877: xorq %rbp,%rbp /* mark end of frames */
1.1 fvdl 878:
879: xorw %ax,%ax
880: movw %ax,%gs
881: movw %ax,%fs
882:
1.116 maxv 883: /* The first physical page available. */
884: leaq (TABLESIZE)(%rsi),%rdi
1.32 bouyer 885:
886: #else /* XEN */
887: /* First, reset the PSL. */
888: pushq $2
889: popfq
890:
891: cld
892:
893: /*
894: * Xen info:
895: * - %rsi -> start_info struct
1.111 maxv 896: * - %rsp -> stack, *theoretically* the last used page by Xen bootstrap
1.32 bouyer 897: */
1.111 maxv 898: movq %rsi,%rbx
1.32 bouyer 899:
1.73 uebayasi 900: /* Clear BSS. */
1.32 bouyer 901: xorq %rax,%rax
902: movq $_C_LABEL(__bss_start),%rdi
903: movq $_C_LABEL(_end),%rcx
904: subq %rdi,%rcx
905: rep
906: stosb
907:
1.111 maxv 908: /* Copy start_info to a safe place. */
1.32 bouyer 909: movq %rbx,%rsi
910: movq $_C_LABEL(start_info_union),%rdi
911: movq $64,%rcx
912: rep
913: movsq
914:
915: /*
916: * Memory layout at start of the day:
917: * - Kernel image
918: * - Page frames list
919: * - start_info struct. we copied it, so it can be recycled.
920: * - xenstore
921: * - console
922: * - Xen bootstrap page tables
923: * - kernel stack. provided by Xen
1.79 maxv 924: * - guaranteed 512kB padding
1.32 bouyer 925: *
926: * As we want to rebuild our page tables and place our stack
927: * in proc0 struct, all data starting from after console can be
928: * discarded after we've done a little setup.
929: */
930:
931: /*
1.111 maxv 932: * We want our own page tables, and will rebuild them. We will reclaim
933: * the Xen space later, INCLUDING the stack. So we need to switch to a
934: * temporary one now.
935: */
936: movq $tmpstk,%rax
937: subq $8,%rax
938: movq %rax,%rsp
1.32 bouyer 939:
1.45 bouyer 940: xorl %eax,%eax
941: cpuid
942: movl %eax,_C_LABEL(cpuid_level)
943:
1.111 maxv 944: movq $cpu_info_primary,%rdi
945: movq %rdi,CPU_INFO_SELF(%rdi) /* ci->ci_self = ci */
946: movq $1,%rsi
1.67 cherry 947: call cpu_init_msrs /* cpu_init_msrs(ci, true); */
948:
1.113 maxv 949: call xen_locore
1.32 bouyer 950:
951: /*
1.113 maxv 952: * The first VA available is returned by xen_locore in %rax. We
1.111 maxv 953: * use it as the UAREA, and set up the stack here.
1.32 bouyer 954: */
1.111 maxv 955: movq %rax,%rsi
1.55 rmind 956: movq %rsi,(_C_LABEL(lwp0)+L_PCB) /* XXX L_PCB != uarea */
1.32 bouyer 957: leaq (USPACE-FRAMESIZE)(%rsi),%rsp
958: xorq %rbp,%rbp
959:
1.111 maxv 960: /* Clear segment registers. */
1.63 jym 961: xorw %ax,%ax
1.32 bouyer 962: movw %ax,%gs
963: movw %ax,%fs
1.84 maxv 964:
1.117 maxv 965: /* Set first_avail after the DUMMY PAGE (see xen_locore). */
1.32 bouyer 966: movq %rsi,%rdi
1.117 maxv 967: addq $(USPACE+PAGE_SIZE),%rdi
1.84 maxv 968: subq $KERNBASE,%rdi /* init_x86_64 wants a physical address */
1.107 maxv 969: #endif /* XEN */
1.32 bouyer 970:
1.1 fvdl 971: call _C_LABEL(init_x86_64)
972: call _C_LABEL(main)
1.71 uebayasi 973: END(start)
1.1 fvdl 974:
1.73 uebayasi 975: #if defined(XEN)
1.32 bouyer 976: /* Space for the hypercall call page: 0x1000 bytes placed at HYPERCALL_PAGE_OFFSET (XEN builds only). */
977: #define HYPERCALL_PAGE_OFFSET 0x1000
978: .org HYPERCALL_PAGE_OFFSET /* advance the location counter to HYPERCALL_PAGE_OFFSET */
979: ENTRY(hypercall_page)
980: .skip 0x1000 /* reserve 0x1000 bytes under the hypercall_page symbol */
1.71 uebayasi 981: END(hypercall_page)
1.32 bouyer 982: #endif /* XEN */
1.1 fvdl 983:
984: /*
1.22 yamt 985: * int setjmp(label_t *)
986: *
987: * Used primarily by DDB.
988: */
1.1 fvdl 989: ENTRY(setjmp) /* %rdi = label_t * to fill in; returns 0 in %eax */
990: /*
991: * Only save registers that must be preserved across function
992: * calls according to the ABI (%rbx, %rsp, %rbp, %r12-%r15)
993: * and %rip.
994: */
995: movq %rdi,%rax /* %rax = save area (first argument) */
996: movq %rbx,(%rax) /* offset 0: %rbx */
997: movq %rsp,8(%rax) /* offset 8: %rsp, still pointing at the return address */
998: movq %rbp,16(%rax) /* offset 16: %rbp */
999: movq %r12,24(%rax) /* offset 24: %r12 */
1000: movq %r13,32(%rax) /* offset 32: %r13 */
1001: movq %r14,40(%rax) /* offset 40: %r14 */
1002: movq %r15,48(%rax) /* offset 48: %r15 */
1003: movq (%rsp),%rdx /* %rdx = return address (top of stack on entry) */
1004: movq %rdx,56(%rax) /* offset 56: saved %rip */
1005: xorl %eax,%eax /* the direct call returns 0 */
1006: ret
1.71 uebayasi 1007: END(setjmp)
1.1 fvdl 1008:
1.22 yamt 1009: /*
1010: * int longjmp(label_t *)
1011: *
1012: * Used primarily by DDB.
1013: */
1.1 fvdl 1014: ENTRY(longjmp) /* %rdi = label_t * previously filled by setjmp; does not return to its own caller */
1015: movq %rdi,%rax /* %rax = save area (first argument) */
1016: movq (%rax),%rbx /* offset 0: %rbx */
1017: movq 8(%rax),%rsp /* offset 8: switch back to the saved stack pointer */
1018: movq 16(%rax),%rbp /* offset 16: %rbp */
1019: movq 24(%rax),%r12 /* offset 24: %r12 */
1020: movq 32(%rax),%r13 /* offset 32: %r13 */
1021: movq 40(%rax),%r14 /* offset 40: %r14 */
1022: movq 48(%rax),%r15 /* offset 48: %r15 */
1023: movq 56(%rax),%rdx /* offset 56: saved %rip */
1024: movq %rdx,(%rsp) /* rewrite the return slot on the restored stack so 'ret' goes to the saved %rip */
1.22 yamt 1025: movl $1,%eax /* the resumed setjmp call site sees a return value of 1 */
1.1 fvdl 1026: ret
1.71 uebayasi 1027: END(longjmp)
1.1 fvdl 1028:
1.73 uebayasi 1029: /*
1030: * void dumpsys(void)
1031: *
1032: * Mimic cpu_switchto() for postmortem debugging.
1033: */
1.25 yamt 1034: ENTRY(dumpsys) /* no arguments; records %rsp/%rbp in dumppcb, then calls dodumpsys() */
1.84 maxv 1035: /* Build a fake switch frame. */
1.25 yamt 1036: pushq %rbx /* same five pushes, in the same order, as cpu_switchto */
1037: pushq %r12
1038: pushq %r13
1039: pushq %r14
1040: pushq %r15
1.84 maxv 1041:
1042: /* Save a context. */
1.25 yamt 1043: movq $dumppcb, %rax /* %rax = address of dumppcb */
1044: movq %rsp, PCB_RSP(%rax) /* %rsp now points at the fake switch frame */
1045: movq %rbp, PCB_RBP(%rax)
1046:
1047: call _C_LABEL(dodumpsys)
1048:
1.84 maxv 1049: addq $(5*8), %rsp /* drop the five pushed registers: sizeof(switchframe) - sizeof(%rip) */
1.25 yamt 1050: ret
1.71 uebayasi 1051: END(dumpsys)
1.25 yamt 1052:
1.1 fvdl 1053: /*
1.58 chs 1054: * struct lwp *cpu_switchto(struct lwp *oldlwp, struct lwp *newlwp,
1.103 maxv 1055: * bool returning)
1.22 yamt 1056: *
1057: * 1. if (oldlwp != NULL), save its context.
1058: * 2. then, restore context of newlwp.
1059: *
1060: * Note that the stack frame layout is known to "struct switchframe" in
1061: * <machine/frame.h> and to the code in cpu_lwp_fork() which initializes
1062: * it for a new lwp.
1.1 fvdl 1063: */
1.22 yamt 1064: ENTRY(cpu_switchto) /* %rdi = oldlwp, %rsi = newlwp, %dl = returning; returns oldlwp in %rax */
1.1 fvdl 1065: pushq %rbx /* push the callee-saved registers onto the old LWP's stack */
1066: pushq %r12
1067: pushq %r13
1068: pushq %r14
1069: pushq %r15
1070:
1.84 maxv 1071: movq %rdi,%r13 /* oldlwp */
1072: movq %rsi,%r12 /* newlwp */
1.1 fvdl 1073:
1.103 maxv 1074: testq %r13,%r13 /* oldlwp = NULL ? */
1075: jz skip_save /* nothing to save when there is no old LWP */
1.1 fvdl 1076:
1.22 yamt 1077: /* Save old context. */
1.55 rmind 1078: movq L_PCB(%r13),%rax /* %rax = oldlwp's pcb */
1.22 yamt 1079: movq %rsp,PCB_RSP(%rax)
1080: movq %rbp,PCB_RBP(%rax)
1.103 maxv 1081: skip_save:
1.22 yamt 1082:
1083: /* Switch to newlwp's stack. */
1.103 maxv 1084: movq L_PCB(%r12),%r14 /* %r14 = newlwp's pcb, used throughout the rest of the function */
1.32 bouyer 1085: #ifdef XEN /* XXX debug code */
1.103 maxv 1086: cmpq $0,PCB_RSP(%r14) /* a saved %rsp of 0 is treated as an error */
1.32 bouyer 1087: jne 999f
1088: callq _C_LABEL(cpu_Debugger);
1089: 999:
1090: #endif
1.22 yamt 1091: movq PCB_RSP(%r14),%rsp /* from here on we run on newlwp's stack */
1092: movq PCB_RBP(%r14),%rbp
1.1 fvdl 1093:
1.39 ad 1094: /*
1095: * Set curlwp. This must be globally visible in order to permit
1096: * non-interlocked mutex release.
1097: */
1098: movq %r12,%rcx
1099: xchgq %rcx,CPUVAR(CURLWP) /* xchg with a memory operand is implicitly locked; the old value in %rcx is unused */
1.35 ad 1100:
1101: /* Skip the rest if returning to a pinned LWP. */
1.103 maxv 1102: testb %dl,%dl /* returning = true ? */
1103: jnz switch_return
1.35 ad 1104:
1.38 yamt 1105: /* Switch ring0 stack */
1106: #ifndef XEN
1107: movq PCB_RSP0(%r14),%rax
1108: movq %rax,CPUVAR(RSP0) /* publish newlwp's pcb_rsp0 in the per-CPU area */
1109: #else
1.103 maxv 1110: movq %r14,%rdi /* argument: newlwp's pcb */
1.32 bouyer 1111: callq _C_LABEL(x86_64_switch_context);
1112: #endif
1.1 fvdl 1113:
1.22 yamt 1114: /* Don't bother with the rest if switching to a system process. */
1115: testl $LW_SYSTEM,L_FLAG(%r12)
1.103 maxv 1116: jnz switch_return
1.1 fvdl 1117:
1.22 yamt 1118: /* Is this process using RAS (restartable atomic sequences)? */
1119: movq L_PROC(%r12),%rdi /* %rdi = newlwp's proc; also the first argument to ras_lookup below */
1120: cmpq $0,P_RASLIST(%rdi)
1.104 maxv 1121: je no_RAS
1122:
1123: /* Handle restartable atomic sequences (RAS). */
1.1 fvdl 1124: movq L_MD_REGS(%r12),%rbx /* %rbx = newlwp's trapframe */
1125: movq TF_RIP(%rbx),%rsi /* second argument: the saved user %rip */
1126: call _C_LABEL(ras_lookup)
1127: cmpq $-1,%rax /* -1 leaves the saved %rip untouched */
1128: je no_RAS
1129: movq %rax,TF_RIP(%rbx) /* otherwise replace the saved %rip with the returned address */
1130: no_RAS:
1.1 fvdl 1131:
1132: /*
1.76 rmind 1133: * Restore cr0 including FPU state (may have CR0_TS set). Note that
1134: * IPL_SCHED prevents an FPU interrupt from altering the LWP's saved cr0.
1.1 fvdl 1135: */
1.32 bouyer 1136: #ifndef XEN
1.75 christos 1137: movl $IPL_HIGH,CPUVAR(ILEVEL) /* NOTE(review): the comment above says IPL_SCHED but IPL_HIGH is stored; ILEVEL is not restored in this function - confirm against callers */
1138: movl PCB_CR0(%r14),%ecx /* has CR0_TS clear */
1.22 yamt 1139: movq %cr0,%rdx /* %rdx = current cr0, compared against the wanted value below */
1.1 fvdl 1140:
1.75 christos 1141: /*
1142: * If our floating point registers are on a different CPU,
1143: * set CR0_TS so we'll trap rather than reuse bogus state.
1144: */
1145: cmpq CPUVAR(FPCURLWP),%r12 /* is newlwp this CPU's fpcurlwp? */
1.103 maxv 1146: je skip_TS
1.75 christos 1147: orq $CR0_TS,%rcx
1.103 maxv 1148: skip_TS:
1.75 christos 1149:
1.22 yamt 1150: /* Reloading CR0 is very expensive - avoid if possible. */
1.103 maxv 1151: cmpq %rdx,%rcx
1152: je skip_CR0
1.1 fvdl 1153: movq %rcx,%cr0
1.103 maxv 1154: skip_CR0:
1.58 chs 1155:
1.103 maxv 1156: /* The 32bit LWPs are handled differently. */
1157: testl $PCB_COMPAT32,PCB_FLAGS(%r14)
1.118 maxv 1158: jnz lwp_32bit
1.58 chs 1159:
1.118 maxv 1160: lwp_64bit:
1.107 maxv 1161: /* Zero out %fs/%gs registers. */
1.103 maxv 1162: xorq %rax,%rax /* %rax = 0, reused below to clear the GDT slots */
1163: movw %ax,%fs
1.58 chs 1164: CLI(cx)
1.70 dsl 1165: SWAPGS
1.103 maxv 1166: movw %ax,%gs /* %gs is loaded between the two SWAPGS, inside the CLI/STI bracket */
1.70 dsl 1167: SWAPGS
1.58 chs 1168: STI(cx)
1169:
1.107 maxv 1170: /* Zero out GDT descriptors. */
1.58 chs 1171: movq CPUVAR(GDT),%rcx /* %rcx = this CPU's GDT base */
1.103 maxv 1172: movq %rax,(GUFS_SEL*8)(%rcx) /* each descriptor slot is 8 bytes */
1173: movq %rax,(GUGS_SEL*8)(%rcx)
1.58 chs 1174:
1175: /* Reload 64-bit %fs/%gs MSRs. */
1.103 maxv 1176: movl $MSR_FSBASE,%ecx /* wrmsr: %ecx = MSR index, %edx:%eax = 64-bit value */
1177: movl PCB_FS(%r14),%eax /* low 32 bits of pcb_fs */
1178: movl 4+PCB_FS(%r14),%edx /* high 32 bits of pcb_fs */
1.58 chs 1179: wrmsr
1.103 maxv 1180: movl $MSR_KERNELGSBASE,%ecx
1181: movl PCB_GS(%r14),%eax /* low 32 bits of pcb_gs */
1182: movl 4+PCB_GS(%r14),%edx /* high 32 bits of pcb_gs */
1.58 chs 1183: wrmsr
1.107 maxv 1184:
1.103 maxv 1185: jmp switch_return
1.58 chs 1186:
1.118 maxv 1187: lwp_32bit:
1.58 chs 1188: /* Reload %fs/%gs GDT descriptors. */
1189: movq CPUVAR(GDT),%rcx /* %rcx = this CPU's GDT base */
1.103 maxv 1190: movq PCB_FS(%r14),%rax /* here pcb_fs/pcb_gs are copied verbatim into the GDT slots */
1191: movq %rax,(GUFS_SEL*8)(%rcx)
1192: movq PCB_GS(%r14),%rax
1193: movq %rax,(GUGS_SEL*8)(%rcx)
1.58 chs 1194:
1195: /* Reload %fs and %gs */
1.103 maxv 1196: movq L_MD_REGS(%r12),%rbx /* %rbx = newlwp's trapframe */
1197: movw TF_FS(%rbx),%fs
1.58 chs 1198: CLI(ax)
1.70 dsl 1199: SWAPGS
1.103 maxv 1200: movw TF_GS(%rbx),%gs /* %gs is loaded between the two SWAPGS, inside the CLI/STI bracket */
1.70 dsl 1201: SWAPGS
1.58 chs 1202: STI(ax)
1203: #else
1204: movq %r12,%rdi /* argument: newlwp */
1205: callq _C_LABEL(x86_64_tls_switch)
1.32 bouyer 1206: #endif
1.84 maxv 1207:
1.103 maxv 1208: switch_return:
1.22 yamt 1209: /* Return to the new LWP, returning 'oldlwp' in %rax. */
1.103 maxv 1210: movq %r13,%rax
1.1 fvdl 1211: popq %r15 /* pops come from newlwp's stack, in reverse order of the entry pushes */
1212: popq %r14
1213: popq %r13
1214: popq %r12
1215: popq %rbx
1216: ret
1.71 uebayasi 1217: END(cpu_switchto)
1.1 fvdl 1218:
1219: /*
1.22 yamt 1220: * void savectx(struct pcb *pcb);
1221: *
1.1 fvdl 1222: * Update pcb, saving current processor state.
1223: */
1224: ENTRY(savectx) /* %rdi = struct pcb * to update; no return value is set */
1225: /* Save stack pointers. */
1226: movq %rsp,PCB_RSP(%rdi) /* %rsp as seen inside savectx, i.e. pointing at the return address */
1227: movq %rbp,PCB_RBP(%rdi)
1228: ret
1.71 uebayasi 1229: END(savectx)
1.1 fvdl 1230:
1231: IDTVEC(syscall32) /* stub entry point: nothing is dispatched */
1232: sysret /* go away please: return immediately */
1.71 uebayasi 1233: IDTVEC_END(syscall32)
1.1 fvdl 1234:
1235: /*
1.22 yamt 1236: * syscall()
1237: *
1.70 dsl 1238: * syscall insn entry.
1239: * This currently isn't much faster, but it can be made faster in the future.
1240: * (Actually we've already saved a few 100 clocks by not loading the trap gate)
1.1 fvdl 1241: */
1242: IDTVEC(syscall) /* builds a trapframe, calls the process' syscall handler, then returns to user mode */
1.32 bouyer 1243: #ifndef XEN
1.70 dsl 1244: /*
1.109 maxv 1245: * The user %rip is in %rcx and the user %rflags in %r11. The kernel %cs
1.107 maxv 1246: * and %ss are loaded, but nothing else is.
1247: *
1248: * The 'swapgs' instruction gives us access to cpu-specific memory where
1249: * we can save a user register and then read the LWP's kernel stack
1250: * pointer.
1251: *
1252: * This code doesn't seem to set %ds, this may not matter since it is
1253: * ignored in 64bit mode, OTOH the syscall instruction sets %ss and that
1254: * is ignored as well.
1.70 dsl 1255: */
1.1 fvdl 1256: swapgs
1257: movq %r15,CPUVAR(SCRATCH) /* park the user's %r15 so it can serve as a scratch register */
1.22 yamt 1258: movq CPUVAR(CURLWP),%r15
1.55 rmind 1259: movq L_PCB(%r15),%r15
1.70 dsl 1260: movq PCB_RSP0(%r15),%r15 /* LWP's kernel stack pointer */
1261:
1262: /* Make stack look like an 'int nn' frame */
1263: #define SP(x) (x)-(TF_SS+8)(%r15)
1.109 maxv 1264: movq $(LSEL(LUDATA_SEL, SEL_UPL)),SP(TF_SS) /* user %ss */
1265: movq %rsp,SP(TF_RSP) /* user %rsp */
1266: movq %r11,SP(TF_RFLAGS) /* user %rflags */
1267: movq $(LSEL(LUCODE_SEL, SEL_UPL)),SP(TF_CS) /* user %cs */
1268: movq %rcx,SP(TF_RIP) /* user %rip */
1.1 fvdl 1269:
1.70 dsl 1270: leaq SP(0),%rsp /* %rsp now valid after frame */
1.1 fvdl 1271: movq CPUVAR(SCRATCH),%r15 /* restore the user's %r15 before the GPRs are saved */
1.70 dsl 1272: #undef SP
1273:
1274: movq $2,TF_ERR(%rsp) /* syscall instruction size */
1.107 maxv 1275: movq $T_ASTFLT,TF_TRAPNO(%rsp) /* trap number consumed by the AST path at label 10 below */
1.70 dsl 1276:
1.59 yamt 1277: movw %es,TF_ES(%rsp)
1.41 ad 1278: sti /* interrupts back on; %rsp already points at the kernel-stack frame */
1.1 fvdl 1279: INTR_SAVE_GPRS
1.24 fvdl 1280: movw %fs,TF_FS(%rsp)
1281: movw %gs,TF_GS(%rsp)
1.1 fvdl 1282: movw $(LSEL(LUDATA_SEL, SEL_UPL)),TF_DS(%rsp) /* the frame records the user data selector rather than the live %ds */
1.32 bouyer 1283: #else
1284: /* Xen already switched to kernel stack */
1285: pushq %rsi /* keep %rsi intact around STI(si) */
1.36 dsl 1286: STI(si)
1.32 bouyer 1287: popq %rsi
1.37 dsl 1288: addq $0x10,%rsp /* gap to match cs:rip */
1289: pushq $2 /* error code */
1.32 bouyer 1290: pushq $T_ASTFLT /* trap number consumed by the AST path at label 10 below */
1.37 dsl 1291: subq $TF_REGSIZE,%rsp /* make room for the register part of the trapframe */
1.32 bouyer 1292: INTR_SAVE_GPRS
1293: movw %fs,TF_FS(%rsp)
1294: movw %gs,TF_GS(%rsp)
1295: movw %es,TF_ES(%rsp)
1296: movw $(LSEL(LUDATA_SEL, SEL_UPL)),TF_DS(%rsp)
1297: #endif
1.1 fvdl 1298:
1.70 dsl 1299: do_syscall:
1.1 fvdl 1300: movq CPUVAR(CURLWP),%r14 /* %r14 = curlwp, kept live for .Lsyscall_checkast */
1.84 maxv 1301: incq CPUVAR(NSYSCALL) /* count it atomically */
1302: movq %rsp,L_MD_REGS(%r14) /* save pointer to frame */
1.1 fvdl 1303: movq L_PROC(%r14),%r15 /* %r15 = curlwp's proc */
1.70 dsl 1304: andl $~MDL_IRET,L_MD_FLAGS(%r14) /* Allow sysret return */
1.37 dsl 1305: movq %rsp,%rdi /* Pass frame as arg0 */
1.1 fvdl 1306: call *P_MD_SYSCALL(%r15) /* indirect call through the proc's syscall handler pointer */
1.29 yamt 1307: .Lsyscall_checkast:
1.70 dsl 1308: /*
1309: * Disable interrupts to avoid new ASTs (etc) being added and
1310: * to ensure we don't take an interrupt with some of the user
1311: * registers loaded.
1312: */
1313: CLI(si)
1.29 yamt 1314: /* Check for ASTs on exit to user mode. */
1.107 maxv 1315: movl L_MD_ASTPENDING(%r14),%eax
1316: orl CPUVAR(WANT_PMAPLOAD),%eax /* nonzero if an AST is pending or a pmap load is wanted */
1.29 yamt 1317: jnz 9f
1.107 maxv 1318:
1.1 fvdl 1319: #ifdef DIAGNOSTIC
1320: cmpl $IPL_NONE,CPUVAR(ILEVEL)
1.107 maxv 1321: jne spl_error
1.1 fvdl 1322: #endif
1.107 maxv 1323:
1324: testl $(MDL_IRET|MDL_COMPAT32),L_MD_FLAGS(%r14) /* result is consumed by 'jnz 2f' below, so the macros in between presumably leave the flags alone */
1.70 dsl 1325: INTR_RESTORE_GPRS
1.58 chs 1326: movw TF_ES(%rsp),%es
1.120 maxv 1327: movw TF_DS(%rsp),%ds
1.70 dsl 1328: SWAPGS
1329: jnz 2f /* MDL_IRET or MDL_COMPAT32 set: return via iretq */
1.32 bouyer 1330: #ifndef XEN
1.107 maxv 1331: movq TF_RIP(%rsp),%rcx /* %rip for sysret */
1332: movq TF_RFLAGS(%rsp),%r11 /* %rflags for sysret */
1333: movq TF_RSP(%rsp),%rsp /* user %rsp, loaded last because the frame is addressed through %rsp */
1.1 fvdl 1334: sysretq
1.32 bouyer 1335: #else
1.107 maxv 1336: addq $TF_RIP,%rsp /* leave %rsp pointing at the saved %rip */
1.32 bouyer 1337: pushq $256 /* VGCF_IN_SYSCALL */
1338: jmp HYPERVISOR_iret
1339: #endif
1.1 fvdl 1340:
1.70 dsl 1341: /*
1342: * If the syscall might have modified some registers, or we are a 32bit
1343: * process we must return to user with an 'iret' instruction.
1344: * If the iret faults in kernel (assumed due to illegal register values)
1345: * then a SIGSEGV will be signalled.
1346: */
1347: 2:
1.107 maxv 1348: addq $TF_RIP,%rsp /* leave %rsp pointing at the saved %rip for iretq */
1.70 dsl 1349: iretq
1350:
1.1 fvdl 1351: #ifdef DIAGNOSTIC
1.107 maxv 1352: /* Report SPL error */
1353: spl_error:
1354: movabsq $4f,%rdi /* printf format string at label 4 below */
1.1 fvdl 1355: movl TF_RAX(%rsp),%esi /* 1st %d: saved %rax from the frame */
1356: movl TF_RDI(%rsp),%edx /* 2nd %d: saved %rdi from the frame */
1357: movl %ebx,%ecx /* 1st %x: current %ebx */
1358: movl CPUVAR(ILEVEL),%r8d /* 2nd %x: the unexpected interrupt level */
1359: xorq %rax,%rax /* %al = 0: no vector-register arguments for the variadic call */
1360: call _C_LABEL(printf)
1.35 ad 1361: movl $IPL_NONE,%edi
1362: call _C_LABEL(spllower) /* lower to IPL_NONE, then redo the exit checks */
1.29 yamt 1363: jmp .Lsyscall_checkast
1.1 fvdl 1364: 4: .asciz "WARNING: SPL NOT LOWERED ON SYSCALL %d %d EXIT %x %x\n"
1365: #endif
1.70 dsl 1366:
1367: /* AST pending or pmap load needed */
1.41 ad 1368: 9:
1.107 maxv 1369: cmpl $0,CPUVAR(WANT_PMAPLOAD)
1.41 ad 1370: jz 10f /* no pmap load wanted, so it must be an AST */
1371: STI(si)
1.29 yamt 1372: call _C_LABEL(do_pmap_load)
1373: jmp .Lsyscall_checkast /* re-check ASTs */
1.41 ad 1374: 10:
1375: CLEAR_ASTPENDING(%r14)
1376: STI(si)
1377: /* Pushed T_ASTFLT into tf_trapno on entry. */
1378: movq %rsp,%rdi /* argument: pointer to the trapframe */
1379: call _C_LABEL(trap)
1380: jmp .Lsyscall_checkast /* re-check ASTs */
1.71 uebayasi 1381: IDTVEC_END(syscall)
1.29 yamt 1382:
1.22 yamt 1383: /*
1384: * void lwp_trampoline(void);
1385: *
1386: * This is a trampoline function run by newly created LWPs
1.70 dsl 1387: * in order to do additional setup in their context.
1.22 yamt 1388: */
1389: NENTRY(lwp_trampoline) /* inputs arrive in %rax, %rbp, %r12 and %r13 - presumably set up by cpu_lwp_fork()/cpu_switchto(); confirm */
1390: movq %rbp,%rsi /* second argument to lwp_startup = incoming %rbp */
1.41 ad 1391: movq %rbp,%r14 /* for .Lsyscall_checkast */
1.22 yamt 1392: movq %rax,%rdi /* first argument to lwp_startup = incoming %rax */
1393: xorq %rbp,%rbp /* zero the frame pointer */
1394: call _C_LABEL(lwp_startup)
1.1 fvdl 1395: movq %r13,%rdi /* argument for the function held in %r12 */
1396: call *%r12
1.41 ad 1397: jmp .Lsyscall_checkast /* leave through the syscall exit path, with %r14 as set above */
1.71 uebayasi 1398: END(lwp_trampoline)
1.1 fvdl 1399:
1400: /*
1.22 yamt 1401: * osyscall()
1402: *
1.1 fvdl 1403: * Trap gate entry for int $80 syscall, also used by sigreturn.
1404: */
1405: IDTVEC(osyscall) /* builds a trapframe, then joins the common path at do_syscall */
1.32 bouyer 1406: #ifdef XEN
1407: movq (%rsp),%rcx /* two extra words sit on top of the stack here - presumably pushed by Xen; confirm */
1408: movq 8(%rsp),%r11
1409: addq $0x10,%rsp /* discard those two words */
1410: #endif
1.84 maxv 1411: pushq $2 /* size of instruction for restart */
1412: pushq $T_ASTFLT /* trap # for doing ASTs */
1.1 fvdl 1413: INTRENTRY
1.36 dsl 1414: STI(si)
1.70 dsl 1415: jmp do_syscall /* shared with the 'syscall' instruction entry */
1.71 uebayasi 1416: IDTVEC_END(osyscall)
1.26 ad 1417:
1418: /*
1.66 chs 1419: * bool sse2_idlezero_page(void *pg)
1.26 ad 1420: *
1.47 ad 1421: * Zero a page without polluting the cache. Preemption must be
1422: * disabled by the caller. Abort if a preemption is pending.
1.66 chs 1423: * Returns true if the page is zeroed, false if not.
1.26 ad 1424: */
1.47 ad 1425: ENTRY(sse2_idlezero_page) /* %rdi = page to zero; returns 1 in %eax when finished, 0 when aborted */
1426: pushq %rbp
1427: movq %rsp,%rbp
1428: movl $(PAGE_SIZE/64), %ecx /* loop count: 64 bytes are cleared per iteration */
1.26 ad 1429: xorq %rax, %rax /* zero to store; doubles as the 'false' return value */
1430: .align 16
1431: 1:
1.50 ad 1432: testl $RESCHED_KPREEMPT, CPUVAR(RESCHED) /* checked before every 64-byte chunk */
1.47 ad 1433: jnz 2f /* flag set: abort, leaving the rest of the page untouched */
1.26 ad 1434: movnti %rax, 0(%rdi) /* eight 8-byte non-temporal stores = 64 bytes */
1435: movnti %rax, 8(%rdi)
1436: movnti %rax, 16(%rdi)
1437: movnti %rax, 24(%rdi)
1438: movnti %rax, 32(%rdi)
1439: movnti %rax, 40(%rdi)
1440: movnti %rax, 48(%rdi)
1441: movnti %rax, 56(%rdi)
1.47 ad 1442: addq $64, %rdi
1443: decl %ecx
1.26 ad 1444: jnz 1b
1445: sfence /* order the non-temporal stores before returning */
1.47 ad 1446: incl %eax /* %eax = 1: whole page zeroed */
1447: popq %rbp
1.26 ad 1448: ret
1.47 ad 1449: 2:
1.26 ad 1450: sfence /* order the stores already issued; %eax is still 0 */
1.47 ad 1451: popq %rbp
1.26 ad 1452: ret
1.71 uebayasi 1453: END(sse2_idlezero_page)
1.66 chs 1454:
1455: /*
1456: * void pagezero(vaddr_t va)
1457: *
1458: * Zero a page without polluting the cache.
1459: */
1460:
1461: ENTRY(pagezero) /* %rdi = va of the page to zero; no return value is set */
1462: movq $-PAGE_SIZE,%rdx /* %rdx = negative byte offset, counts up towards 0 */
1463: subq %rdx,%rdi /* %rdi = va + PAGE_SIZE, so (%rdi,%rdx) starts at va */
1464: xorq %rax,%rax /* value to store */
1465: 1:
1466: movnti %rax,(%rdi,%rdx) /* eight 8-byte non-temporal stores = 64 bytes */
1467: movnti %rax,8(%rdi,%rdx)
1468: movnti %rax,16(%rdi,%rdx)
1469: movnti %rax,24(%rdi,%rdx)
1470: movnti %rax,32(%rdi,%rdx)
1471: movnti %rax,40(%rdi,%rdx)
1472: movnti %rax,48(%rdi,%rdx)
1473: movnti %rax,56(%rdi,%rdx)
1474: addq $64,%rdx /* next chunk; ZF is set once the offset reaches 0 */
1475: jne 1b
1476: sfence /* order the non-temporal stores before returning */
1477: ret
1.71 uebayasi 1478: END(pagezero)
CVSweb <webmaster@jp.NetBSD.org>