Annotation of src/sys/arch/amd64/amd64/locore.S, Revision 1.207
1.207 ! christos 1: /* $NetBSD: locore.S,v 1.206 2020/05/02 16:44:34 bouyer Exp $ */
1.1 fvdl 2:
3: /*
4: * Copyright-o-rama!
5: */
6:
7: /*
1.84 maxv 8: * Copyright (c) 1998, 2000, 2007, 2008, 2016 The NetBSD Foundation, Inc.
9: * All rights reserved.
10: *
11: * This code is derived from software contributed to The NetBSD Foundation
1.100 maxv 12: * by Charles M. Hannum and by Maxime Villard.
1.84 maxv 13: *
14: * Redistribution and use in source and binary forms, with or without
15: * modification, are permitted provided that the following conditions
16: * are met:
17: * 1. Redistributions of source code must retain the above copyright
18: * notice, this list of conditions and the following disclaimer.
19: * 2. Redistributions in binary form must reproduce the above copyright
20: * notice, this list of conditions and the following disclaimer in the
21: * documentation and/or other materials provided with the distribution.
22: *
23: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
24: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
27: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33: * POSSIBILITY OF SUCH DAMAGE.
34: */
35:
36: /*
1.32 bouyer 37: * Copyright (c) 2007 Manuel Bouyer.
38: *
39: * Redistribution and use in source and binary forms, with or without
40: * modification, are permitted provided that the following conditions
41: * are met:
42: * 1. Redistributions of source code must retain the above copyright
43: * notice, this list of conditions and the following disclaimer.
44: * 2. Redistributions in binary form must reproduce the above copyright
45: * notice, this list of conditions and the following disclaimer in the
46: * documentation and/or other materials provided with the distribution.
47: *
48: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
49: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
50: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
51: * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
52: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
53: * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
54: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
55: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
56: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
57: * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
58: *
59: */
60:
61: /*
62: * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr>
63: *
64: * Permission to use, copy, modify, and distribute this software for any
65: * purpose with or without fee is hereby granted, provided that the above
66: * copyright notice and this permission notice appear in all copies.
67: *
68: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
69: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
70: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
71: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
72: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
73: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
74: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
75: */
76:
77: /*
1.1 fvdl 78: * Copyright (c) 2001 Wasabi Systems, Inc.
79: * All rights reserved.
80: *
81: * Written by Frank van der Linden for Wasabi Systems, Inc.
82: *
83: * Redistribution and use in source and binary forms, with or without
84: * modification, are permitted provided that the following conditions
85: * are met:
86: * 1. Redistributions of source code must retain the above copyright
87: * notice, this list of conditions and the following disclaimer.
88: * 2. Redistributions in binary form must reproduce the above copyright
89: * notice, this list of conditions and the following disclaimer in the
90: * documentation and/or other materials provided with the distribution.
91: * 3. All advertising materials mentioning features or use of this software
92: * must display the following acknowledgement:
93: * This product includes software developed for the NetBSD Project by
94: * Wasabi Systems, Inc.
95: * 4. The name of Wasabi Systems, Inc. may not be used to endorse
96: * or promote products derived from this software without specific prior
97: * written permission.
98: *
99: * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
100: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
101: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
102: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
103: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
104: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
105: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
106: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
107: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
108: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
109: * POSSIBILITY OF SUCH DAMAGE.
110: */
111:
112: /*-
113: * Copyright (c) 1990 The Regents of the University of California.
114: * All rights reserved.
115: *
116: * This code is derived from software contributed to Berkeley by
117: * William Jolitz.
118: *
119: * Redistribution and use in source and binary forms, with or without
120: * modification, are permitted provided that the following conditions
121: * are met:
122: * 1. Redistributions of source code must retain the above copyright
123: * notice, this list of conditions and the following disclaimer.
124: * 2. Redistributions in binary form must reproduce the above copyright
125: * notice, this list of conditions and the following disclaimer in the
126: * documentation and/or other materials provided with the distribution.
1.5 agc 127: * 3. Neither the name of the University nor the names of its contributors
1.1 fvdl 128: * may be used to endorse or promote products derived from this software
129: * without specific prior written permission.
130: *
131: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
132: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
133: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
134: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
135: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
136: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
137: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
138: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
139: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
140: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
141: * SUCH DAMAGE.
142: *
143: * @(#)locore.s 7.3 (Berkeley) 5/13/91
144: */
145:
1.84 maxv 146: /* Override user-land alignment before including asm.h */
1.34 dsl 147: #define ALIGN_DATA .align 8 /* 8-byte data alignment */
148: #define ALIGN_TEXT .align 16,0x90 /* 16-byte text alignment, 0x90 (NOP) fill */
149: #define _ALIGN_TEXT ALIGN_TEXT
150:
151: #include <machine/asm.h>
152:
1.205 maxv 153: #include "opt_kasan.h"
1.78 uebayasi 154: #include "opt_copy_symtab.h"
1.1 fvdl 155: #include "opt_ddb.h"
156: #include "opt_ddbparam.h"
1.51 apb 157: #include "opt_modular.h"
1.1 fvdl 158: #include "opt_realmem.h"
159:
1.12 drochner 160: #include "opt_compat_netbsd.h"
161: #include "opt_compat_netbsd32.h"
1.32 bouyer 162: #include "opt_xen.h"
1.145 maxv 163: #include "opt_svs.h"
1.12 drochner 164:
1.1 fvdl 165: #include "assym.h"
166: #include "lapic.h"
167: #include "ioapic.h"
1.2 fvdl 168: #include "ksyms.h"
1.1 fvdl 169:
170: #include <sys/errno.h>
171: #include <sys/syscall.h>
172:
173: #include <machine/pte.h>
174: #include <machine/segments.h>
175: #include <machine/specialreg.h>
176: #include <machine/trap.h>
177: #include <machine/bootinfo.h>
178: #include <machine/frameasm.h>
1.44 ad 179: #include <machine/cputypes.h>
1.1 fvdl 180:
181: #if NLAPIC > 0
182: #include <machine/i82489reg.h>
183: #endif
184:
185: /* Get definitions for IOM_BEGIN, IOM_END, and IOM_SIZE */
186: #include <dev/isa/isareg.h>
187:
 /*
  * _RELOC/RELOC: turn a kernel virtual (linked) address into its load-time
  * physical address; used by the 32-bit startup code before paging is on.
  */
1.71 uebayasi 188: #define _RELOC(x) ((x) - KERNBASE)
189: #define RELOC(x) _RELOC(_C_LABEL(x))
190:
1.180 maxv 191: /* 32bit version of PTE_NX */
192: #define PTE_NX32 0x80000000
1.86 maxv 193:
 /* Number of L2/L3 PT pages; doubled when KERNBASE needs its own slot. */
1.83 maxv 194: #if L2_SLOT_KERNBASE > 0
195: #define TABLE_L2_ENTRIES (2 * (NKL2_KIMG_ENTRIES + 1))
196: #else
197: #define TABLE_L2_ENTRIES (NKL2_KIMG_ENTRIES + 1)
198: #endif
199:
200: #if L3_SLOT_KERNBASE > 0
201: #define TABLE_L3_ENTRIES (2 * NKL3_KIMG_ENTRIES)
202: #else
203: #define TABLE_L3_ENTRIES NKL3_KIMG_ENTRIES
204: #endif
205:
 /*
  * Byte offsets of each piece inside the bootstrap tables, in memory order:
  * PML4 page, lwp0 stack (UPAGES pages), then the L3/L2/L1 page-table pages.
  * TABLESIZE is the total, in bytes, of that area.
  */
206: #define PROC0_PML4_OFF 0
1.97 maxv 207: #define PROC0_STK_OFF (PROC0_PML4_OFF + 1 * PAGE_SIZE)
208: #define PROC0_PTP3_OFF (PROC0_STK_OFF + UPAGES * PAGE_SIZE)
1.83 maxv 209: #define PROC0_PTP2_OFF (PROC0_PTP3_OFF + NKL4_KIMG_ENTRIES * PAGE_SIZE)
210: #define PROC0_PTP1_OFF (PROC0_PTP2_OFF + TABLE_L3_ENTRIES * PAGE_SIZE)
211: #define TABLESIZE \
1.97 maxv 212: ((NKL4_KIMG_ENTRIES + TABLE_L3_ENTRIES + TABLE_L2_ENTRIES + 1 + UPAGES) \
1.83 maxv 213: * PAGE_SIZE)
214:
1.121 maxv 215: /* Amount of VA used to map the kernel, the syms and the preloaded modules */
216: #define BOOTMAP_VA_SIZE \
217: (NKL2_KIMG_ENTRIES * (1 << L2_SHIFT) - TABLESIZE - IOM_SIZE)
218:
1.83 maxv 219: /*
220: * fillkpt - Fill in a kernel page table
221: * eax = pte (page frame | control | status)
222: * ebx = page table address
223: * ecx = number of pages to map
 * On exit: %eax and %ebx point one entry past the last one written, and
 * %ecx is zero (consumed by "loop"). EFLAGS are clobbered. A zero count
 * is a no-op (the cmpl/je guard below).
224: *
225: * Each entry is 8 (PDE_SIZE) bytes long: we must set the 4 upper bytes to 0.
226: */
227: #define fillkpt \
1.94 maxv 228: cmpl $0,%ecx ; /* zero-sized? */ \
229: je 2f ; \
1.91 maxv 230: 1: movl $0,(PDE_SIZE-4)(%ebx) ; /* upper 32 bits: 0 */ \
231: movl %eax,(%ebx) ; /* store phys addr */ \
232: addl $PDE_SIZE,%ebx ; /* next PTE/PDE */ \
233: addl $PAGE_SIZE,%eax ; /* next phys page */ \
1.94 maxv 234: loop 1b ; \
235: 2: ;
1.83 maxv 236:
1.84 maxv 237: /*
1.89 maxv 238: * fillkpt_nox - Same as fillkpt, but sets the NX/XD bit.
 * Same inputs and exit state as fillkpt (eax=pte, ebx=table, ecx=count).
 * The NX bit is taken from nox_flag, which is 0 when the CPU lacks NX,
 * so the upper 32 bits of each entry are then 0 as in fillkpt.
 * %ebp is used as scratch but saved/restored around the loop.
239: */
240: #define fillkpt_nox \
1.94 maxv 241: cmpl $0,%ecx ; /* zero-sized? */ \
242: je 2f ; \
1.91 maxv 243: pushl %ebp ; \
244: movl RELOC(nox_flag),%ebp ; \
245: 1: movl %ebp,(PDE_SIZE-4)(%ebx) ; /* upper 32 bits: NX */ \
246: movl %eax,(%ebx) ; /* store phys addr */ \
1.89 maxv 247: addl $PDE_SIZE,%ebx ; /* next PTE/PDE */ \
248: addl $PAGE_SIZE,%eax ; /* next phys page */ \
1.91 maxv 249: loop 1b ; \
1.94 maxv 250: popl %ebp ; \
251: 2: ;
1.89 maxv 252:
253: /*
1.96 maxv 254: * fillkpt_blank - Fill in a kernel page table with blank entries
255: * ebx = page table address
256: * ecx = number of pages to map
 * Zeroes whole 64-bit entries (i.e. marks them not-present). On exit %ebx
 * points past the last entry and %ecx is zero; %eax is not touched.
257: */
258: #define fillkpt_blank \
259: cmpl $0,%ecx ; /* zero-sized? */ \
260: je 2f ; \
261: 1: movl $0,(PDE_SIZE-4)(%ebx) ; /* upper 32 bits: 0 */ \
262: movl $0,(%ebx) ; /* lower 32 bits: 0 */ \
263: addl $PDE_SIZE,%ebx ; /* next PTE/PDE */ \
264: loop 1b ; \
265: 2: ;
266:
267: /*
1.84 maxv 268: * killkpt - Destroy a kernel page table (long mode)
269: * rbx = page table address
270: * rcx = number of pages to destroy
 *
 * Unlike fillkpt there is no zero-count guard: rcx must be >= 1, since
 * "loop" decrements before testing and would wrap on rcx == 0. All call
 * sites below pass compile-time constants (NKL*_KIMG_ENTRIES).
271: */
272: #define killkpt \
273: 1: movq $0,(%rbx) ; \
274: addq $PDE_SIZE,%rbx ; \
275: loop 1b ;
276:
1.83 maxv 277:
1.32 bouyer 278: #ifdef XEN
1.99 bouyer 279: #define __ASSEMBLY__
1.175 cherry 280: #include <xen/include/public/elfnote.h>
281: #include <xen/include/public/xen.h>
1.177 cherry 282:
 /*
  * ELFNOTE - emit one ELF note in section .note.<name>:
  * namesz, descsz, type words, then the NUL-terminated name and the
  * descriptor, each padded to 4-byte alignment (standard ELF note layout).
  */
1.99 bouyer 283: #define ELFNOTE(name, type, desctype, descdata...) \
284: .pushsection .note.name ; \
285: .align 4 ; \
286: .long 2f - 1f /* namesz */ ; \
287: .long 4f - 3f /* descsz */ ; \
288: .long type ; \
289: 1:.asciz #name ; \
290: 2:.align 4 ; \
291: 3:desctype descdata ; \
292: 4:.align 4 ; \
293: .popsection
294:
1.32 bouyer 295: /*
1.73 uebayasi 296: * Xen guest identifier and loader selection
1.32 bouyer 297: */
298: .section __xen_guest
1.99 bouyer 299: ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz, "NetBSD")
300: ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz, "4.99")
301: ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz, "xen-3.0")
302: ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .quad, KERNBASE)
1.206 bouyer 303: #ifdef XENPV
1.99 bouyer 304: ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .quad, KERNBASE)
305: ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .quad, start)
1.206 bouyer 306: #else
307: ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .quad, 0)
308: ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .long, RELOC(start_xen32))
309: #endif /* XENPV */
1.99 bouyer 310: ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .quad, hypercall_page)
311: ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .quad, HYPERVISOR_VIRT_START)
1.206 bouyer 312: ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, "writable_descriptor_tables|auto_translated_physmap|supervisor_mode_kernel|hvm_callback_vector")
1.99 bouyer 313: ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "yes")
 /*
  * NOTE(review): the trailing '\' on the next line continues it into the
  * LOADER note below, merging two ELFNOTE invocations into one logical
  * line -- looks unintended; verify against the upstream file.
  */
1.180 maxv 314: ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .long, PTE_P, PTE_P)\
1.99 bouyer 315: ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz, "generic")
316: ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long, 0)
1.49 ad 317: #if NKSYMS > 0 || defined(DDB) || defined(MODULAR)
1.99 bouyer 318: ELFNOTE(Xen, XEN_ELFNOTE_BSD_SYMTAB, .asciz, "yes")
1.32 bouyer 319: #endif
1.206 bouyer 320: #endif /* XEN */
1.1 fvdl 321:
322: /*
323: * Initialization
 * Boot-time variables filled in by the startup code below and exported
 * to C (and, for PDPpaddr, to libkvm).
324: */
325: .data
326:
1.98 maxv 327: .globl _C_LABEL(tablesize)
1.86 maxv 328: .globl _C_LABEL(nox_flag)
1.100 maxv 329: .globl _C_LABEL(cputype)
1.57 jym 330: .globl _C_LABEL(cpuid_level)
1.71 uebayasi 331: .globl _C_LABEL(esym)
332: .globl _C_LABEL(eblob)
1.100 maxv 333: .globl _C_LABEL(atdevbase)
334: .globl _C_LABEL(PDPpaddr)
1.71 uebayasi 335: .globl _C_LABEL(boothowto)
336: .globl _C_LABEL(bootinfo)
337: .globl _C_LABEL(biosbasemem)
338: .globl _C_LABEL(biosextmem)
1.130 maxv 339: .globl _C_LABEL(lwp0uarea)
 /* NOTE(review): do_mov_* / do_iret appear to be labels defined later in
  * this file (not visible in this chunk) -- verify before relying on them. */
1.138 maxv 340: .globl do_mov_es
341: .globl do_mov_ds
342: .globl do_mov_fs
343: .globl do_mov_gs
1.129 maxv 344: .globl do_iret
1.43 ad 345:
1.98 maxv 346: .type _C_LABEL(tablesize), @object
347: _C_LABEL(tablesize): .long TABLESIZE
348: END(tablesize)
1.86 maxv 349: .type _C_LABEL(nox_flag), @object
350: LABEL(nox_flag) .long 0 /* 32bit NOX flag, set if supported */
351: END(nox_flag)
1.71 uebayasi 352: .type _C_LABEL(cputype), @object
1.84 maxv 353: LABEL(cputype) .long 0 /* are we 80486, Pentium, or.. */
1.71 uebayasi 354: END(cputype)
355: .type _C_LABEL(cpuid_level), @object
1.84 maxv 356: LABEL(cpuid_level) .long -1 /* max. level accepted by cpuid instr */
1.71 uebayasi 357: END(cpuid_level)
358: .type _C_LABEL(esym), @object
1.84 maxv 359: LABEL(esym) .quad 0 /* ptr to end of syms */
1.71 uebayasi 360: END(esym)
361: .type _C_LABEL(eblob), @object
1.84 maxv 362: LABEL(eblob) .quad 0 /* ptr to end of modules */
1.71 uebayasi 363: END(eblob)
364: .type _C_LABEL(atdevbase), @object
1.84 maxv 365: LABEL(atdevbase) .quad 0 /* location of start of iomem in virt */
1.71 uebayasi 366: END(atdevbase)
367: .type _C_LABEL(PDPpaddr), @object
1.84 maxv 368: LABEL(PDPpaddr) .quad 0 /* paddr of PTD, for libkvm */
1.71 uebayasi 369: END(PDPpaddr)
370: .type _C_LABEL(biosbasemem), @object
1.1 fvdl 371: #ifndef REALBASEMEM
1.84 maxv 372: LABEL(biosbasemem) .long 0 /* base memory reported by BIOS */
1.1 fvdl 373: #else
1.71 uebayasi 374: LABEL(biosbasemem) .long REALBASEMEM
1.1 fvdl 375: #endif
1.71 uebayasi 376: END(biosbasemem)
377: .type _C_LABEL(biosextmem), @object
1.1 fvdl 378: #ifndef REALEXTMEM
1.84 maxv 379: LABEL(biosextmem) .long 0 /* extended memory reported by BIOS */
1.1 fvdl 380: #else
1.71 uebayasi 381: LABEL(biosextmem) .long REALEXTMEM
1.1 fvdl 382: #endif
1.71 uebayasi 383: END(biosextmem)
1.130 maxv 384: .type _C_LABEL(lwp0uarea), @object
385: LABEL(lwp0uarea) .quad 0 /* VA of lwp0's uarea (bootstrap stack), set below */
386: END(lwp0uarea)
1.1 fvdl 387:
1.176 cherry 388: #ifndef XENPV
1.56 jym 389: .globl gdt64_lo
390: .globl gdt64_hi
1.1 fvdl 391:
1.56 jym 392: #define GDT64_LIMIT gdt64_end-gdt64_start-1
393: /* Temporary gdt64, with base address in low memory */
1.71 uebayasi 394: .type _C_LABEL(gdt64_lo), @object
395: LABEL(gdt64_lo)
1.56 jym 396: .word GDT64_LIMIT
1.1 fvdl 397: .quad _RELOC(gdt64_start) /* physical base, pre-paging */
1.71 uebayasi 398: END(gdt64_lo)
1.1 fvdl 399: .align 64
400:
1.56 jym 401: /* Temporary gdt64, with base address in high memory */
1.71 uebayasi 402: .type _C_LABEL(gdt64_hi), @object
403: LABEL(gdt64_hi)
1.56 jym 404: .word GDT64_LIMIT
405: .quad gdt64_start /* virtual base, once paging is on */
1.71 uebayasi 406: END(gdt64_hi)
1.56 jym 407: .align 64
408: #undef GDT64_LIMIT
409:
1.71 uebayasi 410: .type _C_LABEL(gdt64_start), @object
411: _C_LABEL(gdt64_start):
1.1 fvdl 412: .quad 0x0000000000000000 /* always empty */
413: .quad 0x00af9a000000ffff /* kernel CS */
414: .quad 0x00cf92000000ffff /* kernel DS */
1.71 uebayasi 415: END(gdt64_start)
1.1 fvdl 416: gdt64_end:
417:
 /*
  * 48-bit far pointer (32-bit offset + 16-bit selector) consumed by the
  * "ljmp *(%eax)" below to enter the 64-bit code segment at "longmode".
  */
1.71 uebayasi 418: .type _C_LABEL(farjmp64), @object
419: _C_LABEL(farjmp64):
1.57 jym 420: .long _RELOC(longmode) /* 32-bit (physical) target offset */
1.1 fvdl 421: .word GSEL(GCODE_SEL, SEL_KPL) /* 64-bit code segment selector */
1.71 uebayasi 422: END(farjmp64)
1.84 maxv 423:
1.206 bouyer 424: #ifdef XEN
425: /* 32bit GDT */
426: gdtdesc32:
427: .word gdt32end - gdt32
428: .long RELOC(gdt32)
429: .long 0
430: gdt32:
431: .long 0 # null descriptor
432: .long 0
433: .long 0x0000ffff # %cs
434: .long 0x00cf9a00
435: .long 0x0000ffff # %ds, %es, %ss
436: .long 0x00cf9200
437: gdt32end:
438: #endif /* XEN */
1.176 cherry 439: #endif /* !XENPV */
1.71 uebayasi 440:
1.84 maxv 441: /* Space for the temporary stack */
1.71 uebayasi 442: .size tmpstk, tmpstk - . /* tmpstk - . == the 512 bytes reserved below */
1.84 maxv 443: .space 512
1.1 fvdl 444: tmpstk:
445:
446: /*
447: * Some hackage to deal with 64bit symbols in 32 bit mode.
1.79 maxv 448: * This may not be needed if things are cleaned up a little.
1.1 fvdl 449: */
450:
451: .text
452: .globl _C_LABEL(kernel_text)
453: .set _C_LABEL(kernel_text),KERNTEXTOFF
454:
1.71 uebayasi 455: ENTRY(start)
1.176 cherry 456: #ifndef XENPV
1.71 uebayasi 457: .code32
1.81 maxv 458:
459: /* Warm boot */
460: movw $0x1234,0x472
461:
1.1 fvdl 462: /*
1.84 maxv 463: * Load parameters from the stack (32 bits):
1.91 maxv 464: * boothowto, [bootdev], bootinfo, esym, biosextmem, biosbasemem
1.81 maxv 465: * We are not interested in 'bootdev'.
1.1 fvdl 466: */
1.81 maxv 467:
468: /* Load 'boothowto' */
1.1 fvdl 469: movl 4(%esp),%eax
470: movl %eax,RELOC(boothowto)
1.81 maxv 471:
472: /* Load 'bootinfo' */
1.1 fvdl 473: movl 12(%esp),%eax
1.81 maxv 474: testl %eax,%eax /* bootinfo = NULL? */
1.142 maxv 475: jz .Lbootinfo_finished
1.81 maxv 476:
1.121 maxv 477: movl (%eax),%ebx /* bootinfo::bi_nentries */
1.1 fvdl 478: movl $RELOC(bootinfo),%ebp
1.81 maxv 479: movl %ebp,%edx
1.1 fvdl 480: addl $BOOTINFO_MAXSIZE,%ebp
1.81 maxv 481: movl %ebx,(%edx)
482: addl $4,%edx
483:
1.142 maxv 484: .Lbootinfo_entryloop:
1.81 maxv 485: testl %ebx,%ebx /* no remaining entries? */
1.142 maxv 486: jz .Lbootinfo_finished
1.81 maxv 487:
488: addl $4,%eax
489: movl (%eax),%ecx /* address of entry */
1.1 fvdl 490: pushl %edi
491: pushl %esi
492: pushl %eax
493:
1.84 maxv 494: movl (%ecx),%eax /* btinfo_common::len (size of entry) */
1.1 fvdl 495: movl %edx,%edi
1.121 maxv 496: addl %eax,%edx /* update dest pointer */
1.81 maxv 497: cmpl %ebp,%edx /* beyond bootinfo+BOOTINFO_MAXSIZE? */
1.142 maxv 498: jg .Lbootinfo_overflow
1.81 maxv 499:
1.1 fvdl 500: movl %ecx,%esi
501: movl %eax,%ecx
1.81 maxv 502:
1.43 ad 503: /*
1.121 maxv 504: * If any modules were loaded, record where they end. 'eblob' is used
505: * later to compute the initial bootstrap tables.
1.43 ad 506: */
1.84 maxv 507: cmpl $BTINFO_MODULELIST,4(%esi) /* btinfo_common::type */
1.142 maxv 508: jne .Lbootinfo_copy
1.81 maxv 509:
1.121 maxv 510: /* Skip the modules if we won't have enough VA to map them */
511: movl 12(%esi),%eax /* btinfo_modulelist::endpa */
512: addl $PGOFSET,%eax /* roundup to a page */
513: andl $~PGOFSET,%eax
514: cmpl $BOOTMAP_VA_SIZE,%eax
1.142 maxv 515: jg .Lbootinfo_skip
1.121 maxv 516: movl %eax,RELOC(eblob)
1.81 maxv 517: addl $KERNBASE_LO,RELOC(eblob)
518: adcl $KERNBASE_HI,RELOC(eblob)+4
519:
1.142 maxv 520: .Lbootinfo_copy:
1.1 fvdl 521: rep
1.81 maxv 522: movsb /* copy esi -> edi */
1.142 maxv 523: jmp .Lbootinfo_next
1.121 maxv 524:
1.142 maxv 525: .Lbootinfo_skip:
1.121 maxv 526: subl %ecx,%edx /* revert dest pointer */
527:
1.142 maxv 528: .Lbootinfo_next:
1.1 fvdl 529: popl %eax
530: popl %esi
531: popl %edi
1.81 maxv 532: subl $1,%ebx /* decrement the # of entries */
1.142 maxv 533: jmp .Lbootinfo_entryloop
1.81 maxv 534:
1.142 maxv 535: .Lbootinfo_overflow:
1.81 maxv 536: /*
537: * Cleanup for overflow case. Pop the registers, and correct the number
538: * of entries.
539: */
1.1 fvdl 540: popl %eax
541: popl %esi
542: popl %edi
543: movl $RELOC(bootinfo),%ebp
1.81 maxv 544: movl %ebp,%edx
545: subl %ebx,(%edx) /* correct the number of entries */
1.142 maxv 546: .Lbootinfo_finished:
1.1 fvdl 547:
1.81 maxv 548: /* Load 'esym' */
1.72 uebayasi 549: movl 16(%esp),%eax
1.81 maxv 550: testl %eax,%eax /* esym = NULL? */
1.1 fvdl 551: jz 1f
1.81 maxv 552:
1.1 fvdl 553: addl $KERNBASE_LO,%eax
1.81 maxv 554:
555: 1:
556: movl $RELOC(esym),%ebp
1.1 fvdl 557: movl %eax,(%ebp)
558: movl $KERNBASE_HI,4(%ebp)
559:
1.83 maxv 560: /* Load 'biosextmem' */
1.1 fvdl 561: movl $RELOC(biosextmem),%ebp
562: movl (%ebp),%eax
1.83 maxv 563: testl %eax,%eax /* already set? */
1.142 maxv 564: jnz .Lbiosextmem_finished
1.81 maxv 565:
1.1 fvdl 566: movl 20(%esp),%eax
567: movl %eax,(%ebp)
1.81 maxv 568:
1.142 maxv 569: .Lbiosextmem_finished:
1.83 maxv 570: /* Load 'biosbasemem' */
1.1 fvdl 571: movl $RELOC(biosbasemem),%ebp
572: movl (%ebp),%eax
1.83 maxv 573: testl %eax,%eax /* already set? */
1.142 maxv 574: jnz .Lbiosbasemem_finished
1.83 maxv 575:
1.1 fvdl 576: movl 24(%esp),%eax
577: movl %eax,(%ebp)
578:
1.142 maxv 579: .Lbiosbasemem_finished:
1.83 maxv 580: /*
1.84 maxv 581: * Done with the parameters!
1.83 maxv 582: */
1.84 maxv 583:
584: /* First, reset the PSL. */
1.1 fvdl 585: pushl $PSL_MBO
586: popfl
587:
588: xorl %eax,%eax
589: cpuid
590: movl %eax,RELOC(cpuid_level)
591:
592: /*
593: * Finished with old stack; load new %esp now instead of later so we
594: * can trace this code without having to worry about the trace trap
595: * clobbering the memory test or the zeroing of the bss+bootstrap page
596: * tables.
597: *
598: * The boot program should check:
599: * text+data <= &stack_variable - more_space_for_stack
600: * text+data+bss+pad+space_for_page_tables <= end_of_memory
1.100 maxv 601: *
1.84 maxv 602: * XXX: the gdt is in the carcass of the boot program so clearing
1.1 fvdl 603: * the rest of memory is still not possible.
604: */
605: movl $RELOC(tmpstk),%esp
606:
1.86 maxv 607: /*
1.180 maxv 608: * Retrieve the NX/XD flag. We use the 32bit version of PTE_NX.
1.86 maxv 609: */
610: movl $0x80000001,%eax
611: cpuid
612: andl $CPUID_NOX,%edx
1.142 maxv 613: jz .Lno_NOX
1.180 maxv 614: movl $PTE_NX32,RELOC(nox_flag)
1.142 maxv 615: .Lno_NOX:
1.86 maxv 616:
1.1 fvdl 617: /*
1.84 maxv 618: * There are four levels of pages in amd64: PML4 -> PDP -> PD -> PT. They will
619: * be referred to as: L4 -> L3 -> L2 -> L1.
620: *
621: * Virtual address space of the kernel:
1.97 maxv 622: * +------+--------+------+-----+--------+---------------------+----------
623: * | TEXT | RODATA | DATA | BSS | [SYMS] | [PRELOADED MODULES] | L4 ->
624: * +------+--------+------+-----+--------+---------------------+----------
625: * (1) (2) (3)
626: *
1.101 maxv 627: * --------------+-----+-----+----+-------------+
628: * -> PROC0 STK -> L3 -> L2 -> L1 | ISA I/O MEM |
629: * --------------+-----+-----+----+-------------+
1.97 maxv 630: * (4)
1.84 maxv 631: *
1.97 maxv 632: * PROC0 STK is obviously not linked as a page level. It just happens to be
633: * caught between L4 and L3.
634: *
635: * (PROC0 STK + L4 + L3 + L2 + L1) is later referred to as BOOTSTRAP TABLES.
1.1 fvdl 636: *
1.116 maxv 637: * ISA I/O MEM has no physical page allocated here, just virtual addresses.
638: *
1.88 maxv 639: * Important note: the kernel segments are properly 4k-aligned
640: * (see kern.ldscript), so there's no need to enforce alignment.
1.1 fvdl 641: */
642:
1.84 maxv 643: /* Find end of kernel image; brings us on (1). */
1.115 maxv 644: movl $RELOC(__kernel_end),%edi
1.83 maxv 645:
1.78 uebayasi 646: #if (NKSYMS || defined(DDB) || defined(MODULAR)) && !defined(makeoptions_COPY_SYMTAB)
1.84 maxv 647: /* Save the symbols (if loaded); brings us on (2). */
1.1 fvdl 648: movl RELOC(esym),%eax
649: testl %eax,%eax
650: jz 1f
651: subl $KERNBASE_LO,%eax /* XXX */
652: movl %eax,%edi
653: 1:
654: #endif
1.84 maxv 655: /* Skip over any modules/blobs; brings us on (3). */
1.43 ad 656: movl RELOC(eblob),%eax
657: testl %eax,%eax
658: jz 1f
659: subl $KERNBASE_LO,%eax /* XXX */
660: movl %eax,%edi
661: 1:
1.81 maxv 662:
1.97 maxv 663: /* We are on (3). Align up for BOOTSTRAP TABLES. */
1.1 fvdl 664: movl %edi,%esi
1.84 maxv 665: addl $PGOFSET,%esi
1.1 fvdl 666: andl $~PGOFSET,%esi
667:
1.93 maxv 668: /* We are on the BOOTSTRAP TABLES. Save L4's physical address. */
669: movl $RELOC(PDPpaddr),%ebp
670: movl %esi,(%ebp)
671: movl $0,4(%ebp)
672:
1.84 maxv 673: /* Now, zero out the BOOTSTRAP TABLES (before filling them in). */
1.91 maxv 674: movl %esi,%edi
1.1 fvdl 675: xorl %eax,%eax
676: cld
677: movl $TABLESIZE,%ecx
678: shrl $2,%ecx
679: rep
1.83 maxv 680: stosl /* copy eax -> edi */
1.1 fvdl 681:
1.73 uebayasi 682: /*
1.84 maxv 683: * Build the page tables and levels. We go from L1 to L4, and link the levels
684: * together. Note: RELOC computes &addr - KERNBASE in 32 bits; the value can't
685: * be > 4G, or we can't deal with it anyway, since we are in 32bit mode.
1.73 uebayasi 686: */
1.84 maxv 687: /*
688: * Build L1.
689: */
1.83 maxv 690: leal (PROC0_PTP1_OFF)(%esi),%ebx
1.81 maxv 691:
1.119 maxv 692: /* Skip the area below the kernel text. */
1.96 maxv 693: movl $(KERNTEXTOFF_LO - KERNBASE_LO),%ecx
694: shrl $PGSHIFT,%ecx
695: fillkpt_blank
1.81 maxv 696:
1.88 maxv 697: /* Map the kernel text RX. */
1.96 maxv 698: movl $(KERNTEXTOFF_LO - KERNBASE_LO),%eax /* start of TEXT */
1.88 maxv 699: movl $RELOC(__rodata_start),%ecx
700: subl %eax,%ecx
701: shrl $PGSHIFT,%ecx
1.180 maxv 702: orl $(PTE_P),%eax
1.88 maxv 703: fillkpt
1.1 fvdl 704:
1.91 maxv 705: /* Map the kernel rodata R. */
1.88 maxv 706: movl $RELOC(__rodata_start),%eax
707: movl $RELOC(__data_start),%ecx
1.1 fvdl 708: subl %eax,%ecx
709: shrl $PGSHIFT,%ecx
1.180 maxv 710: orl $(PTE_P),%eax
1.89 maxv 711: fillkpt_nox
1.1 fvdl 712:
1.90 maxv 713: /* Map the kernel data+bss RW. */
714: movl $RELOC(__data_start),%eax
715: movl $RELOC(__kernel_end),%ecx
716: subl %eax,%ecx
717: shrl $PGSHIFT,%ecx
1.180 maxv 718: orl $(PTE_P|PTE_W),%eax
1.90 maxv 719: fillkpt_nox
720:
1.105 maxv 721: /* Map [SYMS]+[PRELOADED MODULES] RW. */
1.90 maxv 722: movl $RELOC(__kernel_end),%eax
1.97 maxv 723: movl %esi,%ecx /* start of BOOTSTRAP TABLES */
1.95 maxv 724: subl %eax,%ecx
725: shrl $PGSHIFT,%ecx
1.180 maxv 726: orl $(PTE_P|PTE_W),%eax
1.105 maxv 727: fillkpt_nox
1.95 maxv 728:
1.92 maxv 729: /* Map the BOOTSTRAP TABLES RW. */
730: movl %esi,%eax /* start of BOOTSTRAP TABLES */
731: movl $TABLESIZE,%ecx /* length of BOOTSTRAP TABLES */
732: shrl $PGSHIFT,%ecx
1.180 maxv 733: orl $(PTE_P|PTE_W),%eax
1.92 maxv 734: fillkpt_nox
735:
1.102 maxv 736: /* We are on (4). Map ISA I/O MEM RW. */
1.101 maxv 737: movl $IOM_BEGIN,%eax
738: movl $IOM_SIZE,%ecx /* size of ISA I/O MEM */
739: shrl $PGSHIFT,%ecx
1.180 maxv 740: orl $(PTE_P|PTE_W/*|PTE_PCD*/),%eax
1.102 maxv 741: fillkpt_nox
1.1 fvdl 742:
1.84 maxv 743: /*
744: * Build L2. Linked to L1.
745: */
1.73 uebayasi 746: leal (PROC0_PTP2_OFF)(%esi),%ebx
1.1 fvdl 747: leal (PROC0_PTP1_OFF)(%esi),%eax
1.180 maxv 748: orl $(PTE_P|PTE_W),%eax
1.1 fvdl 749: movl $(NKL2_KIMG_ENTRIES+1),%ecx
750: fillkpt
751:
752: #if L2_SLOT_KERNBASE > 0
753: /* If needed, set up level 2 entries for actual kernel mapping */
1.84 maxv 754: leal (PROC0_PTP2_OFF + L2_SLOT_KERNBASE * PDE_SIZE)(%esi),%ebx
1.73 uebayasi 755: leal (PROC0_PTP1_OFF)(%esi),%eax
1.180 maxv 756: orl $(PTE_P|PTE_W),%eax
1.73 uebayasi 757: movl $(NKL2_KIMG_ENTRIES+1),%ecx
1.1 fvdl 758: fillkpt
759: #endif
760:
1.84 maxv 761: /*
762: * Build L3. Linked to L2.
763: */
1.73 uebayasi 764: leal (PROC0_PTP3_OFF)(%esi),%ebx
1.1 fvdl 765: leal (PROC0_PTP2_OFF)(%esi),%eax
1.180 maxv 766: orl $(PTE_P|PTE_W),%eax
1.1 fvdl 767: movl $NKL3_KIMG_ENTRIES,%ecx
768: fillkpt
769:
770: #if L3_SLOT_KERNBASE > 0
771: /* If needed, set up level 3 entries for actual kernel mapping */
1.84 maxv 772: leal (PROC0_PTP3_OFF + L3_SLOT_KERNBASE * PDE_SIZE)(%esi),%ebx
1.73 uebayasi 773: leal (PROC0_PTP2_OFF)(%esi),%eax
1.180 maxv 774: orl $(PTE_P|PTE_W),%eax
1.73 uebayasi 775: movl $NKL3_KIMG_ENTRIES,%ecx
1.1 fvdl 776: fillkpt
777: #endif
778:
1.84 maxv 779: /*
780: * Build L4 for identity mapping. Linked to L3.
781: */
1.73 uebayasi 782: leal (PROC0_PML4_OFF)(%esi),%ebx
1.1 fvdl 783: leal (PROC0_PTP3_OFF)(%esi),%eax
1.180 maxv 784: orl $(PTE_P|PTE_W),%eax
1.1 fvdl 785: movl $NKL4_KIMG_ENTRIES,%ecx
786: fillkpt
787:
1.84 maxv 788: /* Set up L4 entries for actual kernel mapping */
789: leal (PROC0_PML4_OFF + L4_SLOT_KERNBASE * PDE_SIZE)(%esi),%ebx
1.1 fvdl 790: leal (PROC0_PTP3_OFF)(%esi),%eax
1.180 maxv 791: orl $(PTE_P|PTE_W),%eax
1.1 fvdl 792: movl $NKL4_KIMG_ENTRIES,%ecx
793: fillkpt
794:
795: /*
796: * Startup checklist:
797: * 1. Enable PAE (and SSE while here).
798: */
799: movl %cr4,%eax
800: orl $(CR4_PAE|CR4_OSFXSR|CR4_OSXMMEXCPT),%eax
801: movl %eax,%cr4
802:
803: /*
1.86 maxv 804: * 2. Set Long Mode Enable in EFER. Also enable the syscall extensions,
805: * and NOX if available.
1.1 fvdl 806: */
1.73 uebayasi 807: movl $MSR_EFER,%ecx
1.1 fvdl 808: rdmsr
809: xorl %eax,%eax /* XXX */
810: orl $(EFER_LME|EFER_SCE),%eax
1.86 maxv 811: movl RELOC(nox_flag),%ebx
812: cmpl $0,%ebx
1.142 maxv 813: je .Lskip_NOX
1.86 maxv 814: orl $(EFER_NXE),%eax
1.142 maxv 815: .Lskip_NOX:
1.1 fvdl 816: wrmsr
817:
818: /*
819: * 3. Load %cr3 with pointer to PML4.
820: */
821: movl %esi,%eax
822: movl %eax,%cr3
823:
824: /*
825: * 4. Enable paging and the rest of it.
826: */
827: movl %cr0,%eax
1.68 jym 828: orl $(CR0_PE|CR0_PG|CR0_NE|CR0_TS|CR0_MP|CR0_WP|CR0_AM),%eax
1.1 fvdl 829: movl %eax,%cr0
830: jmp compat
831: compat:
832:
833: /*
1.83 maxv 834: * 5. Not quite done yet, we're now in a compatibility segment, in
835: * legacy mode. We must jump to a long mode segment. Need to set up
836: * a temporary GDT with a long mode segment in it to do that.
1.1 fvdl 837: */
1.56 jym 838: movl $RELOC(gdt64_lo),%eax
1.1 fvdl 839: lgdt (%eax)
840: movl $RELOC(farjmp64),%eax
841: ljmp *(%eax)
842:
1.83 maxv 843: .code64
1.1 fvdl 844: longmode:
845: /*
1.83 maxv 846: * 6. Finally, we're in long mode. However, we're still in the identity
847: * mapped area (could not jump out of that earlier because it would
848: * have been a > 32bit jump). We can do that now, so here we go.
1.1 fvdl 849: */
850: movabsq $longmode_hi,%rax
851: jmp *%rax
1.56 jym 852:
1.1 fvdl 853: longmode_hi:
1.56 jym 854:
855: /*
856: * We left the identity mapped area. Base address of
857: * the temporary gdt64 should now be in high memory.
858: */
859: movq $RELOC(gdt64_hi),%rax
860: lgdt (%rax)
861:
1.1 fvdl 862: /*
1.83 maxv 863: * We have arrived. There's no need anymore for the identity mapping in
864: * low memory, remove it.
1.1 fvdl 865: */
866: movq $KERNBASE,%r8
867:
868: #if L2_SLOT_KERNBASE > 0
869: movq $(NKL2_KIMG_ENTRIES+1),%rcx
1.84 maxv 870: leaq (PROC0_PTP2_OFF)(%rsi),%rbx /* old, phys address */
871: addq %r8,%rbx /* new, virt address */
872: killkpt
1.1 fvdl 873: #endif
874:
875: #if L3_SLOT_KERNBASE > 0
876: movq $NKL3_KIMG_ENTRIES,%rcx
1.84 maxv 877: leaq (PROC0_PTP3_OFF)(%rsi),%rbx /* old, phys address */
878: addq %r8,%rbx /* new, virt address */
879: killkpt
1.1 fvdl 880: #endif
881:
882: movq $NKL4_KIMG_ENTRIES,%rcx
1.84 maxv 883: leaq (PROC0_PML4_OFF)(%rsi),%rbx /* old, phys address of PML4 */
884: addq %r8,%rbx /* new, virt address of PML4 */
1.85 maxv 885: killkpt
1.1 fvdl 886:
887: /* Relocate atdevbase. */
888: movq $(TABLESIZE+KERNBASE),%rdx
889: addq %rsi,%rdx
890: movq %rdx,_C_LABEL(atdevbase)(%rip)
891:
892: /* Set up bootstrap stack. */
1.97 maxv 893: leaq (PROC0_STK_OFF)(%rsi),%rax
1.1 fvdl 894: addq %r8,%rax
1.130 maxv 895: movq %rax,_C_LABEL(lwp0uarea)(%rip)
1.1 fvdl 896: leaq (USPACE-FRAMESIZE)(%rax),%rsp
1.84 maxv 897: xorq %rbp,%rbp /* mark end of frames */
1.1 fvdl 898:
899: xorw %ax,%ax
900: movw %ax,%gs
901: movw %ax,%fs
902:
1.116 maxv 903: /* The first physical page available. */
904: leaq (TABLESIZE)(%rsi),%rdi
1.32 bouyer 905:
1.176 cherry 906: #else /* XENPV */
1.32 bouyer 907: /* First, reset the PSL. */
908: pushq $2
909: popfq
910:
911: cld
912:
913: /*
914: * Xen info:
915: * - %rsi -> start_info struct
1.111 maxv 916: * - %rsp -> stack, *theoretically* the last used page by Xen bootstrap
1.32 bouyer 917: */
1.111 maxv 918: movq %rsi,%rbx
1.32 bouyer 919:
1.73 uebayasi 920: /* Clear BSS. */
1.32 bouyer 921: xorq %rax,%rax
922: movq $_C_LABEL(__bss_start),%rdi
923: movq $_C_LABEL(_end),%rcx
924: subq %rdi,%rcx
925: rep
926: stosb
927:
1.111 maxv 928: /* Copy start_info to a safe place. */
1.32 bouyer 929: movq %rbx,%rsi
930: movq $_C_LABEL(start_info_union),%rdi
931: movq $64,%rcx
932: rep
933: movsq
934:
935: /*
936: * Memory layout at start of the day:
937: * - Kernel image
938: * - Page frames list
939: * - start_info struct. we copied it, so it can be recycled.
940: * - xenstore
941: * - console
942: * - Xen bootstrap page tables
943: * - kernel stack. provided by Xen
1.79 maxv 944: * - guaranteed 512kB padding
1.32 bouyer 945: *
946: * As we want to rebuild our page tables and place our stack
947: * in proc0 struct, all data starting from after console can be
948: * discarded after we've done a little setup.
949: */
950:
951: /*
1.111 maxv 952: * We want our own page tables, and will rebuild them. We will reclaim
953: * the Xen space later, INCLUDING the stack. So we need to switch to a
954: * temporary one now.
955: */
956: movq $tmpstk,%rax
957: subq $8,%rax
958: movq %rax,%rsp
1.32 bouyer 959:
1.45 bouyer 960: xorl %eax,%eax
961: cpuid
962: movl %eax,_C_LABEL(cpuid_level)
963:
1.201 bouyer 964: movl $VM_GUEST_XENPV, _C_LABEL(vm_guest)
965:
1.111 maxv 966: movq $cpu_info_primary,%rdi
967: movq %rdi,CPU_INFO_SELF(%rdi) /* ci->ci_self = ci */
968: movq $1,%rsi
1.67 cherry 969: call cpu_init_msrs /* cpu_init_msrs(ci, true); */
970:
1.113 maxv 971: call xen_locore
1.32 bouyer 972:
973: /*
1.113 maxv 974: * The first VA available is returned by xen_locore in %rax. We
1.111 maxv 975: * use it as the UAREA, and set up the stack here.
1.32 bouyer 976: */
1.111 maxv 977: movq %rax,%rsi
1.130 maxv 978: movq %rsi,_C_LABEL(lwp0uarea)(%rip)
1.32 bouyer 979: leaq (USPACE-FRAMESIZE)(%rsi),%rsp
980: xorq %rbp,%rbp
981:
1.111 maxv 982: /* Clear segment registers. */
1.63 jym 983: xorw %ax,%ax
1.32 bouyer 984: movw %ax,%gs
985: movw %ax,%fs
1.84 maxv 986:
1.117 maxv 987: /* Set first_avail after the DUMMY PAGE (see xen_locore). */
1.32 bouyer 988: movq %rsi,%rdi
1.117 maxv 989: addq $(USPACE+PAGE_SIZE),%rdi
1.84 maxv 990: subq $KERNBASE,%rdi /* init_x86_64 wants a physical address */
1.176 cherry 991: #endif /* XENPV */
1.32 bouyer 992:
1.131 maxv 993: pushq %rdi
1.206 bouyer 994: #if defined(XEN) && !defined(XENPV)
995: call _C_LABEL(init_xen_early)
996: #endif
1.131 maxv 997: call _C_LABEL(init_bootspace)
1.205 maxv 998: #ifdef KASAN
999: movq _C_LABEL(lwp0uarea)(%rip),%rdi
1000: call _C_LABEL(kasan_early_init)
1001: #endif
1.172 maxv 1002: call _C_LABEL(init_slotspace)
1.131 maxv 1003: popq %rdi
1.1 fvdl 1004: call _C_LABEL(init_x86_64)
1005: call _C_LABEL(main)
1.71 uebayasi 1006: END(start)
1.1 fvdl 1007:
1.207 ! christos 1008: #if defined(XEN)
! 1009: # if !defined(XENPV)
1.206 bouyer 1010: /* Entry point for Xen PVH: 32-bit protected mode, paging off, %ebx = phys addr of hvm_start_info */
             1011: 	.code32
             1012: ENTRY(start_xen32)
             1013: 	/* Xen doesn't start us with a valid gdt */
             1014: 	movl	$RELOC(gdtdesc32), %eax
             1015: 	lgdt	(%eax)
             1016: 	jmp	$GSEL(GCODE_SEL, SEL_KPL), $RELOC(.Lreload_cs)
             1017: 
             1018: .Lreload_cs:
             1019: 	movw	$GSEL(GDATA_SEL, SEL_KPL), %ax
             1020: 	movw	%ax, %ds
             1021: 	movw	%ax, %es
             1022: 	movw	%ax, %ss
             1023: 
             1024: 	/* we need a valid stack */
             1025: 	movl	$RELOC(tmpstk),%esp
             1026: 
             1027: 	/* clear BSS */
             1028: 	xorl	%eax,%eax
             1029: 	movl	$RELOC(__bss_start),%edi
             1030: 	movl	$RELOC(_end),%ecx
             1031: 	subl	%edi,%ecx
             1032: 	rep
             1033: 	stosb
             1034: 
             1035: 	/*
             1036: 	 * save addr of the hvm_start_info structure. This is also the end
             1037: 	 * of the symbol table
             1038: 	 */
             1039: 	movl	%ebx, RELOC(hvm_start_paddr)
             1040: 	movl	%ebx, %eax
             1041: 	addl	$KERNBASE_LO,%eax
             1042: 	movl	$RELOC(esym),%ebp
             1043: 	movl	%eax,(%ebp)		/* esym = %ebx + KERNBASE, stored as two 32-bit halves */
             1044: 	movl	$KERNBASE_HI,4(%ebp)
             1045: 	/* get a page for HYPERVISOR_shared_info */
             1046: 	addl	$PAGE_SIZE, %ebx
             1047: 	addl	$PGOFSET,%ebx		/* round up to the next page boundary */
             1048: 	andl	$~PGOFSET,%ebx
             1049: 	movl	$RELOC(HYPERVISOR_shared_info_pa),%ebp
             1050: 	movl	%ebx,(%ebp)		/* 64-bit PA, high half zero */
             1051: 	movl	$0,4(%ebp)
             1052: 	/* XXX assume hvm_start_info+dependent structures fit in a single page */
             1053: 	addl	$PAGE_SIZE, %ebx
             1054: 	addl	$PGOFSET,%ebx
             1055: 	andl	$~PGOFSET,%ebx
             1056: 	addl	$KERNBASE_LO,%ebx
             1057: 	movl	$RELOC(eblob),%ebp
             1058: 	movl	%ebx,(%ebp)		/* eblob = end of the boot blob (virtual) */
             1059: 	movl	$KERNBASE_HI,4(%ebp)
             1060: 
             1061: 	/* announce ourselves */
             1062: 	movl	$VM_GUEST_XENPVH, RELOC(vm_guest)
             1063: 	jmp	.Lbiosbasemem_finished	/* rejoin the native boot path in start */
             1064: END(start_xen32)
             1065: 	.code64
1.207 ! christos 1066: # endif /* !XENPV */
1.32 bouyer  1067: /* space for the hypercall call page; NOTE(review): presumably filled in by the hypervisor at boot -- confirm */
             1068: #define HYPERCALL_PAGE_OFFSET 0x1000
1.177 cherry 1069: 	.align	HYPERCALL_PAGE_OFFSET
             1070: ENTRY(hypercall_page)	/* Returns -1, on HYPERVISOR_xen_version() */
             1071: 	.skip	(__HYPERVISOR_xen_version*32), 0x90	/* one 32-byte slot per hypercall, NOP-filled */
             1072: 	movq	$-1, %rax
             1073: 	retq
             1074: 	.align	HYPERCALL_PAGE_OFFSET, 0x90	/* pad the remainder of the page with NOPs */
1.71 uebayasi 1075: END(hypercall_page)
1.32 bouyer 1076: #endif /* XEN */
1.1 fvdl 1077:
1078: /*
1.22 yamt 1079: * int setjmp(label_t *)
1080: *
1081: * Used primarily by DDB.
1082: */
1.1 fvdl 1083: ENTRY(setjmp)
1084: /*
1085: * Only save registers that must be preserved across function
1086: * calls according to the ABI (%rbx, %rsp, %rbp, %r12-%r15)
1087: * and %rip.
1088: */
1089: movq %rdi,%rax
1090: movq %rbx,(%rax)
1091: movq %rsp,8(%rax)
1092: movq %rbp,16(%rax)
1093: movq %r12,24(%rax)
1094: movq %r13,32(%rax)
1095: movq %r14,40(%rax)
1096: movq %r15,48(%rax)
1097: movq (%rsp),%rdx
1098: movq %rdx,56(%rax)
1099: xorl %eax,%eax
1100: ret
1.71 uebayasi 1101: END(setjmp)
1.1 fvdl 1102:
1.22 yamt    1103: /*
             1104:  * int longjmp(label_t *)
             1105:  *
             1106:  * Used primarily by DDB. Restores the context saved by setjmp(), which then appears to return 1.
             1107:  */
1.1 fvdl     1108: ENTRY(longjmp)
             1109: 	movq	%rdi,%rax		/* %rax = label_t */
             1110: 	movq	(%rax),%rbx		/* restore the callee-saved registers... */
             1111: 	movq	8(%rax),%rsp
             1112: 	movq	16(%rax),%rbp
             1113: 	movq	24(%rax),%r12
             1114: 	movq	32(%rax),%r13
             1115: 	movq	40(%rax),%r14
             1116: 	movq	48(%rax),%r15
             1117: 	movq	56(%rax),%rdx		/* ...and the saved return address, */
             1118: 	movq	%rdx,(%rsp)		/* overwriting our own, so ret goes to setjmp's caller */
1.22 yamt    1119: 	movl	$1,%eax			/* setjmp() "returns" 1 */
1.1 fvdl     1120: 	ret
1.71 uebayasi 1121: END(longjmp)
1.1 fvdl 1122:
1.73 uebayasi 1123: /*
             1124:  * void dumpsys(void)
             1125:  *
             1126:  * Mimic cpu_switchto() for postmortem debugging.
             1127:  */
1.25 yamt    1128: ENTRY(dumpsys)
1.84 maxv    1129: 	/* Build a fake switch frame (same pushes as cpu_switchto()). */
1.25 yamt    1130: 	pushq	%rbx
             1131: 	pushq	%r12
             1132: 	pushq	%r13
             1133: 	pushq	%r14
             1134: 	pushq	%r15
1.84 maxv    1135: 
             1136: 	/* Save a context into dumppcb so the dump tools can find it. */
1.25 yamt    1137: 	movq	$dumppcb, %rax
             1138: 	movq	%rsp, PCB_RSP(%rax)
             1139: 	movq	%rbp, PCB_RBP(%rax)
             1140: 
             1141: 	call	_C_LABEL(dodumpsys)
             1142: 
1.84 maxv    1143: 	addq	$(5*8), %rsp	/* sizeof(switchframe) - sizeof(%rip) */
1.25 yamt    1144: 	ret
1.71 uebayasi 1145: END(dumpsys)
1.25 yamt 1146:
1.1 fvdl     1147: /*
1.58 chs     1148:  * struct lwp *cpu_switchto(struct lwp *oldlwp, struct lwp *newlwp,
1.103 maxv   1149:  *	bool returning)
1.22 yamt    1150:  *
1.197 skrll  1151:  * 1. save context of oldlwp.
             1152:  * 2. restore context of newlwp.
1.22 yamt    1153:  *
             1154:  * Note that the stack frame layout is known to "struct switchframe" in
             1155:  * <machine/frame.h> and to the code in cpu_lwp_fork() which initializes
             1156:  * it for a new lwp. Returns oldlwp in %rax.
1.1 fvdl     1157:  */
1.22 yamt    1158: ENTRY(cpu_switchto)
1.1 fvdl     1159: 	pushq	%rbx			/* build the switchframe */
             1160: 	pushq	%r12
             1161: 	pushq	%r13
             1162: 	pushq	%r14
             1163: 	pushq	%r15
             1164: 
1.84 maxv    1165: 	movq	%rdi,%r13	/* oldlwp */
             1166: 	movq	%rsi,%r12	/* newlwp */
1.1 fvdl     1167: 
1.22 yamt    1168: 	/* Save old context. */
1.55 rmind   1169: 	movq	L_PCB(%r13),%rax
1.22 yamt    1170: 	movq	%rsp,PCB_RSP(%rax)
             1171: 	movq	%rbp,PCB_RBP(%rax)
             1172: 
1.168 maxv   1173: 	/* Switch to newlwp's stack. */
             1174: 	movq	L_PCB(%r12),%r14	/* %r14 = new pcb, live until the TLS reload below */
             1175: 	movq	PCB_RSP(%r14),%rsp
             1176: 	movq	PCB_RBP(%r14),%rbp
             1177: 
             1178: 	/*
             1179: 	 * Set curlwp. This must be globally visible in order to permit
             1180: 	 * non-interlocked mutex release.
             1181: 	 */
             1182: 	movq	%r12,%rcx
             1183: 	xchgq	%rcx,CPUVAR(CURLWP)
             1184: 
             1185: 	/* Skip the rest if returning to a pinned LWP. */
             1186: 	testb	%dl,%dl		/* returning = true ? (3rd arg, in %rdx) */
             1187: 	jnz	.Lswitch_return
             1188: 
1.145 maxv   1189: #ifdef SVS
1.151 maxv   1190: 	movb	_C_LABEL(svs_enabled),%dl
             1191: 	testb	%dl,%dl
             1192: 	jz	.Lskip_svs
1.145 maxv   1193: 	callq	_C_LABEL(svs_lwp_switch)
1.151 maxv   1194: .Lskip_svs:
1.145 maxv   1195: #endif
             1196: 
1.201 bouyer 1197: #ifndef XENPV
1.168 maxv   1198: 	movq	%r13,%rdi		/* speculation_barrier(oldlwp, newlwp) */
             1199: 	movq	%r12,%rsi
1.160 maxv   1200: 	callq	_C_LABEL(speculation_barrier)
1.163 maxv   1201: #endif
1.160 maxv   1202: 
1.38 yamt    1203: 	/* Switch ring0 stack */
1.147 maxv   1204: #ifdef SVS
1.152 maxv   1205: 	movb	_C_LABEL(svs_enabled),%al
             1206: 	testb	%al,%al
             1207: 	jz	.Lno_svs_switch
             1208: 
1.147 maxv   1209: 	movq	CPUVAR(RSP0),%rax	/* with SVS, rsp0 comes from cpu_info */
             1210: 	movq	CPUVAR(TSS),%rdi
             1211: 	movq	%rax,TSS_RSP0(%rdi)
1.152 maxv   1212: 	jmp	.Lring0_switched
             1213: 
             1214: .Lno_svs_switch:
             1215: #endif
             1216: 
1.176 cherry 1217: #if !defined(XENPV)
1.38 yamt    1218: 	movq	PCB_RSP0(%r14),%rax	/* without SVS, rsp0 comes from the pcb */
1.144 maxv   1219: 	movq	CPUVAR(TSS),%rdi
             1220: 	movq	%rax,TSS_RSP0(%rdi)
1.38 yamt    1221: #else
1.103 maxv   1222: 	movq	%r14,%rdi		/* Xen PV: let the hypervisor switch rsp0 */
1.152 maxv   1223: 	callq	_C_LABEL(x86_64_switch_context)
1.32 bouyer  1224: #endif
1.152 maxv   1225: .Lring0_switched:
1.1 fvdl     1226: 
1.173 maxv   1227: 	/* Switch the dbregs. */
             1228: 	movq	%r13,%rdi
             1229: 	movq	%r12,%rsi
             1230: 	callq	_C_LABEL(x86_dbregs_switch)
             1231: 
1.187 maxv   1232: 	/* Switch the FPU. */
1.167 maxv   1233: 	movq	%r13,%rdi
             1234: 	movq	%r12,%rsi
1.188 maxv   1235: 	callq	_C_LABEL(fpu_switch)
1.167 maxv   1236: 
1.22 yamt    1237: 	/* Don't bother with the rest if switching to a system process. */
             1238: 	testl	$LW_SYSTEM,L_FLAG(%r12)
1.142 maxv   1239: 	jnz	.Lswitch_return
1.1 fvdl     1240: 
1.22 yamt    1241: 	/* Is this process using RAS (restartable atomic sequences)? */
             1242: 	movq	L_PROC(%r12),%rdi
             1243: 	cmpq	$0,P_RASLIST(%rdi)
1.142 maxv   1244: 	je	.Lno_RAS
1.104 maxv   1245: 
             1246: 	/* Handle restartable atomic sequences (RAS). */
             1247: 	movq	L_MD_REGS(%r12),%rbx
             1248: 	movq	TF_RIP(%rbx),%rsi	/* ras_lookup(proc, user %rip) */
             1249: 	call	_C_LABEL(ras_lookup)
             1250: 	cmpq	$-1,%rax		/* -1 = not in a RAS; else restart address */
1.142 maxv   1251: 	je	.Lno_RAS
1.104 maxv   1252: 	movq	%rax,TF_RIP(%rbx)
1.142 maxv   1253: .Lno_RAS:
1.1 fvdl     1254: 
1.176 cherry 1255: #ifndef XENPV
1.189 maxv   1256: 	/* Raise the IPL to IPL_HIGH. Dropping the priority is deferred until
             1257: 	 * mi_switch(), when cpu_switchto() returns. XXX Still needed? */
1.75 christos 1258: 	movl	$IPL_HIGH,CPUVAR(ILEVEL)
1.58 chs     1259: 
1.103 maxv   1260: 	/* The 32bit LWPs are handled differently. */
             1261: 	testl	$PCB_COMPAT32,PCB_FLAGS(%r14)
1.142 maxv   1262: 	jnz	.Llwp_32bit
1.58 chs     1263: 
1.142 maxv   1264: .Llwp_64bit:
1.134 maxv   1265: 	/* Set default 64bit values in %ds, %es, %fs and %gs. */
             1266: 	movq	$GSEL(GUDATA_SEL, SEL_UPL),%rax
             1267: 	movw	%ax,%ds
             1268: 	movw	%ax,%es
1.103 maxv   1269: 	xorq	%rax,%rax
             1270: 	movw	%ax,%fs
1.58 chs     1271: 	CLI(cx)
1.70 dsl     1272: 	SWAPGS				/* load %gs with the user gsbase active, */
1.103 maxv   1273: 	movw	%ax,%gs			/* so the kernel gsbase is preserved */
1.70 dsl     1274: 	SWAPGS
1.58 chs     1275: 	STI(cx)
             1276: 
1.107 maxv   1277: 	/* Zero out GDT descriptors. */
1.58 chs     1278: 	movq	CPUVAR(GDT),%rcx
1.103 maxv   1279: 	movq	%rax,(GUFS_SEL*8)(%rcx)
             1280: 	movq	%rax,(GUGS_SEL*8)(%rcx)
1.58 chs     1281: 
             1282: 	/* Reload 64-bit %fs/%gs MSRs. */
1.103 maxv   1283: 	movl	$MSR_FSBASE,%ecx
             1284: 	movl	PCB_FS(%r14),%eax	/* wrmsr takes the value in %edx:%eax */
             1285: 	movl	4+PCB_FS(%r14),%edx
1.58 chs     1286: 	wrmsr
1.103 maxv   1287: 	movl	$MSR_KERNELGSBASE,%ecx
             1288: 	movl	PCB_GS(%r14),%eax
             1289: 	movl	4+PCB_GS(%r14),%edx
1.58 chs     1290: 	wrmsr
1.107 maxv   1291: 
1.142 maxv   1292: 	jmp	.Lswitch_return
1.58 chs     1293: 
1.142 maxv   1294: .Llwp_32bit:
1.58 chs     1295: 	/* Reload %fs/%gs GDT descriptors. */
             1296: 	movq	CPUVAR(GDT),%rcx
1.103 maxv   1297: 	movq	PCB_FS(%r14),%rax
             1298: 	movq	%rax,(GUFS_SEL*8)(%rcx)
             1299: 	movq	PCB_GS(%r14),%rax
             1300: 	movq	%rax,(GUGS_SEL*8)(%rcx)
1.58 chs     1301: 
1.143 maxv   1302: 	/* Set default 32bit values in %ds, %es, %fs and %gs. */
1.103 maxv   1303: 	movq	L_MD_REGS(%r12),%rbx
1.134 maxv   1304: 	movq	$GSEL(GUDATA32_SEL, SEL_UPL),%rax
             1305: 	movw	%ax,%ds
             1306: 	movw	%ax,%es
1.135 maxv   1307: 	movw	%ax,%fs
1.58 chs     1308: 	CLI(ax)
1.70 dsl     1309: 	SWAPGS				/* as above: load %gs under the user gsbase */
1.137 maxv   1310: 	movw	%ax,%gs
1.70 dsl     1311: 	SWAPGS
1.58 chs     1312: 	STI(ax)
             1313: #else
             1314: 	movq	%r12,%rdi		/* Xen PV: TLS switch goes via the hypervisor */
             1315: 	callq	_C_LABEL(x86_64_tls_switch)
1.32 bouyer  1316: #endif
1.84 maxv    1317: 
1.142 maxv   1318: .Lswitch_return:
1.22 yamt    1319: 	/* Return to the new LWP, returning 'oldlwp' in %rax. */
1.190 maxv   1320: 	KMSAN_INIT_RET(8)
1.103 maxv   1321: 	movq	%r13,%rax
1.1 fvdl     1322: 	popq	%r15
             1323: 	popq	%r14
             1324: 	popq	%r13
             1325: 	popq	%r12
             1326: 	popq	%rbx
             1327: 	ret
1.71 uebayasi 1328: END(cpu_switchto)
1.1 fvdl 1329:
             1330: /*
1.22 yamt    1331:  * void savectx(struct pcb *pcb);
             1332:  *
1.1 fvdl     1333:  * Update pcb, saving current processor state.
             1334:  */
             1335: ENTRY(savectx)
             1336: 	/* Save stack pointers. */
             1337: 	movq	%rsp,PCB_RSP(%rdi)
             1338: 	movq	%rbp,PCB_RBP(%rdi)
             1339: 	ret
1.71 uebayasi 1340: END(savectx)
1.1 fvdl 1341:
             1342: /*
1.148 maxv   1343:  * Syscall handler. Reached from the entry stubs with a full trapframe built and interrupts disabled.
1.1 fvdl     1344:  */
1.171 maxv   1345: ENTRY(handle_syscall)
1.127 maxv   1346: 	STI(si)
1.1 fvdl     1347: 
             1348: 	movq	CPUVAR(CURLWP),%r14
1.84 maxv    1349: 	incq	CPUVAR(NSYSCALL)	/* count it atomically */
             1350: 	movq	%rsp,L_MD_REGS(%r14)	/* save pointer to frame */
1.1 fvdl     1351: 	movq	L_PROC(%r14),%r15
1.70 dsl     1352: 	andl	$~MDL_IRET,L_MD_FLAGS(%r14)	/* Allow sysret return */
1.37 dsl     1353: 	movq	%rsp,%rdi	/* Pass frame as arg0 */
1.1 fvdl     1354: 	call	*P_MD_SYSCALL(%r15)	/* dispatch via the proc's syscall handler */
1.29 yamt    1355: .Lsyscall_checkast:
1.70 dsl     1356: 	/*
             1357: 	 * Disable interrupts to avoid new ASTs (etc) being added and
             1358: 	 * to ensure we don't take an interrupt with some of the user
             1359: 	 * registers loaded.
             1360: 	 */
             1361: 	CLI(si)
1.29 yamt    1362: 	/* Check for ASTs on exit to user mode. */
1.107 maxv   1363: 	movl	L_MD_ASTPENDING(%r14),%eax
             1364: 	orl	CPUVAR(WANT_PMAPLOAD),%eax
1.29 yamt    1365: 	jnz	9f
1.107 maxv   1366: 
1.1 fvdl     1367: #ifdef DIAGNOSTIC
             1368: 	cmpl	$IPL_NONE,CPUVAR(ILEVEL)
1.142 maxv   1369: 	jne	.Lspl_error
1.1 fvdl     1370: #endif
1.107 maxv   1371: 
1.189 maxv   1372: 	HANDLE_DEFERRED_FPU
             1373: 
1.129 maxv   1374: 	/*
1.134 maxv   1375: 	 * Decide if we need to take a slow path. That's the case when we
             1376: 	 * want to reload %cs and %ss on a 64bit LWP (MDL_IRET set), or when
             1377: 	 * we're returning to a 32bit LWP (MDL_COMPAT32 set).
             1378: 	 *
             1379: 	 * In either case, we jump into intrfastexit and return to userland
             1380: 	 * with the iret instruction.
1.129 maxv   1381: 	 */
1.107 maxv   1382: 	testl	$(MDL_IRET|MDL_COMPAT32),L_MD_FLAGS(%r14)
1.134 maxv   1383: 	jnz	intrfastexit
             1384: 
1.148 maxv   1385: 	jmp	syscall_sysret		/* fast path: return via sysret */
1.1 fvdl     1386: 
             1387: #ifdef DIAGNOSTIC
1.142 maxv   1388: .Lspl_error:
1.107 maxv   1389: 	movabsq	$4f,%rdi
1.186 maxv   1390: 	movl	CPUVAR(ILEVEL),%esi
             1391: 	call	_C_LABEL(panic)
             1392: 4:	.asciz	"spl not lowered on syscall, ilevel=%x"
1.1 fvdl     1393: #endif
1.70 dsl     1394: 
             1395: 	/* AST pending or pmap load needed */
1.41 ad      1396: 9:
1.107 maxv   1397: 	cmpl	$0,CPUVAR(WANT_PMAPLOAD)
1.41 ad      1398: 	jz	10f
             1399: 	STI(si)
1.29 yamt    1400: 	call	_C_LABEL(do_pmap_load)
             1401: 	jmp	.Lsyscall_checkast	/* re-check ASTs */
1.41 ad      1402: 10:
             1403: 	CLEAR_ASTPENDING(%r14)
             1404: 	STI(si)
             1405: 	/* Pushed T_ASTFLT into tf_trapno on entry. */
             1406: 	movq	%rsp,%rdi
1.190 maxv   1407: 	KMSAN_INIT_ARG(8)
1.41 ad      1408: 	call	_C_LABEL(trap)
             1409: 	jmp	.Lsyscall_checkast	/* re-check ASTs */
1.148 maxv   1410: END(handle_syscall)
1.29 yamt 1411:
1.22 yamt    1412: /*
             1413:  * void lwp_trampoline(void);
             1414:  *
             1415:  * This is a trampoline function run by newly created LWPs
1.70 dsl     1416:  * in order to do additional setup in their context.
1.22 yamt    1417:  */
1.171 maxv   1418: ENTRY(lwp_trampoline)
1.22 yamt    1419: 	movq	%rbp,%rsi		/* arg1 of lwp_startup(): this lwp (from the switchframe) */
1.41 ad      1420: 	movq	%rbp,%r14	/* for .Lsyscall_checkast */
1.22 yamt    1421: 	movq	%rax,%rdi		/* arg0: previous lwp, returned by cpu_switchto() */
             1422: 	xorq	%rbp,%rbp		/* mark end of frames */
1.190 maxv   1423: 	KMSAN_INIT_ARG(16)
1.22 yamt    1424: 	call	_C_LABEL(lwp_startup)
1.1 fvdl     1425: 	movq	%r13,%rdi		/* %r12/%r13 = func/arg from the cpu_lwp_fork() switchframe */
1.190 maxv   1426: 	KMSAN_INIT_ARG(8)
1.1 fvdl     1427: 	call	*%r12
1.41 ad      1428: 	jmp	.Lsyscall_checkast	/* then return to userland like a syscall */
1.71 uebayasi 1429: END(lwp_trampoline)
1.1 fvdl 1430:
             1431: /*
1.148 maxv   1432:  * Entry points of the 'syscall' instruction, 64bit and 32bit mode.
             1433:  */
             1434: 
1.156 maxv   1435: #define SP(x)	(x)-(TF_SS+8)(%rax)	/* trapframe field x, relative to %rax = kernel stack top */
1.152 maxv   1436: 
             1437: .macro	SYSCALL_ENTRY	name,is_svs
             1438: IDTVEC(\name)
1.176 cherry 1439: #ifndef XENPV
1.148 maxv   1440: 	/*
             1441: 	 * The user %rip is in %rcx and the user %rflags in %r11. The kernel %cs
             1442: 	 * and %ss are loaded, but nothing else is.
             1443: 	 *
             1444: 	 * The 'swapgs' instruction gives us access to cpu-specific memory where
             1445: 	 * we can save a user register and then read the LWP's kernel stack
             1446: 	 * pointer.
             1447: 	 *
             1448: 	 * This code doesn't seem to set %ds, this may not matter since it is
             1449: 	 * ignored in 64bit mode, OTOH the syscall instruction sets %ss and that
             1450: 	 * is ignored as well.
             1451: 	 */
             1452: 	swapgs
             1453: 
1.156 maxv   1454: 	/* Get the LWP's kernel stack pointer in %rax */
1.152 maxv   1455: 	.if	\is_svs
1.165 joerg  1456: 	movabs	%rax,SVS_UTLS+UTLS_SCRATCH	/* SVS: user page tables still active, use the UTLS page */
             1457: 	movabs	SVS_UTLS+UTLS_RSP0,%rax
1.156 maxv   1458: 	.else
             1459: 	movq	%rax,CPUVAR(SCRATCH)
             1460: 	movq	CPUVAR(CURLWP),%rax
             1461: 	movq	L_PCB(%rax),%rax
             1462: 	movq	PCB_RSP0(%rax),%rax
             1463: 	.endif
1.152 maxv   1464: 
1.156 maxv   1465: 	/* Make stack look like an 'int nn' frame */
             1466: 	movq	$(LSEL(LUDATA_SEL, SEL_UPL)),SP(TF_SS)	/* user %ss */
             1467: 	movq	%rsp,SP(TF_RSP)				/* user %rsp */
             1468: 	movq	%r11,SP(TF_RFLAGS)			/* user %rflags */
             1469: 	movq	$(LSEL(LUCODE_SEL, SEL_UPL)),SP(TF_CS)	/* user %cs */
             1470: 	movq	%rcx,SP(TF_RIP)				/* user %rip */
             1471: 	leaq	SP(0),%rsp		/* %rsp now valid after frame */
1.152 maxv   1472: 
1.156 maxv   1473: 	/* Restore %rax */
             1474: 	.if	\is_svs
1.165 joerg  1475: 	movabs	SVS_UTLS+UTLS_SCRATCH,%rax
1.152 maxv   1476: 	.else
1.156 maxv   1477: 	movq	CPUVAR(SCRATCH),%rax
1.152 maxv   1478: 	.endif
1.148 maxv   1479: 
             1480: 	movq	$2,TF_ERR(%rsp)		/* syscall instruction size */
             1481: 	movq	$T_ASTFLT,TF_TRAPNO(%rsp)
             1482: #else
             1483: 	/* Xen already switched to kernel stack */
             1484: 	addq	$0x10,%rsp	/* gap to match cs:rip */
             1485: 	pushq	$2		/* error code */
             1486: 	pushq	$T_ASTFLT
             1487: 	subq	$TF_REGSIZE,%rsp
             1488: 	cld
             1489: #endif
             1490: 	INTR_SAVE_GPRS
1.160 maxv   1491: 	IBRS_ENTER
1.148 maxv   1492: 	movw	$GSEL(GUDATA_SEL, SEL_UPL),TF_DS(%rsp)
             1493: 	movw	$GSEL(GUDATA_SEL, SEL_UPL),TF_ES(%rsp)
             1494: 	movw	$0,TF_FS(%rsp)
             1495: 	movw	$0,TF_GS(%rsp)
1.152 maxv   1496: 	.if	\is_svs
             1497: 	SVS_ENTER
             1498: 	.endif
1.190 maxv   1499: 	KMSAN_ENTER
1.148 maxv   1500: 	jmp	handle_syscall
1.152 maxv   1501: IDTVEC_END(\name)
             1502: .endm
             1503: 
             1504: 	SYSCALL_ENTRY	syscall,is_svs=0
             1505: 
             1506: 	TEXT_USER_BEGIN
             1507: 
1.155 maxv   1508: #ifdef SVS
1.152 maxv   1509: 	SYSCALL_ENTRY	syscall_svs,is_svs=1
1.155 maxv   1510: #endif
1.148 maxv   1511: 
             1512: IDTVEC(syscall32)
             1513: 	sysret		/* go away please: 32bit-mode 'syscall' is not supported, bounce straight back */
             1514: IDTVEC_END(syscall32)
             1515: 
             1516: 	TEXT_USER_END
1517:
             1518: /*
1.22 yamt    1519:  * osyscall()
             1520:  *
1.1 fvdl     1521:  * Trap gate entry for int $80 syscall, also used by sigreturn.
             1522:  */
1.148 maxv   1523: 	TEXT_USER_BEGIN
1.1 fvdl     1524: IDTVEC(osyscall)
1.176 cherry 1525: #ifdef XENPV
1.32 bouyer  1526: 	movq	(%rsp),%rcx		/* load user %rip/%rflags into %rcx/%r11, */
             1527: 	movq	8(%rsp),%r11		/* matching what 'syscall' would leave there */
             1528: 	addq	$0x10,%rsp
             1529: #endif
1.84 maxv    1530: 	pushq	$2	/* size of instruction for restart */
             1531: 	pushq	$T_ASTFLT	/* trap # for doing ASTs */
1.1 fvdl     1532: 	INTRENTRY
1.148 maxv   1533: 	jmp	handle_syscall
1.71 uebayasi 1534: IDTVEC_END(osyscall)
1.148 maxv   1535: 	TEXT_USER_END
1536:
             1537: /*
             1538:  * Return to userland via 'sysret'. Entered from handle_syscall with interrupts disabled.
             1539:  */
             1540: 	TEXT_USER_BEGIN
             1541: 	_ALIGN_TEXT
             1542: LABEL(syscall_sysret)
1.190 maxv   1543: 	KMSAN_LEAVE
1.181 maxv   1544: 	MDS_LEAVE
1.148 maxv   1545: 	SVS_LEAVE
1.160 maxv   1546: 	IBRS_LEAVE
1.148 maxv   1547: 	INTR_RESTORE_GPRS
             1548: 	SWAPGS
1.176 cherry 1549: #ifndef XENPV
1.148 maxv   1550: 	movq	TF_RIP(%rsp),%rcx	/* %rip for sysret */
             1551: 	movq	TF_RFLAGS(%rsp),%r11	/* %flags for sysret */
             1552: 	movq	TF_RSP(%rsp),%rsp	/* back on the user stack */
             1553: 	sysretq
             1554: #else
             1555: 	addq	$TF_RIP,%rsp		/* leave cs:rip:rflags:rsp:ss for Xen */
             1556: 	pushq	$256	/* VGCF_IN_SYSCALL */
             1557: 	jmp	HYPERVISOR_iret
             1558: #endif
             1559: END(syscall_sysret)
             1560: 	TEXT_USER_END
1.26 ad 1561:
             1562: /*
1.66 chs     1563:  * bool sse2_idlezero_page(void *pg)
1.26 ad      1564:  *
1.47 ad      1565:  * Zero a page without polluting the cache. Preemption must be
             1566:  * disabled by the caller. Abort if a preemption is pending.
1.66 chs     1567:  * Returns true if the page is zeroed, false if not.
1.26 ad      1568:  */
1.47 ad      1569: ENTRY(sse2_idlezero_page)
             1570: 	pushq	%rbp
             1571: 	movq	%rsp,%rbp
             1572: 	movl	$(PAGE_SIZE/64), %ecx	/* 64 bytes zeroed per iteration */
1.26 ad      1573: 	xorq	%rax, %rax		/* zero source; also the 'false' return value */
             1574: 	.align	16
             1575: 1:
1.191 ad     1576: 	cmpl	$0, CPUVAR(RESCHED)	/* preemption pending? bail out */
1.47 ad      1577: 	jnz	2f
1.26 ad      1578: 	movnti	%rax, 0(%rdi)		/* non-temporal stores: bypass the cache */
             1579: 	movnti	%rax, 8(%rdi)
             1580: 	movnti	%rax, 16(%rdi)
             1581: 	movnti	%rax, 24(%rdi)
             1582: 	movnti	%rax, 32(%rdi)
             1583: 	movnti	%rax, 40(%rdi)
             1584: 	movnti	%rax, 48(%rdi)
             1585: 	movnti	%rax, 56(%rdi)
1.47 ad      1586: 	addq	$64, %rdi
             1587: 	decl	%ecx
1.26 ad      1588: 	jnz	1b
             1589: 	sfence				/* order the non-temporal stores */
1.47 ad      1590: 	incl	%eax			/* return true */
             1591: 	popq	%rbp
1.190 maxv   1592: 	KMSAN_INIT_RET(1)
1.26 ad      1593: 	ret
1.47 ad      1594: 2:
1.26 ad      1595: 	sfence
1.47 ad      1596: 	popq	%rbp			/* %eax still 0: return false */
1.190 maxv   1597: 	KMSAN_INIT_RET(1)
1.26 ad      1598: 	ret
1.71 uebayasi 1599: END(sse2_idlezero_page)
1.66 chs 1600:
             1601: /*
             1602:  * void pagezero(vaddr_t va)
             1603:  *
1.192 ad     1604:  * Zero a page.
1.66 chs     1605:  */
             1606: ENTRY(pagezero)
1.192 ad     1607: 	pushq	%rbp
             1608: 	movq	%rsp,%rbp
             1609: 	movq	$(PAGE_SIZE / 8),%rcx	/* qword count */
1.66 chs     1610: 	xorq	%rax,%rax
1.192 ad     1611: 	rep
             1612: 	stosq				/* store %rcx qwords of zero at (%rdi) */
             1613: 	leave
1.66 chs     1614: 	ret
1.71 uebayasi 1615: END(pagezero)
1.129 maxv 1616:
1.148 maxv   1617: 	TEXT_USER_BEGIN
             1618: 
1.157 maxv   1619: /*
             1620:  * In intrfastexit, we advance %rsp at the beginning. We then access the
             1621:  * segment registers in the trapframe with TF_BACKW (backwards). See the
             1622:  * documentation in amd64_trap.S for an explanation.
             1623:  */
             1624: 
1.159 maxv   1625: #define TF_BACKW(val, reg)	(val - (TF_REGSIZE+16))(reg)
1.157 maxv   1626: 
1.148 maxv   1627: 	_ALIGN_TEXT
1.184 maxv   1628: 	.type intrfastexit,@function
1.148 maxv   1629: LABEL(intrfastexit)
1.147 maxv   1630: 	NOT_XEN(cli;)
1.190 maxv   1631: 	KMSAN_LEAVE
1.185 maxv   1632: 
             1633: 	testb	$SEL_UPL,TF_CS(%rsp)
             1634: 	jz	.Lkexit			/* returning to kernel: skip user-mode teardown */
             1635: 
1.181 maxv   1636: 	MDS_LEAVE
1.147 maxv   1637: 	SVS_LEAVE
1.160 maxv   1638: 	IBRS_LEAVE
1.129 maxv   1639: 	INTR_RESTORE_GPRS
1.159 maxv   1640: 	addq	$(TF_REGSIZE+16),%rsp	/* iret frame */
1.185 maxv   1641: 	SWAPGS
1.157 maxv   1642: 
             1643: 	cmpw	$LSEL(LUCODE_SEL, SEL_UPL),TF_BACKW(TF_CS, %rsp)
1.185 maxv   1644: 	je	do_iret			/* 64bit LWP: no segment register reload needed */
1.157 maxv   1645: 	cmpw	$GSEL(GUCODE_SEL, SEL_UPL),TF_BACKW(TF_CS, %rsp)
1.185 maxv   1646: 	je	do_iret
1.176 cherry 1647: #ifdef XENPV
1.157 maxv   1648: 	cmpw	$FLAT_RING3_CS64,TF_BACKW(TF_CS, %rsp)
1.185 maxv   1649: 	je	do_iret
1.139 maxv   1650: #endif
1.129 maxv   1651: 
1.138 maxv   1652: do_mov_es:
1.157 maxv   1653: 	movw	TF_BACKW(TF_ES, %rsp),%es
1.138 maxv   1654: do_mov_ds:
1.157 maxv   1655: 	movw	TF_BACKW(TF_DS, %rsp),%ds
1.138 maxv   1656: do_mov_fs:
1.157 maxv   1657: 	movw	TF_BACKW(TF_FS, %rsp),%fs
1.176 cherry 1658: #ifndef XENPV
1.138 maxv   1659: do_mov_gs:
1.157 maxv   1660: 	movw	TF_BACKW(TF_GS, %rsp),%gs
1.137 maxv   1661: #endif
1.134 maxv   1662: 
1.185 maxv   1663: do_iret:
             1664: 	iretq
1.129 maxv   1665: 
             1666: .Lkexit:
1.185 maxv   1667: 	INTR_RESTORE_GPRS
             1668: 	addq	$(TF_REGSIZE+16),%rsp	/* iret frame */
             1669: 	iretq
             1670: END(intrfastexit)
1.152 maxv   1671: 
             1672: 	TEXT_USER_END
1673:
1.202 maxv   1674: 	.section .rodata
             1675: 
1.203 maxv   1676: /*
             1677:  * Hotpatch templates. Each is delimited by a LABEL/_end pair; the bytes in between are copied over patch sites, so their length must not change (NOTE(review): verify against the hotpatch machinery).
             1678:  */
             1679: 
             1680: LABEL(hp_nolock)
             1681: 	nop
             1682: LABEL(hp_nolock_end)
             1683: 
             1684: LABEL(hp_retfence)
             1685: 	lfence
             1686: LABEL(hp_retfence_end)
             1687: 
             1688: LABEL(hp_clac)
             1689: 	clac
             1690: LABEL(hp_clac_end)
             1691: 
             1692: LABEL(hp_stac)
             1693: 	stac
             1694: LABEL(hp_stac_end)
             1695: 
1.152 maxv   1696: #ifdef SVS
             1697: LABEL(svs_enter)
1.164 joerg  1698: 	movabs	SVS_UTLS+UTLS_KPDIRPA,%rax	/* switch to the kernel page tables */
1.152 maxv   1699: 	movq	%rax,%cr3
             1700: 	movq	CPUVAR(KRSP0),%rsp
             1701: LABEL(svs_enter_end)
             1702: 
             1703: LABEL(svs_enter_altstack)
             1704: 	testb	$SEL_UPL,TF_CS(%rsp)	/* only switch if we came from userland */
             1705: 	jz	1234f
1.164 joerg  1706: 	movabs	SVS_UTLS+UTLS_KPDIRPA,%rax
1.152 maxv   1707: 	movq	%rax,%cr3
             1708: 1234:
             1709: LABEL(svs_enter_altstack_end)
             1710: 
1.170 maxv   1711: LABEL(svs_enter_nmi)
             1712: 	movq	%cr3,%rax
             1713: 	movq	%rax,(FRAMESIZE+1*8)(%rsp)	/* nmistore->scratch */
             1714: 	movq	(FRAMESIZE+0*8)(%rsp),%rax	/* nmistore->cr3 */
             1715: 	movq	%rax,%cr3
             1716: LABEL(svs_enter_nmi_end)
             1717: 
1.152 maxv   1718: LABEL(svs_leave)
             1719: 	movq	CPUVAR(URSP0),%rsp
             1720: 	movq	CPUVAR(UPDIRPA),%rax	/* switch back to the user page tables */
             1721: 	movq	%rax,%cr3
             1722: LABEL(svs_leave_end)
             1723: 
             1724: LABEL(svs_leave_altstack)
             1725: 	testb	$SEL_UPL,TF_CS(%rsp)
             1726: 	jz	1234f
             1727: 	movq	CPUVAR(UPDIRPA),%rax
             1728: 	movq	%rax,%cr3
             1729: 1234:
             1730: LABEL(svs_leave_altstack_end)
1.153 maxv   1731: 
1.170 maxv   1732: LABEL(svs_leave_nmi)
             1733: 	movq	(FRAMESIZE+1*8)(%rsp),%rax	/* nmistore->scratch */
             1734: 	movq	%rax,%cr3
             1735: LABEL(svs_leave_nmi_end)
1.152 maxv   1736: #endif
1.160 maxv   1737: 
             1738: /* IBRS <- 1 */
             1739: LABEL(ibrs_enter)
             1740: 	movl	$MSR_IA32_SPEC_CTRL,%ecx
1.185 maxv   1741: 	rdmsr				/* read-modify-write: preserve the other bits */
             1742: 	orl	$IA32_SPEC_CTRL_IBRS,%eax
1.160 maxv   1743: 	wrmsr
             1744: LABEL(ibrs_enter_end)
             1745: 
             1746: /* IBRS <- 0 */
             1747: LABEL(ibrs_leave)
             1748: 	movl	$MSR_IA32_SPEC_CTRL,%ecx
1.185 maxv   1749: 	rdmsr
             1750: 	andl	$~IA32_SPEC_CTRL_IBRS,%eax
1.160 maxv   1751: 	wrmsr
             1752: LABEL(ibrs_leave_end)
1.162 maxv   1753: 
             1754: LABEL(noibrs_enter)
             1755: 	NOIBRS_ENTER
             1756: LABEL(noibrs_enter_end)
             1757: 
             1758: LABEL(noibrs_leave)
             1759: 	NOIBRS_LEAVE
             1760: LABEL(noibrs_leave_end)
1.181 maxv   1761: 
             1762: LABEL(mds_leave)
             1763: 	pushq	$GSEL(GDATA_SEL, SEL_KPL)
             1764: 	verw	(%rsp)			/* flush microarchitectural buffers (MDS mitigation) */
             1765: 	addq	$8,%rsp
             1766: LABEL(mds_leave_end)
             1767: 
             1768: LABEL(nomds_leave)
             1769: 	NOMDS_LEAVE
             1770: LABEL(nomds_leave_end)
CVSweb <webmaster@jp.NetBSD.org>