[BACK]Return to locore.S CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / arch / amd64 / amd64

Annotation of src/sys/arch/amd64/amd64/locore.S, Revision 1.207

1.207   ! christos    1: /*     $NetBSD: locore.S,v 1.206 2020/05/02 16:44:34 bouyer Exp $      */
1.1       fvdl        2:
                      3: /*
                      4:  * Copyright-o-rama!
                      5:  */
                      6:
                      7: /*
1.84      maxv        8:  * Copyright (c) 1998, 2000, 2007, 2008, 2016 The NetBSD Foundation, Inc.
                      9:  * All rights reserved.
                     10:  *
                     11:  * This code is derived from software contributed to The NetBSD Foundation
1.100     maxv       12:  * by Charles M. Hannum and by Maxime Villard.
1.84      maxv       13:  *
                     14:  * Redistribution and use in source and binary forms, with or without
                     15:  * modification, are permitted provided that the following conditions
                     16:  * are met:
                     17:  * 1. Redistributions of source code must retain the above copyright
                     18:  *    notice, this list of conditions and the following disclaimer.
                     19:  * 2. Redistributions in binary form must reproduce the above copyright
                     20:  *    notice, this list of conditions and the following disclaimer in the
                     21:  *    documentation and/or other materials provided with the distribution.
                     22:  *
                     23:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     24:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     25:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     26:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     27:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     28:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     29:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     30:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     31:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     32:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     33:  * POSSIBILITY OF SUCH DAMAGE.
                     34:  */
                     35:
                     36: /*
1.32      bouyer     37:  * Copyright (c) 2007 Manuel Bouyer.
                     38:  *
                     39:  * Redistribution and use in source and binary forms, with or without
                     40:  * modification, are permitted provided that the following conditions
                     41:  * are met:
                     42:  * 1. Redistributions of source code must retain the above copyright
                     43:  *    notice, this list of conditions and the following disclaimer.
                     44:  * 2. Redistributions in binary form must reproduce the above copyright
                     45:  *    notice, this list of conditions and the following disclaimer in the
                     46:  *    documentation and/or other materials provided with the distribution.
                     47:  *
                     48:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
                     49:  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
                     50:  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
                     51:  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
                     52:  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
                     53:  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
                     54:  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
                     55:  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
                     56:  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
                     57:  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
                     58:  *
                     59:  */
                     60:
                     61: /*
                     62:  * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr>
                     63:  *
                     64:  * Permission to use, copy, modify, and distribute this software for any
                     65:  * purpose with or without fee is hereby granted, provided that the above
                     66:  * copyright notice and this permission notice appear in all copies.
                     67:  *
                     68:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                     69:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     70:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
                     71:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     72:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     73:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     74:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     75:  */
                     76:
                     77: /*
1.1       fvdl       78:  * Copyright (c) 2001 Wasabi Systems, Inc.
                     79:  * All rights reserved.
                     80:  *
                     81:  * Written by Frank van der Linden for Wasabi Systems, Inc.
                     82:  *
                     83:  * Redistribution and use in source and binary forms, with or without
                     84:  * modification, are permitted provided that the following conditions
                     85:  * are met:
                     86:  * 1. Redistributions of source code must retain the above copyright
                     87:  *    notice, this list of conditions and the following disclaimer.
                     88:  * 2. Redistributions in binary form must reproduce the above copyright
                     89:  *    notice, this list of conditions and the following disclaimer in the
                     90:  *    documentation and/or other materials provided with the distribution.
                     91:  * 3. All advertising materials mentioning features or use of this software
                     92:  *    must display the following acknowledgement:
                     93:  *      This product includes software developed for the NetBSD Project by
                     94:  *      Wasabi Systems, Inc.
                     95:  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
                     96:  *    or promote products derived from this software without specific prior
                     97:  *    written permission.
                     98:  *
                     99:  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
                    100:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                    101:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                    102:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
                    103:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                    104:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                    105:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                    106:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                    107:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                    108:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                    109:  * POSSIBILITY OF SUCH DAMAGE.
                    110:  */
                    111:
                    112: /*-
                    113:  * Copyright (c) 1990 The Regents of the University of California.
                    114:  * All rights reserved.
                    115:  *
                    116:  * This code is derived from software contributed to Berkeley by
                    117:  * William Jolitz.
                    118:  *
                    119:  * Redistribution and use in source and binary forms, with or without
                    120:  * modification, are permitted provided that the following conditions
                    121:  * are met:
                    122:  * 1. Redistributions of source code must retain the above copyright
                    123:  *    notice, this list of conditions and the following disclaimer.
                    124:  * 2. Redistributions in binary form must reproduce the above copyright
                    125:  *    notice, this list of conditions and the following disclaimer in the
                    126:  *    documentation and/or other materials provided with the distribution.
1.5       agc       127:  * 3. Neither the name of the University nor the names of its contributors
1.1       fvdl      128:  *    may be used to endorse or promote products derived from this software
                    129:  *    without specific prior written permission.
                    130:  *
                    131:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                    132:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                    133:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                    134:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                    135:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                    136:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                    137:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                    138:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                    139:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                    140:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                    141:  * SUCH DAMAGE.
                    142:  *
                    143:  *     @(#)locore.s    7.3 (Berkeley) 5/13/91
                    144:  */
                    145:
1.84      maxv      146: /* Override user-land alignment before including asm.h: data on 8 bytes, text on 16 bytes padded with 0x90 (NOP) */
1.34      dsl       147: #define        ALIGN_DATA      .align  8
                    148: #define ALIGN_TEXT     .align 16,0x90
                    149: #define _ALIGN_TEXT    ALIGN_TEXT
                    150:
                    151: #include <machine/asm.h>
                    152:
1.205     maxv      153: #include "opt_kasan.h"
1.78      uebayasi  154: #include "opt_copy_symtab.h"
1.1       fvdl      155: #include "opt_ddb.h"
                    156: #include "opt_ddbparam.h"
1.51      apb       157: #include "opt_modular.h"
1.1       fvdl      158: #include "opt_realmem.h"
                    159:
1.12      drochner  160: #include "opt_compat_netbsd.h"
                    161: #include "opt_compat_netbsd32.h"
1.32      bouyer    162: #include "opt_xen.h"
1.145     maxv      163: #include "opt_svs.h"
1.12      drochner  164:
1.1       fvdl      165: #include "assym.h"
                    166: #include "lapic.h"
                    167: #include "ioapic.h"
1.2       fvdl      168: #include "ksyms.h"
1.1       fvdl      169:
                    170: #include <sys/errno.h>
                    171: #include <sys/syscall.h>
                    172:
                    173: #include <machine/pte.h>
                    174: #include <machine/segments.h>
                    175: #include <machine/specialreg.h>
                    176: #include <machine/trap.h>
                    177: #include <machine/bootinfo.h>
                    178: #include <machine/frameasm.h>
1.44      ad        179: #include <machine/cputypes.h>
1.1       fvdl      180:
                    181: #if NLAPIC > 0
                    182: #include <machine/i82489reg.h>
                    183: #endif
                    184:
                    185: /* Get definitions for IOM_BEGIN, IOM_END, and IOM_SIZE */
                    186: #include <dev/isa/isareg.h>
                    187:
1.71      uebayasi  188: #define        _RELOC(x)       ((x) - KERNBASE)       /* subtract KERNBASE from a link-time address */
                    189: #define        RELOC(x)        _RELOC(_C_LABEL(x))
                    190:
1.180     maxv      191: /* 32bit version of PTE_NX (bit 31 of a 32-bit word; presumably the upper half of a 64-bit PTE) */
                    192: #define PTE_NX32       0x80000000
1.86      maxv      193:
1.83      maxv      194: #if L2_SLOT_KERNBASE > 0
                    195: #define TABLE_L2_ENTRIES (2 * (NKL2_KIMG_ENTRIES + 1))
                    196: #else
                    197: #define TABLE_L2_ENTRIES (NKL2_KIMG_ENTRIES + 1)
                    198: #endif
                    199:
                    200: #if L3_SLOT_KERNBASE > 0
                    201: #define TABLE_L3_ENTRIES (2 * NKL3_KIMG_ENTRIES)
                    202: #else
                    203: #define TABLE_L3_ENTRIES NKL3_KIMG_ENTRIES
                    204: #endif
                    205:
                    206: #define PROC0_PML4_OFF 0       /* region size: 1 page */
1.97      maxv      207: #define PROC0_STK_OFF  (PROC0_PML4_OFF + 1 * PAGE_SIZE)        /* region size: UPAGES pages */
                    208: #define PROC0_PTP3_OFF (PROC0_STK_OFF + UPAGES * PAGE_SIZE)    /* region size: NKL4_KIMG_ENTRIES pages */
1.83      maxv      209: #define PROC0_PTP2_OFF (PROC0_PTP3_OFF + NKL4_KIMG_ENTRIES * PAGE_SIZE)        /* region size: TABLE_L3_ENTRIES pages */
                    210: #define PROC0_PTP1_OFF (PROC0_PTP2_OFF + TABLE_L3_ENTRIES * PAGE_SIZE) /* region size: the TABLE_L2_ENTRIES pages left in TABLESIZE */
                    211: #define TABLESIZE \
1.97      maxv      212:   ((NKL4_KIMG_ENTRIES + TABLE_L3_ENTRIES + TABLE_L2_ENTRIES + 1 + UPAGES) \
1.83      maxv      213:     * PAGE_SIZE)
                    214:
1.121     maxv      215: /* Amount of VA used to map the kernel, the syms and the preloaded modules: NKL2_KIMG_ENTRIES L2 slots minus the tables and the I/O memory hole */
                    216: #define BOOTMAP_VA_SIZE \
                    217:        (NKL2_KIMG_ENTRIES * (1 << L2_SHIFT) - TABLESIZE - IOM_SIZE)
                    218:
1.83      maxv      219: /*
                    220:  * fillkpt - Fill in a kernel page table (32-bit code; uses local labels 1 and 2)
                    221:  *     eax = pte (page frame | control | status); advanced by PAGE_SIZE per entry
                    222:  *     ebx = page table address; advanced by PDE_SIZE per entry
                    223:  *     ecx = number of pages to map; counted down to 0 (0 on entry writes nothing)
                    224:  *
                    225:  * Each entry is 8 (PDE_SIZE) bytes long: we must set the 4 upper bytes to 0.
                    226:  */
                    227: #define fillkpt        \
1.94      maxv      228:        cmpl    $0,%ecx                 ;       /* zero-sized? */       \
                    229:        je      2f                      ; /* yes: skip the loop */ \
1.91      maxv      230: 1:     movl    $0,(PDE_SIZE-4)(%ebx)   ;       /* upper 32 bits: 0 */  \
                    231:        movl    %eax,(%ebx)             ;       /* store phys addr */   \
                    232:        addl    $PDE_SIZE,%ebx          ;       /* next PTE/PDE */      \
                    233:        addl    $PAGE_SIZE,%eax         ;       /* next phys page */    \
1.94      maxv      234:        loop    1b                      ; /* ecx--, repeat while ecx != 0 */ \
                    235: 2:                                     ;
1.83      maxv      236:
1.84      maxv      237: /*
1.89      maxv      238:  * fillkpt_nox - Same as fillkpt, but sets the NX/XD bit: the upper 32 bits of each entry receive the value of nox_flag instead of 0.
                    239:  */
                    240: #define fillkpt_nox \
1.94      maxv      241:        cmpl    $0,%ecx                 ;       /* zero-sized? */       \
                    242:        je      2f                      ; /* yes: skip both the push and the pop */ \
1.91      maxv      243:        pushl   %ebp                    ; /* ebp is borrowed as scratch */ \
                    244:        movl    RELOC(nox_flag),%ebp    ; /* ebp = nox_flag, read once before the loop */ \
1.91      maxv      245: 1:     movl    %ebp,(PDE_SIZE-4)(%ebx) ;       /* upper 32 bits: NX */ \
                    246:        movl    %eax,(%ebx)             ;       /* store phys addr */   \
1.89      maxv      247:        addl    $PDE_SIZE,%ebx          ;       /* next PTE/PDE */      \
                    248:        addl    $PAGE_SIZE,%eax         ;       /* next phys page */    \
1.91      maxv      249:        loop    1b                      ; /* ecx--, repeat while ecx != 0 */ \
1.94      maxv      250:        popl    %ebp                    ; /* restore the caller's ebp */ \
                    251: 2:                                     ;
1.89      maxv      252:
                    253: /*
1.96      maxv      254:  * fillkpt_blank - Fill in a kernel page table with blank (all-zero) entries
                    255:  *     ebx = page table address; advanced by PDE_SIZE per entry
                    256:  *     ecx = number of entries to blank; counted down to 0 (0 on entry writes nothing)
                    257:  */
                    258: #define fillkpt_blank  \
                    259:        cmpl    $0,%ecx                 ;       /* zero-sized? */       \
                    260:        je      2f                      ; /* yes: skip the loop */ \
                    261: 1:     movl    $0,(PDE_SIZE-4)(%ebx)   ;       /* upper 32 bits: 0 */  \
                    262:        movl    $0,(%ebx)               ;       /* lower 32 bits: 0 */  \
                    263:        addl    $PDE_SIZE,%ebx          ;       /* next PTE/PDE */      \
                    264:        loop    1b                      ; /* ecx--, repeat while ecx != 0 */ \
                    265: 2:                                     ;
                    266:
                    267: /*
1.84      maxv      268:  * killkpt - Destroy a kernel page table (long mode): zero consecutive 8-byte entries
                    269:  *     rbx = page table address; advanced by PDE_SIZE per entry
                    270:  *     rcx = number of entries to zero; must be non-zero (there is no zero-count guard here)
                    271:  */
                    272: #define killkpt \
                    273: 1:     movq    $0,(%rbx)       ; /* clear the whole 64-bit entry */ \
                    274:        addq    $PDE_SIZE,%rbx  ; /* next entry */ \
                    275:        loop    1b              ;
                    276:
1.83      maxv      277:
1.32      bouyer    278: #ifdef XEN
1.99      bouyer    279: #define __ASSEMBLY__
1.175     cherry    280: #include <xen/include/public/elfnote.h>
                    281: #include <xen/include/public/xen.h>
1.177     cherry    282:
1.99      bouyer    283: #define ELFNOTE(name, type, desctype, descdata...) /* emit one ELF note record into section .note.<name> */ \
                    284: .pushsection .note.name                        ;       \
                    285:   .align 4                             ;       \
                    286:   .long 2f - 1f                /* namesz */    ;       \
                    287:   .long 4f - 3f                /* descsz */    ;       \
                    288:   .long type           /* note type */ ;       \
                    289: 1:.asciz #name         /* name string, NUL-terminated */       ;       \
                    290: 2:.align 4                             ;       \
                    291: 3:desctype descdata    /* descriptor payload */        ;       \
                    292: 4:.align 4                             ;       \
                    293: .popsection
                    294:
1.32      bouyer    295: /*
1.73      uebayasi  296:  * Xen guest identifier and loader selection.  Keep one ELFNOTE() per line:
1.32      bouyer    297:  * the macro expansion ends in .popsection with no trailing separator. */
                    298: .section __xen_guest
1.99      bouyer    299:        ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz, "NetBSD")
                    300:        ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION,  .asciz, "4.99")
                    301:        ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION,    .asciz, "xen-3.0")
                    302:        ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE,      .quad,  KERNBASE)
1.206     bouyer    303: #ifdef XENPV
1.99      bouyer    304:        ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET,   .quad,  KERNBASE)
                    305:        ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,          .quad,  start)
1.206     bouyer    306: #else
                    307:        ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET,   .quad,  0)
                    308:        ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY,   .long,  RELOC(start_xen32))
                    309: #endif /* XENPV */
1.99      bouyer    310:        ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .quad,  hypercall_page)
                    311:        ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW,   .quad,  HYPERVISOR_VIRT_START)
1.206     bouyer    312:        ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,       .asciz, "writable_descriptor_tables|auto_translated_physmap|supervisor_mode_kernel|hvm_callback_vector")
1.99      bouyer    313:        ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz, "yes")
1.180     maxv      314:        ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,   .long,  PTE_P, PTE_P)
1.99      bouyer    315:        ELFNOTE(Xen, XEN_ELFNOTE_LOADER,         .asciz, "generic")
                    316:        ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long,  0)
1.49      ad        317: #if NKSYMS > 0 || defined(DDB) || defined(MODULAR)
1.99      bouyer    318:        ELFNOTE(Xen, XEN_ELFNOTE_BSD_SYMTAB,     .asciz, "yes")
1.32      bouyer    319: #endif
1.206     bouyer    320: #endif  /* XEN */
1.1       fvdl      321:
                    322: /*
                    323:  * Initialization: boot-time variables, exported with .globl and defined in .data below
                    324:  */
                    325:        .data
                    326:
1.98      maxv      327:        .globl  _C_LABEL(tablesize)
1.86      maxv      328:        .globl  _C_LABEL(nox_flag)
1.100     maxv      329:        .globl  _C_LABEL(cputype)
1.57      jym       330:        .globl  _C_LABEL(cpuid_level)
1.71      uebayasi  331:        .globl  _C_LABEL(esym)
                    332:        .globl  _C_LABEL(eblob)
1.100     maxv      333:        .globl  _C_LABEL(atdevbase)
                    334:        .globl  _C_LABEL(PDPpaddr)
1.71      uebayasi  335:        .globl  _C_LABEL(boothowto)
                    336:        .globl  _C_LABEL(bootinfo)
                    337:        .globl  _C_LABEL(biosbasemem)
                    338:        .globl  _C_LABEL(biosextmem)
1.130     maxv      339:        .globl  _C_LABEL(lwp0uarea)
1.138     maxv      340:        .globl  do_mov_es
                    341:        .globl  do_mov_ds
                    342:        .globl  do_mov_fs
                    343:        .globl  do_mov_gs
1.129     maxv      344:        .globl  do_iret
1.43      ad        345:
1.98      maxv      346:        .type   _C_LABEL(tablesize), @object
                    347: _C_LABEL(tablesize):   .long   TABLESIZE       /* value of the TABLESIZE macro, in bytes */
                    348: END(tablesize)
1.86      maxv      349:        .type   _C_LABEL(nox_flag), @object
                    350: LABEL(nox_flag)                .long   0       /* 32bit NOX flag, set if supported */
                    351: END(nox_flag)
1.71      uebayasi  352:        .type   _C_LABEL(cputype), @object
1.84      maxv      353: LABEL(cputype)         .long   0       /* are we 80486, Pentium, or.. */
1.71      uebayasi  354: END(cputype)
                    355:        .type   _C_LABEL(cpuid_level), @object
1.84      maxv      356: LABEL(cpuid_level)     .long   -1      /* max. level accepted by cpuid instr */
1.71      uebayasi  357: END(cpuid_level)
                    358:        .type   _C_LABEL(esym), @object
1.84      maxv      359: LABEL(esym)            .quad   0       /* ptr to end of syms */
1.71      uebayasi  360: END(esym)
                    361:        .type   _C_LABEL(eblob), @object
1.84      maxv      362: LABEL(eblob)           .quad   0       /* ptr to end of modules */
1.71      uebayasi  363: END(eblob)
                    364:        .type   _C_LABEL(atdevbase), @object
1.84      maxv      365: LABEL(atdevbase)       .quad   0       /* location of start of iomem in virt */
1.71      uebayasi  366: END(atdevbase)
                    367:        .type   _C_LABEL(PDPpaddr), @object
1.84      maxv      368: LABEL(PDPpaddr)                .quad   0       /* paddr of PTD, for libkvm */
1.71      uebayasi  369: END(PDPpaddr)
                    370:        .type   _C_LABEL(biosbasemem), @object
1.1       fvdl      371: #ifndef REALBASEMEM
1.84      maxv      372: LABEL(biosbasemem)     .long   0       /* base memory reported by BIOS */
1.1       fvdl      373: #else
1.71      uebayasi  374: LABEL(biosbasemem)     .long   REALBASEMEM     /* fixed by the REALBASEMEM option */
1.1       fvdl      375: #endif
1.71      uebayasi  376: END(biosbasemem)
                    377:        .type   _C_LABEL(biosextmem), @object
1.1       fvdl      378: #ifndef REALEXTMEM
1.84      maxv      379: LABEL(biosextmem)      .long   0       /* extended memory reported by BIOS */
1.1       fvdl      380: #else
1.71      uebayasi  381: LABEL(biosextmem)      .long   REALEXTMEM      /* fixed by the REALEXTMEM option */
1.1       fvdl      382: #endif
1.71      uebayasi  383: END(biosextmem)
1.130     maxv      384:        .type   _C_LABEL(lwp0uarea), @object
                    385: LABEL(lwp0uarea)       .quad   0
                    386: END(lwp0uarea)
1.1       fvdl      387:
1.176     cherry    388: #ifndef XENPV
1.56      jym       389:        .globl  gdt64_lo
                    390:        .globl  gdt64_hi
1.1       fvdl      391:
1.56      jym       392: #define GDT64_LIMIT gdt64_end-gdt64_start-1
                    393: /* Temporary gdt64, with base address in low memory (gdt64_start minus KERNBASE, via _RELOC) */
1.71      uebayasi  394:        .type   _C_LABEL(gdt64_lo), @object
                    395: LABEL(gdt64_lo)
1.56      jym       396:        .word   GDT64_LIMIT
1.1       fvdl      397:        .quad   _RELOC(gdt64_start)
1.71      uebayasi  398: END(gdt64_lo)
1.1       fvdl      399: .align 64
                    400:
1.56      jym       401: /* Temporary gdt64, with base address in high memory (the unrelocated link address of gdt64_start) */
1.71      uebayasi  402:        .type   _C_LABEL(gdt64_hi), @object
                    403: LABEL(gdt64_hi)
1.56      jym       404:        .word   GDT64_LIMIT
                    405:        .quad   gdt64_start
1.71      uebayasi  406: END(gdt64_hi)
1.56      jym       407: .align 64
                    408: #undef GDT64_LIMIT
                    409:
1.71      uebayasi  410:        .type   _C_LABEL(gdt64_start), @object
                    411: _C_LABEL(gdt64_start):
1.1       fvdl      412:        .quad 0x0000000000000000        /* always empty */
                    413:        .quad 0x00af9a000000ffff        /* kernel CS */
                    414:        .quad 0x00cf92000000ffff        /* kernel DS */
1.71      uebayasi  415: END(gdt64_start)
1.1       fvdl      416: gdt64_end:
                    417:
1.71      uebayasi  418:        .type   _C_LABEL(farjmp64), @object
                    419: _C_LABEL(farjmp64):
1.57      jym       420:        .long   _RELOC(longmode)        /* 32-bit offset: longmode minus KERNBASE */
1.1       fvdl      421:        .word   GSEL(GCODE_SEL, SEL_KPL)        /* 16-bit selector built from GCODE_SEL at SEL_KPL */
1.71      uebayasi  422: END(farjmp64)
1.84      maxv      423:
1.206     bouyer    424: #ifdef XEN
                    425: /* 32bit GDT */
                    426: gdtdesc32:
                    427:        .word   gdt32end - gdt32        /* NOTE(review): full table size, whereas GDT64_LIMIT above is size-1 */
                    428:        .long   RELOC(gdt32)
                    429:        .long   0
                    430: gdt32:
                    431:        .long   0                       # null descriptor
                    432:        .long   0
                    433:        .long   0x0000ffff              # %cs
                    434:        .long   0x00cf9a00
                    435:        .long   0x0000ffff              # %ds, %es, %ss
                    436:        .long   0x00cf9200
                    437: gdt32end:
                    438: #endif /* XEN */
1.176     cherry    439: #endif /* !XENPV */
1.71      uebayasi  440:
1.84      maxv      441:        /* Space for the temporary stack: 512 bytes, with tmpstk labelling the end of the area */
1.71      uebayasi  442:        .size   tmpstk, tmpstk - .
1.84      maxv      443:        .space  512
1.1       fvdl      444: tmpstk:
                    445:
                    446: /*
                    447:  * Some hackage to deal with 64bit symbols in 32 bit mode.
1.79      maxv      448:  * This may not be needed if things are cleaned up a little.
1.1       fvdl      449:  */
                    450:
                    451:        .text
                    452:        .globl  _C_LABEL(kernel_text)
                    453:        .set    _C_LABEL(kernel_text),KERNTEXTOFF       /* assigned the value KERNTEXTOFF rather than defined as a label */
                    454:
1.71      uebayasi  455: ENTRY(start)
1.176     cherry    456: #ifndef XENPV
1.71      uebayasi  457:        .code32
1.81      maxv      458:
                    459:        /* Warm boot */
                    460:        movw    $0x1234,0x472
                    461:
1.1       fvdl      462:        /*
1.84      maxv      463:         * Load parameters from the stack (32 bits):
1.91      maxv      464:         *     boothowto, [bootdev], bootinfo, esym, biosextmem, biosbasemem
1.81      maxv      465:         * We are not interested in 'bootdev'.
1.1       fvdl      466:         */
1.81      maxv      467:
                    468:        /* Load 'boothowto' */
1.1       fvdl      469:        movl    4(%esp),%eax
                    470:        movl    %eax,RELOC(boothowto)
1.81      maxv      471:
                    472:        /* Load 'bootinfo' */
1.1       fvdl      473:        movl    12(%esp),%eax
1.81      maxv      474:        testl   %eax,%eax               /* bootinfo = NULL? */
1.142     maxv      475:        jz      .Lbootinfo_finished
1.81      maxv      476:
1.121     maxv      477:        movl    (%eax),%ebx             /* bootinfo::bi_nentries */
1.1       fvdl      478:        movl    $RELOC(bootinfo),%ebp
1.81      maxv      479:        movl    %ebp,%edx
1.1       fvdl      480:        addl    $BOOTINFO_MAXSIZE,%ebp
1.81      maxv      481:        movl    %ebx,(%edx)
                    482:        addl    $4,%edx
                    483:
1.142     maxv      484: .Lbootinfo_entryloop:
1.81      maxv      485:        testl   %ebx,%ebx               /* no remaining entries? */
1.142     maxv      486:        jz      .Lbootinfo_finished
1.81      maxv      487:
                    488:        addl    $4,%eax
                    489:        movl    (%eax),%ecx             /* address of entry */
1.1       fvdl      490:        pushl   %edi
                    491:        pushl   %esi
                    492:        pushl   %eax
                    493:
1.84      maxv      494:        movl    (%ecx),%eax             /* btinfo_common::len (size of entry) */
1.1       fvdl      495:        movl    %edx,%edi
1.121     maxv      496:        addl    %eax,%edx               /* update dest pointer */
1.81      maxv      497:        cmpl    %ebp,%edx               /* beyond bootinfo+BOOTINFO_MAXSIZE? */
1.142     maxv      498:        jg      .Lbootinfo_overflow
1.81      maxv      499:
1.1       fvdl      500:        movl    %ecx,%esi
                    501:        movl    %eax,%ecx
1.81      maxv      502:
1.43      ad        503:        /*
1.121     maxv      504:         * If any modules were loaded, record where they end. 'eblob' is used
                    505:         * later to compute the initial bootstrap tables.
1.43      ad        506:         */
1.84      maxv      507:        cmpl    $BTINFO_MODULELIST,4(%esi) /* btinfo_common::type */
1.142     maxv      508:        jne     .Lbootinfo_copy
1.81      maxv      509:
1.121     maxv      510:        /* Skip the modules if we won't have enough VA to map them */
                    511:        movl    12(%esi),%eax           /* btinfo_modulelist::endpa */
                    512:        addl    $PGOFSET,%eax           /* roundup to a page */
                    513:        andl    $~PGOFSET,%eax
                    514:        cmpl    $BOOTMAP_VA_SIZE,%eax
1.142     maxv      515:        jg      .Lbootinfo_skip
1.121     maxv      516:        movl    %eax,RELOC(eblob)
1.81      maxv      517:        addl    $KERNBASE_LO,RELOC(eblob)
                    518:        adcl    $KERNBASE_HI,RELOC(eblob)+4
                    519:
1.142     maxv      520: .Lbootinfo_copy:
1.1       fvdl      521:        rep
1.81      maxv      522:        movsb                           /* copy esi -> edi */
1.142     maxv      523:        jmp     .Lbootinfo_next
1.121     maxv      524:
1.142     maxv      525: .Lbootinfo_skip:
1.121     maxv      526:        subl    %ecx,%edx               /* revert dest pointer */
                    527:
1.142     maxv      528: .Lbootinfo_next:
1.1       fvdl      529:        popl    %eax
                    530:        popl    %esi
                    531:        popl    %edi
1.81      maxv      532:        subl    $1,%ebx                 /* decrement the # of entries */
1.142     maxv      533:        jmp     .Lbootinfo_entryloop
1.81      maxv      534:
1.142     maxv      535: .Lbootinfo_overflow:
1.81      maxv      536:        /*
                    537:         * Cleanup for overflow case. Pop the registers, and correct the number
                    538:         * of entries.
                    539:         */
1.1       fvdl      540:        popl    %eax
                    541:        popl    %esi
                    542:        popl    %edi
                    543:        movl    $RELOC(bootinfo),%ebp
1.81      maxv      544:        movl    %ebp,%edx
                    545:        subl    %ebx,(%edx)             /* correct the number of entries */
1.142     maxv      546: .Lbootinfo_finished:
1.1       fvdl      547:
1.81      maxv      548:        /* Load 'esym' */
1.72      uebayasi  549:        movl    16(%esp),%eax
1.81      maxv      550:        testl   %eax,%eax               /* esym = NULL? */
1.1       fvdl      551:        jz      1f
1.81      maxv      552:
1.1       fvdl      553:        addl    $KERNBASE_LO,%eax
1.81      maxv      554:
                    555: 1:
                    556:        movl    $RELOC(esym),%ebp
1.1       fvdl      557:        movl    %eax,(%ebp)
                    558:        movl    $KERNBASE_HI,4(%ebp)
                    559:
1.83      maxv      560:        /* Load 'biosextmem' */
1.1       fvdl      561:        movl    $RELOC(biosextmem),%ebp
                    562:        movl    (%ebp),%eax
1.83      maxv      563:        testl   %eax,%eax               /* already set? */
1.142     maxv      564:        jnz     .Lbiosextmem_finished
1.81      maxv      565:
1.1       fvdl      566:        movl    20(%esp),%eax
                    567:        movl    %eax,(%ebp)
1.81      maxv      568:
1.142     maxv      569: .Lbiosextmem_finished:
1.83      maxv      570:        /* Load 'biosbasemem' */
1.1       fvdl      571:        movl    $RELOC(biosbasemem),%ebp
                    572:        movl    (%ebp),%eax
1.83      maxv      573:        testl   %eax,%eax               /* already set? */
1.142     maxv      574:        jnz     .Lbiosbasemem_finished
1.83      maxv      575:
1.1       fvdl      576:        movl    24(%esp),%eax
                    577:        movl    %eax,(%ebp)
                    578:
1.142     maxv      579: .Lbiosbasemem_finished:
1.83      maxv      580:        /*
1.84      maxv      581:         * Done with the parameters!
1.83      maxv      582:         */
1.84      maxv      583:
                    584:        /* First, reset the PSL. */
1.1       fvdl      585:        pushl   $PSL_MBO
                    586:        popfl
                    587:
                    588:        xorl    %eax,%eax
                    589:        cpuid
                    590:        movl    %eax,RELOC(cpuid_level)
                    591:
                    592:        /*
                    593:         * Finished with old stack; load new %esp now instead of later so we
                    594:         * can trace this code without having to worry about the trace trap
                    595:         * clobbering the memory test or the zeroing of the bss+bootstrap page
                    596:         * tables.
                    597:         *
                    598:         * The boot program should check:
                    599:         *      text+data <= &stack_variable - more_space_for_stack
                    600:         *      text+data+bss+pad+space_for_page_tables <= end_of_memory
1.100     maxv      601:         *
1.84      maxv      602:         * XXX: the gdt is in the carcass of the boot program so clearing
1.1       fvdl      603:         * the rest of memory is still not possible.
                    604:         */
                    605:        movl    $RELOC(tmpstk),%esp
                    606:
1.86      maxv      607:        /*
1.180     maxv      608:         * Retrieve the NX/XD flag. We use the 32bit version of PTE_NX.
1.86      maxv      609:         */
                    610:        movl    $0x80000001,%eax
                    611:        cpuid
                    612:        andl    $CPUID_NOX,%edx
1.142     maxv      613:        jz      .Lno_NOX
1.180     maxv      614:        movl    $PTE_NX32,RELOC(nox_flag)
1.142     maxv      615: .Lno_NOX:
1.86      maxv      616:
1.1       fvdl      617: /*
1.84      maxv      618:  * There are four levels of pages in amd64: PML4 -> PDP -> PD -> PT. They will
                    619:  * be referred to as: L4 -> L3 -> L2 -> L1.
                    620:  *
                    621:  * Virtual address space of the kernel:
1.97      maxv      622:  * +------+--------+------+-----+--------+---------------------+----------
                    623:  * | TEXT | RODATA | DATA | BSS | [SYMS] | [PRELOADED MODULES] | L4 ->
                    624:  * +------+--------+------+-----+--------+---------------------+----------
                    625:  *                             (1)      (2)                   (3)
                    626:  *
1.101     maxv      627:  * --------------+-----+-----+----+-------------+
                    628:  * -> PROC0 STK -> L3 -> L2 -> L1 | ISA I/O MEM |
                    629:  * --------------+-----+-----+----+-------------+
1.97      maxv      630:  *                               (4)
1.84      maxv      631:  *
1.97      maxv      632:  * PROC0 STK is obviously not linked as a page level. It just happens to be
                    633:  * caught between L4 and L3.
                    634:  *
                    635:  * (PROC0 STK + L4 + L3 + L2 + L1) is later referred to as BOOTSTRAP TABLES.
1.1       fvdl      636:  *
1.116     maxv      637:  * ISA I/O MEM has no physical page allocated here, just virtual addresses.
                    638:  *
1.88      maxv      639:  * Important note: the kernel segments are properly 4k-aligned
                    640:  * (see kern.ldscript), so there's no need to enforce alignment.
1.1       fvdl      641:  */
                    642:
1.84      maxv      643:        /* Find end of kernel image; brings us on (1). */
1.115     maxv      644:        movl    $RELOC(__kernel_end),%edi
1.83      maxv      645:
1.78      uebayasi  646: #if (NKSYMS || defined(DDB) || defined(MODULAR)) && !defined(makeoptions_COPY_SYMTAB)
1.84      maxv      647:        /* Save the symbols (if loaded); brings us on (2). */
1.1       fvdl      648:        movl    RELOC(esym),%eax
                    649:        testl   %eax,%eax
                    650:        jz      1f
                    651:        subl    $KERNBASE_LO,%eax       /* XXX */
                    652:        movl    %eax,%edi
                    653: 1:
                    654: #endif
1.84      maxv      655:        /* Skip over any modules/blobs; brings us on (3). */
1.43      ad        656:        movl    RELOC(eblob),%eax
                    657:        testl   %eax,%eax
                    658:        jz      1f
                    659:        subl    $KERNBASE_LO,%eax       /* XXX */
                    660:        movl    %eax,%edi
                    661: 1:
1.81      maxv      662:
1.97      maxv      663:        /* We are on (3). Align up for BOOTSTRAP TABLES. */
1.1       fvdl      664:        movl    %edi,%esi
1.84      maxv      665:        addl    $PGOFSET,%esi
1.1       fvdl      666:        andl    $~PGOFSET,%esi
                    667:
1.93      maxv      668:        /* We are on the BOOTSTRAP TABLES. Save L4's physical address. */
                    669:        movl    $RELOC(PDPpaddr),%ebp
                    670:        movl    %esi,(%ebp)
                    671:        movl    $0,4(%ebp)
                    672:
1.84      maxv      673:        /* Now, zero out the BOOTSTRAP TABLES (before filling them in). */
1.91      maxv      674:        movl    %esi,%edi
1.1       fvdl      675:        xorl    %eax,%eax
                    676:        cld
                    677:        movl    $TABLESIZE,%ecx
                    678:        shrl    $2,%ecx
                    679:        rep
1.83      maxv      680:        stosl                           /* copy eax -> edi */
1.1       fvdl      681:
1.73      uebayasi  682: /*
1.84      maxv      683:  * Build the page tables and levels. We go from L1 to L4, and link the levels
                    684:  * together. Note: RELOC computes &addr - KERNBASE in 32 bits; the value can't
                    685:  * be > 4G, or we can't deal with it anyway, since we are in 32bit mode.
1.73      uebayasi  686:  */
1.84      maxv      687:        /*
                    688:         * Build L1.
                    689:         */
1.83      maxv      690:        leal    (PROC0_PTP1_OFF)(%esi),%ebx
1.81      maxv      691:
1.119     maxv      692:        /* Skip the area below the kernel text. */
1.96      maxv      693:        movl    $(KERNTEXTOFF_LO - KERNBASE_LO),%ecx
                    694:        shrl    $PGSHIFT,%ecx
                    695:        fillkpt_blank
1.81      maxv      696:
1.88      maxv      697:        /* Map the kernel text RX. */
1.96      maxv      698:        movl    $(KERNTEXTOFF_LO - KERNBASE_LO),%eax    /* start of TEXT */
1.88      maxv      699:        movl    $RELOC(__rodata_start),%ecx
                    700:        subl    %eax,%ecx
                    701:        shrl    $PGSHIFT,%ecx
1.180     maxv      702:        orl     $(PTE_P),%eax
1.88      maxv      703:        fillkpt
1.1       fvdl      704:
1.91      maxv      705:        /* Map the kernel rodata R. */
1.88      maxv      706:        movl    $RELOC(__rodata_start),%eax
                    707:        movl    $RELOC(__data_start),%ecx
1.1       fvdl      708:        subl    %eax,%ecx
                    709:        shrl    $PGSHIFT,%ecx
1.180     maxv      710:        orl     $(PTE_P),%eax
1.89      maxv      711:        fillkpt_nox
1.1       fvdl      712:
1.90      maxv      713:        /* Map the kernel data+bss RW. */
                    714:        movl    $RELOC(__data_start),%eax
                    715:        movl    $RELOC(__kernel_end),%ecx
                    716:        subl    %eax,%ecx
                    717:        shrl    $PGSHIFT,%ecx
1.180     maxv      718:        orl     $(PTE_P|PTE_W),%eax
1.90      maxv      719:        fillkpt_nox
                    720:
1.105     maxv      721:        /* Map [SYMS]+[PRELOADED MODULES] RW. */
1.90      maxv      722:        movl    $RELOC(__kernel_end),%eax
1.97      maxv      723:        movl    %esi,%ecx               /* start of BOOTSTRAP TABLES */
1.95      maxv      724:        subl    %eax,%ecx
                    725:        shrl    $PGSHIFT,%ecx
1.180     maxv      726:        orl     $(PTE_P|PTE_W),%eax
1.105     maxv      727:        fillkpt_nox
1.95      maxv      728:
1.92      maxv      729:        /* Map the BOOTSTRAP TABLES RW. */
                    730:        movl    %esi,%eax               /* start of BOOTSTRAP TABLES */
                    731:        movl    $TABLESIZE,%ecx         /* length of BOOTSTRAP TABLES */
                    732:        shrl    $PGSHIFT,%ecx
1.180     maxv      733:        orl     $(PTE_P|PTE_W),%eax
1.92      maxv      734:        fillkpt_nox
                    735:
1.102     maxv      736:        /* We are on (4). Map ISA I/O MEM RW. */
1.101     maxv      737:        movl    $IOM_BEGIN,%eax
                    738:        movl    $IOM_SIZE,%ecx  /* size of ISA I/O MEM */
                    739:        shrl    $PGSHIFT,%ecx
1.180     maxv      740:        orl     $(PTE_P|PTE_W/*|PTE_PCD*/),%eax
1.102     maxv      741:        fillkpt_nox
1.1       fvdl      742:
1.84      maxv      743:        /*
                    744:         * Build L2. Linked to L1.
                    745:         */
1.73      uebayasi  746:        leal    (PROC0_PTP2_OFF)(%esi),%ebx
1.1       fvdl      747:        leal    (PROC0_PTP1_OFF)(%esi),%eax
1.180     maxv      748:        orl     $(PTE_P|PTE_W),%eax
1.1       fvdl      749:        movl    $(NKL2_KIMG_ENTRIES+1),%ecx
                    750:        fillkpt
                    751:
                    752: #if L2_SLOT_KERNBASE > 0
                    753:        /* If needed, set up level 2 entries for actual kernel mapping */
1.84      maxv      754:        leal    (PROC0_PTP2_OFF + L2_SLOT_KERNBASE * PDE_SIZE)(%esi),%ebx
1.73      uebayasi  755:        leal    (PROC0_PTP1_OFF)(%esi),%eax
1.180     maxv      756:        orl     $(PTE_P|PTE_W),%eax
1.73      uebayasi  757:        movl    $(NKL2_KIMG_ENTRIES+1),%ecx
1.1       fvdl      758:        fillkpt
                    759: #endif
                    760:
1.84      maxv      761:        /*
                    762:         * Build L3. Linked to L2.
                    763:         */
1.73      uebayasi  764:        leal    (PROC0_PTP3_OFF)(%esi),%ebx
1.1       fvdl      765:        leal    (PROC0_PTP2_OFF)(%esi),%eax
1.180     maxv      766:        orl     $(PTE_P|PTE_W),%eax
1.1       fvdl      767:        movl    $NKL3_KIMG_ENTRIES,%ecx
                    768:        fillkpt
                    769:
                    770: #if L3_SLOT_KERNBASE > 0
                    771:        /* If needed, set up level 3 entries for actual kernel mapping */
1.84      maxv      772:        leal    (PROC0_PTP3_OFF + L3_SLOT_KERNBASE * PDE_SIZE)(%esi),%ebx
1.73      uebayasi  773:        leal    (PROC0_PTP2_OFF)(%esi),%eax
1.180     maxv      774:        orl     $(PTE_P|PTE_W),%eax
1.73      uebayasi  775:        movl    $NKL3_KIMG_ENTRIES,%ecx
1.1       fvdl      776:        fillkpt
                    777: #endif
                    778:
1.84      maxv      779:        /*
                    780:         * Build L4 for identity mapping. Linked to L3.
                    781:         */
1.73      uebayasi  782:        leal    (PROC0_PML4_OFF)(%esi),%ebx
1.1       fvdl      783:        leal    (PROC0_PTP3_OFF)(%esi),%eax
1.180     maxv      784:        orl     $(PTE_P|PTE_W),%eax
1.1       fvdl      785:        movl    $NKL4_KIMG_ENTRIES,%ecx
                    786:        fillkpt
                    787:
1.84      maxv      788:        /* Set up L4 entries for actual kernel mapping */
                    789:        leal    (PROC0_PML4_OFF + L4_SLOT_KERNBASE * PDE_SIZE)(%esi),%ebx
1.1       fvdl      790:        leal    (PROC0_PTP3_OFF)(%esi),%eax
1.180     maxv      791:        orl     $(PTE_P|PTE_W),%eax
1.1       fvdl      792:        movl    $NKL4_KIMG_ENTRIES,%ecx
                    793:        fillkpt
                    794:
                    795:        /*
                    796:         * Startup checklist:
                    797:         * 1. Enable PAE (and SSE while here).
                    798:         */
                    799:        movl    %cr4,%eax
                    800:        orl     $(CR4_PAE|CR4_OSFXSR|CR4_OSXMMEXCPT),%eax
                    801:        movl    %eax,%cr4
                    802:
                    803:        /*
1.86      maxv      804:         * 2. Set Long Mode Enable in EFER. Also enable the syscall extensions,
                    805:         *    and NOX if available.
1.1       fvdl      806:         */
1.73      uebayasi  807:        movl    $MSR_EFER,%ecx
1.1       fvdl      808:        rdmsr
                    809:        xorl    %eax,%eax       /* XXX */
                    810:        orl     $(EFER_LME|EFER_SCE),%eax
1.86      maxv      811:        movl    RELOC(nox_flag),%ebx
                    812:        cmpl    $0,%ebx
1.142     maxv      813:        je      .Lskip_NOX
1.86      maxv      814:        orl     $(EFER_NXE),%eax
1.142     maxv      815: .Lskip_NOX:
1.1       fvdl      816:        wrmsr
                    817:
                    818:        /*
                    819:         * 3. Load %cr3 with pointer to PML4.
                    820:         */
                    821:        movl    %esi,%eax
                    822:        movl    %eax,%cr3
                    823:
                    824:        /*
                    825:         * 4. Enable paging and the rest of it.
                    826:         */
                    827:        movl    %cr0,%eax
1.68      jym       828:        orl     $(CR0_PE|CR0_PG|CR0_NE|CR0_TS|CR0_MP|CR0_WP|CR0_AM),%eax
1.1       fvdl      829:        movl    %eax,%cr0
                    830:        jmp     compat
                    831: compat:
                    832:
                    833:        /*
1.83      maxv      834:         * 5. Not quite done yet, we're now in a compatibility segment, in
                    835:         *    legacy mode. We must jump to a long mode segment. Need to set up
                    836:         *    a temporary GDT with a long mode segment in it to do that.
1.1       fvdl      837:         */
1.56      jym       838:        movl    $RELOC(gdt64_lo),%eax
1.1       fvdl      839:        lgdt    (%eax)
                    840:        movl    $RELOC(farjmp64),%eax
                    841:        ljmp    *(%eax)
                    842:
1.83      maxv      843:        .code64
1.1       fvdl      844: longmode:
                    845:        /*
1.83      maxv      846:         * 6. Finally, we're in long mode. However, we're still in the identity
                    847:         *    mapped area (could not jump out of that earlier because it would
                    848:         *    have been a > 32bit jump). We can do that now, so here we go.
1.1       fvdl      849:         */
                    850:        movabsq $longmode_hi,%rax
                    851:        jmp     *%rax
1.56      jym       852:
1.1       fvdl      853: longmode_hi:
1.56      jym       854:
                    855:        /*
                    856:         * We left the identity mapped area. Base address of
                    857:         * the temporary gdt64 should now be in high memory.
                    858:         */
                    859:        movq    $RELOC(gdt64_hi),%rax
                    860:        lgdt    (%rax)
                    861:
1.1       fvdl      862:        /*
1.83      maxv      863:         * We have arrived. There's no need anymore for the identity mapping in
                    864:         * low memory, remove it.
1.1       fvdl      865:         */
                    866:        movq    $KERNBASE,%r8
                    867:
                    868: #if L2_SLOT_KERNBASE > 0
                    869:        movq    $(NKL2_KIMG_ENTRIES+1),%rcx
1.84      maxv      870:        leaq    (PROC0_PTP2_OFF)(%rsi),%rbx     /* old, phys address */
                    871:        addq    %r8,%rbx                        /* new, virt address */
                    872:        killkpt
1.1       fvdl      873: #endif
                    874:
                    875: #if L3_SLOT_KERNBASE > 0
                    876:        movq    $NKL3_KIMG_ENTRIES,%rcx
1.84      maxv      877:        leaq    (PROC0_PTP3_OFF)(%rsi),%rbx     /* old, phys address */
                    878:        addq    %r8,%rbx                        /* new, virt address */
                    879:        killkpt
1.1       fvdl      880: #endif
                    881:
                    882:        movq    $NKL4_KIMG_ENTRIES,%rcx
1.84      maxv      883:        leaq    (PROC0_PML4_OFF)(%rsi),%rbx     /* old, phys address of PML4 */
                    884:        addq    %r8,%rbx                        /* new, virt address of PML4 */
1.85      maxv      885:        killkpt
1.1       fvdl      886:
                    887:        /* Relocate atdevbase. */
                    888:        movq    $(TABLESIZE+KERNBASE),%rdx
                    889:        addq    %rsi,%rdx
                    890:        movq    %rdx,_C_LABEL(atdevbase)(%rip)
                    891:
                    892:        /* Set up bootstrap stack. */
1.97      maxv      893:        leaq    (PROC0_STK_OFF)(%rsi),%rax
1.1       fvdl      894:        addq    %r8,%rax
1.130     maxv      895:        movq    %rax,_C_LABEL(lwp0uarea)(%rip)
1.1       fvdl      896:        leaq    (USPACE-FRAMESIZE)(%rax),%rsp
1.84      maxv      897:        xorq    %rbp,%rbp                       /* mark end of frames */
1.1       fvdl      898:
                    899:        xorw    %ax,%ax
                    900:        movw    %ax,%gs
                    901:        movw    %ax,%fs
                    902:
1.116     maxv      903:        /* The first physical page available. */
                    904:        leaq    (TABLESIZE)(%rsi),%rdi
1.32      bouyer    905:
1.176     cherry    906: #else  /* XENPV */
1.32      bouyer    907:        /* First, reset the PSL. */
                    908:        pushq   $2
                    909:        popfq
                    910:
                    911:        cld
                    912:
                    913:        /*
                    914:         * Xen info:
                    915:         * - %rsi -> start_info struct
1.111     maxv      916:         * - %rsp -> stack, *theoretically* the last used page by Xen bootstrap
1.32      bouyer    917:         */
1.111     maxv      918:        movq    %rsi,%rbx
1.32      bouyer    919:
1.73      uebayasi  920:        /* Clear BSS. */
1.32      bouyer    921:        xorq    %rax,%rax
                    922:        movq    $_C_LABEL(__bss_start),%rdi
                    923:        movq    $_C_LABEL(_end),%rcx
                    924:        subq    %rdi,%rcx
                    925:        rep
                    926:        stosb
                    927:
1.111     maxv      928:        /* Copy start_info to a safe place. */
1.32      bouyer    929:        movq    %rbx,%rsi
                    930:        movq    $_C_LABEL(start_info_union),%rdi
                    931:        movq    $64,%rcx
                    932:        rep
                    933:        movsq
                    934:
                    935:        /*
                    936:         * Memory layout at start of the day:
                    937:         * - Kernel image
                    938:         * - Page frames list
                    939:         * - start_info struct. we copied it, so it can be recycled.
                    940:         * - xenstore
                    941:         * - console
                    942:         * - Xen bootstrap page tables
                    943:         * - kernel stack. provided by Xen
1.79      maxv      944:         * - guaranteed 512kB padding
1.32      bouyer    945:         *
                    946:         * As we want to rebuild our page tables and place our stack
                    947:         * in proc0 struct, all data starting from after console can be
                    948:         * discarded after we've done a little setup.
                    949:         */
                    950:
                    951:        /*
1.111     maxv      952:         * We want our own page tables, and will rebuild them. We will reclaim
                    953:         * the Xen space later, INCLUDING the stack. So we need to switch to a
                    954:         * temporary one now.
                    955:         */
                    956:        movq    $tmpstk,%rax
                    957:        subq    $8,%rax
                    958:        movq    %rax,%rsp
1.32      bouyer    959:
1.45      bouyer    960:        xorl    %eax,%eax
                    961:        cpuid
                    962:        movl    %eax,_C_LABEL(cpuid_level)
                    963:
1.201     bouyer    964:        movl    $VM_GUEST_XENPV, _C_LABEL(vm_guest)
                    965:
1.111     maxv      966:        movq    $cpu_info_primary,%rdi
                    967:        movq    %rdi,CPU_INFO_SELF(%rdi) /* ci->ci_self = ci */
                    968:        movq    $1,%rsi
1.67      cherry    969:        call    cpu_init_msrs   /* cpu_init_msrs(ci, true); */
                    970:
1.113     maxv      971:        call    xen_locore
1.32      bouyer    972:
                    973:        /*
1.113     maxv      974:         * The first VA available is returned by xen_locore in %rax. We
1.111     maxv      975:         * use it as the UAREA, and set up the stack here.
1.32      bouyer    976:         */
1.111     maxv      977:        movq    %rax,%rsi
1.130     maxv      978:        movq    %rsi,_C_LABEL(lwp0uarea)(%rip)
1.32      bouyer    979:        leaq    (USPACE-FRAMESIZE)(%rsi),%rsp
                    980:        xorq    %rbp,%rbp
                    981:
1.111     maxv      982:        /* Clear segment registers. */
1.63      jym       983:        xorw    %ax,%ax
1.32      bouyer    984:        movw    %ax,%gs
                    985:        movw    %ax,%fs
1.84      maxv      986:
1.117     maxv      987:        /* Set first_avail after the DUMMY PAGE (see xen_locore). */
1.32      bouyer    988:        movq    %rsi,%rdi
1.117     maxv      989:        addq    $(USPACE+PAGE_SIZE),%rdi
1.84      maxv      990:        subq    $KERNBASE,%rdi  /* init_x86_64 wants a physical address */
1.176     cherry    991: #endif /* XENPV */
1.32      bouyer    992:
1.131     maxv      993:        pushq   %rdi
1.206     bouyer    994: #if defined(XEN) && !defined(XENPV)
                    995:        call    _C_LABEL(init_xen_early)
                    996: #endif
1.131     maxv      997:        call    _C_LABEL(init_bootspace)
1.205     maxv      998: #ifdef KASAN
                    999:        movq    _C_LABEL(lwp0uarea)(%rip),%rdi
                   1000:        call    _C_LABEL(kasan_early_init)
                   1001: #endif
1.172     maxv     1002:        call    _C_LABEL(init_slotspace)
1.131     maxv     1003:        popq    %rdi
1.1       fvdl     1004:        call    _C_LABEL(init_x86_64)
                   1005:        call    _C_LABEL(main)
1.71      uebayasi 1006: END(start)
1.1       fvdl     1007:
1.207   ! christos 1008: #if defined(XEN)
        !          1009: # if !defined(XENPV)
1.206     bouyer   1010: /* Entry point for Xen PVH (32-bit code; %ebx = paddr of hvm_start_info). Joins start at .Lbiosbasemem_finished. */
                   1011:        .code32
                   1012: ENTRY(start_xen32)
                   1013:        /* Xen doesn't start us with a valid gdt */
                   1014:        movl    $RELOC(gdtdesc32), %eax
                   1015:        lgdt    (%eax)
                   1016:        jmp     $GSEL(GCODE_SEL, SEL_KPL), $RELOC(.Lreload_cs)  /* far jump: reload %cs from the new gdt */
                   1017:
                   1018: .Lreload_cs:
                   1019:        movw    $GSEL(GDATA_SEL, SEL_KPL), %ax  /* data selector for %ds/%es/%ss */
                   1020:        movw    %ax, %ds
                   1021:        movw    %ax, %es
                   1022:        movw    %ax, %ss
                   1023:
                   1024:        /* we need a valid stack */
                   1025:        movl    $RELOC(tmpstk),%esp
                   1026:        /* clear BSS; the PVH boot ABI leaves %eflags.DF unspecified, so set the direction first */
                   1027:        cld                             /* rep stosb must ascend from __bss_start */
                   1028:        xorl    %eax,%eax               /* fill byte = 0 */
                   1029:        movl    $RELOC(__bss_start),%edi
                   1030:        movl    $RELOC(_end),%ecx
                   1031:        subl    %edi,%ecx               /* ecx = _end - __bss_start (byte count) */
                   1032:        rep
                   1033:        stosb
                   1034:
                   1035:        /*
                   1036:         * Save the address of the hvm_start_info structure. This is also the
                   1037:         * end of the symbol table, so it is recorded in 'esym' as well.
                   1038:         */
                   1039:        movl    %ebx, RELOC(hvm_start_paddr)
                   1040:        movl    %ebx, %eax
                   1041:        addl    $KERNBASE_LO,%eax       /* low 32 bits of the virtual address */
                   1042:        movl    $RELOC(esym),%ebp
                   1043:        movl    %eax,(%ebp)
                   1044:        movl    $KERNBASE_HI,4(%ebp)    /* high 32 bits of the 64-bit 'esym' */
                   1045:        /* get a page for HYPERVISOR_shared_info */
                   1046:        addl    $PAGE_SIZE, %ebx        /* step past the page holding hvm_start_info */
                   1047:        addl    $PGOFSET,%ebx           /* roundup to a page */
                   1048:        andl    $~PGOFSET,%ebx
                   1049:        movl    $RELOC(HYPERVISOR_shared_info_pa),%ebp
                   1050:        movl    %ebx,(%ebp)
                   1051:        movl    $0,4(%ebp)              /* physical address: high half is zero */
                   1052:        /* XXX assume hvm_start_info+dependent structures fit in a single page */
                   1053:        addl    $PAGE_SIZE, %ebx        /* step past the shared_info page */
                   1054:        addl    $PGOFSET,%ebx           /* roundup to a page */
                   1055:        andl    $~PGOFSET,%ebx
                   1056:        addl    $KERNBASE_LO,%ebx       /* 'eblob' is stored as a virtual address */
                   1057:        movl    $RELOC(eblob),%ebp
                   1058:        movl    %ebx,(%ebp)
                   1059:        movl    $KERNBASE_HI,4(%ebp)
                   1060:
                   1061:        /* announce ourselves */
                   1062:        movl    $VM_GUEST_XENPVH, RELOC(vm_guest)
                   1063:        jmp .Lbiosbasemem_finished      /* continue in start: (PSL reset, cpuid, page tables) */
                   1064: END(start_xen32)
                   1065:        .code64
1.207   ! christos 1066: # endif /* !XENPV */
1.32      bouyer   1067: /* Space for the hypercall call page: one HYPERCALL_PAGE_OFFSET-aligned, HYPERCALL_PAGE_OFFSET-sized block. */
                   1068: #define HYPERCALL_PAGE_OFFSET 0x1000
1.177     cherry   1069: .align HYPERCALL_PAGE_OFFSET
                   1070: ENTRY(hypercall_page) /* Returns -1, on HYPERVISOR_xen_version() */
                   1071: .skip  (__HYPERVISOR_xen_version*32), 0x90     /* 0x90 filler up to byte offset __HYPERVISOR_xen_version*32 */
                   1072:        movq    $-1, %rax               /* stub placed at that offset: return -1 */
                   1073:        retq
                   1074: .align HYPERCALL_PAGE_OFFSET, 0x90             /* pad the rest, up to the next boundary, with 0x90 */
1.71      uebayasi 1075: END(hypercall_page)
1.32      bouyer   1076: #endif /* XEN */
1.1       fvdl     1077:
                   1078: /*
1.22      yamt     1079:  * int setjmp(label_t *)
                   1080:  *
                   1081:  * Used primarily by DDB. Records the caller's context in the label_t (%rdi) and returns 0.
                   1082:  */
1.1       fvdl     1083: ENTRY(setjmp)
                   1084:        /*
                   1085:         * Only save registers that must be preserved across function
                   1086:         * calls according to the ABI (%rbx, %rsp, %rbp, %r12-%r15)
                   1087:         * and %rip.
                   1088:         */
                   1089:        movq    %rdi,%rax               /* rax = label_t pointer */
                   1090:        movq    %rbx,(%rax)             /* offset  0: %rbx */
                   1091:        movq    %rsp,8(%rax)            /* offset  8: %rsp (points at our return address) */
                   1092:        movq    %rbp,16(%rax)           /* offset 16: %rbp */
                   1093:        movq    %r12,24(%rax)           /* offset 24: %r12 */
                   1094:        movq    %r13,32(%rax)           /* offset 32: %r13 */
                   1095:        movq    %r14,40(%rax)           /* offset 40: %r14 */
                   1096:        movq    %r15,48(%rax)           /* offset 48: %r15 */
                   1097:        movq    (%rsp),%rdx             /* rdx = return address of this call */
                   1098:        movq    %rdx,56(%rax)           /* offset 56: saved %rip */
                   1099:        xorl    %eax,%eax               /* the direct call returns 0 */
                   1100:        ret
1.71      uebayasi 1101: END(setjmp)
1.1       fvdl     1102:
1.22      yamt     1103: /*
                   1104:  * int longjmp(label_t *)
                   1105:  *
                   1106:  * Used primarily by DDB.
                   1107:  */
1.1       fvdl     1108: ENTRY(longjmp)
                   1109:        movq    %rdi,%rax       /* %rax = label_t * (first argument) */
                   1110:        movq    (%rax),%rbx
                   1111:        movq    8(%rax),%rsp    /* from here on we run on the saved stack pointer */
                   1112:        movq    16(%rax),%rbp
                   1113:        movq    24(%rax),%r12
                   1114:        movq    32(%rax),%r13
                   1115:        movq    40(%rax),%r14
                   1116:        movq    48(%rax),%r15
                   1117:        movq    56(%rax),%rdx   /* saved %rip */
                   1118:        movq    %rdx,(%rsp)     /* store it in the return slot so that 'ret' jumps there */
1.22      yamt     1119:        movl    $1,%eax         /* the resumed setjmp() call site sees a return value of 1 */
1.1       fvdl     1120:        ret
1.71      uebayasi 1121: END(longjmp)
1.1       fvdl     1122:
1.73      uebayasi 1123: /*
                   1124:  * void dumpsys(void)
                   1125:  *
                   1126:  * Mimic cpu_switchto() for postmortem debugging.
                   1127:  */
1.25      yamt     1128: ENTRY(dumpsys)
1.84      maxv     1129:        /* Build a fake switch frame. */
1.25      yamt     1130:        pushq   %rbx            /* same five registers, same order, as cpu_switchto() pushes */
                   1131:        pushq   %r12
                   1132:        pushq   %r13
                   1133:        pushq   %r14
                   1134:        pushq   %r15
1.84      maxv     1135:
                   1136:        /* Save a context. */
1.25      yamt     1137:        movq    $dumppcb, %rax  /* %rax = address of dumppcb */
                   1138:        movq    %rsp, PCB_RSP(%rax)
                   1139:        movq    %rbp, PCB_RBP(%rax)
                   1140:
                   1141:        call    _C_LABEL(dodumpsys)
                   1142:
1.84      maxv     1143:        addq    $(5*8), %rsp    /* sizeof(switchframe) - sizeof(%rip) */
1.25      yamt     1144:        ret
1.71      uebayasi 1145: END(dumpsys)
1.25      yamt     1146:
1.1       fvdl     1147: /*
1.58      chs      1148:  * struct lwp *cpu_switchto(struct lwp *oldlwp, struct lwp *newlwp,
1.103     maxv     1149:  *     bool returning)
1.22      yamt     1150:  *
1.197     skrll    1151:  *     1. save context of oldlwp.
                   1152:  *     2. restore context of newlwp.
1.22      yamt     1153:  *
                   1154:  * Note that the stack frame layout is known to "struct switchframe" in
                   1155:  * <machine/frame.h> and to the code in cpu_lwp_fork() which initializes
                   1156:  * it for a new lwp.
1.1       fvdl     1157:  */
1.22      yamt     1158: ENTRY(cpu_switchto)
1.1       fvdl     1159:        pushq   %rbx            /* switch frame; popped in reverse order at .Lswitch_return */
                   1160:        pushq   %r12
                   1161:        pushq   %r13
                   1162:        pushq   %r14
                   1163:        pushq   %r15
                   1164:
1.84      maxv     1165:        movq    %rdi,%r13       /* oldlwp */
                   1166:        movq    %rsi,%r12       /* newlwp */
1.1       fvdl     1167:
1.22      yamt     1168:        /* Save old context. */
1.55      rmind    1169:        movq    L_PCB(%r13),%rax        /* %rax = oldlwp's pcb */
1.22      yamt     1170:        movq    %rsp,PCB_RSP(%rax)
                   1171:        movq    %rbp,PCB_RBP(%rax)
                   1172:
1.168     maxv     1173:        /* Switch to newlwp's stack. */
                   1174:        movq    L_PCB(%r12),%r14        /* %r14 = newlwp's pcb, used for the rest of the switch */
                   1175:        movq    PCB_RSP(%r14),%rsp
                   1176:        movq    PCB_RBP(%r14),%rbp
                   1177:
                   1178:        /*
                   1179:         * Set curlwp.  This must be globally visible in order to permit
                   1180:         * non-interlocked mutex release.
                   1181:         */
                   1182:        movq    %r12,%rcx
                   1183:        xchgq   %rcx,CPUVAR(CURLWP)     /* xchg with a memory operand is implicitly locked */
                   1184:
                   1185:        /* Skip the rest if returning to a pinned LWP. */
                   1186:        testb   %dl,%dl         /* returning = true ? */
                   1187:        jnz     .Lswitch_return
                   1188:
1.145     maxv     1189: #ifdef SVS
1.151     maxv     1190:        movb    _C_LABEL(svs_enabled),%dl       /* 'returning' was consumed above, so %dl is free */
                   1191:        testb   %dl,%dl
                   1192:        jz      .Lskip_svs
1.145     maxv     1193:        callq   _C_LABEL(svs_lwp_switch)
1.151     maxv     1194: .Lskip_svs:
1.145     maxv     1195: #endif
                   1196:
1.201     bouyer   1197: #ifndef XENPV
1.168     maxv     1198:        movq    %r13,%rdi       /* arg0 = oldlwp */
                   1199:        movq    %r12,%rsi       /* arg1 = newlwp */
1.160     maxv     1200:        callq   _C_LABEL(speculation_barrier)
1.163     maxv     1201: #endif
1.160     maxv     1202:
1.38      yamt     1203:        /* Switch ring0 stack */
1.147     maxv     1204: #ifdef SVS
1.152     maxv     1205:        movb    _C_LABEL(svs_enabled),%al
                   1206:        testb   %al,%al
                   1207:        jz      .Lno_svs_switch
                   1208:
1.147     maxv     1209:        movq    CPUVAR(RSP0),%rax       /* with SVS enabled, rsp0 comes from the per-CPU area, not the pcb */
                   1210:        movq    CPUVAR(TSS),%rdi
                   1211:        movq    %rax,TSS_RSP0(%rdi)
1.152     maxv     1212:        jmp     .Lring0_switched
                   1213:
                   1214: .Lno_svs_switch:
                   1215: #endif
                   1216:
1.176     cherry   1217: #if !defined(XENPV)
1.38      yamt     1218:        movq    PCB_RSP0(%r14),%rax     /* newlwp's rsp0 from its pcb */
1.144     maxv     1219:        movq    CPUVAR(TSS),%rdi
                   1220:        movq    %rax,TSS_RSP0(%rdi)
1.38      yamt     1221: #else
1.103     maxv     1222:        movq    %r14,%rdi       /* arg0 = newlwp's pcb */
1.152     maxv     1223:        callq   _C_LABEL(x86_64_switch_context)
1.32      bouyer   1224: #endif
1.152     maxv     1225: .Lring0_switched:
1.1       fvdl     1226:
1.173     maxv     1227:        /* Switch the dbregs. */
                   1228:        movq    %r13,%rdi       /* arg0 = oldlwp */
                   1229:        movq    %r12,%rsi       /* arg1 = newlwp */
                   1230:        callq   _C_LABEL(x86_dbregs_switch)
                   1231:
1.187     maxv     1232:        /* Switch the FPU. */
1.167     maxv     1233:        movq    %r13,%rdi       /* arg0 = oldlwp */
                   1234:        movq    %r12,%rsi       /* arg1 = newlwp */
1.188     maxv     1235:        callq   _C_LABEL(fpu_switch)
1.167     maxv     1236:
1.22      yamt     1237:        /* Don't bother with the rest if switching to a system process. */
                   1238:        testl   $LW_SYSTEM,L_FLAG(%r12)
1.142     maxv     1239:        jnz     .Lswitch_return
1.1       fvdl     1240:
1.22      yamt     1241:        /* Is this process using RAS (restartable atomic sequences)? */
                   1242:        movq    L_PROC(%r12),%rdi       /* %rdi = newlwp's proc; also arg0 of ras_lookup below */
                   1243:        cmpq    $0,P_RASLIST(%rdi)
1.142     maxv     1244:        je      .Lno_RAS
1.104     maxv     1245:
                   1246:        /* Handle restartable atomic sequences (RAS). */
                   1247:        movq    L_MD_REGS(%r12),%rbx    /* %rbx = newlwp's trapframe */
                   1248:        movq    TF_RIP(%rbx),%rsi       /* arg1 = %rip saved in that trapframe */
                   1249:        call    _C_LABEL(ras_lookup)
                   1250:        cmpq    $-1,%rax        /* -1: leave the saved %rip alone */
1.142     maxv     1251:        je      .Lno_RAS
1.104     maxv     1252:        movq    %rax,TF_RIP(%rbx)       /* otherwise resume at the address ras_lookup returned */
1.142     maxv     1253: .Lno_RAS:
1.1       fvdl     1254:
1.176     cherry   1255: #ifndef XENPV
1.189     maxv     1256:        /* Raise the IPL to IPL_HIGH. Dropping the priority is deferred until
                   1257:         * mi_switch(), when cpu_switchto() returns. XXX Still needed? */
1.75      christos 1258:        movl    $IPL_HIGH,CPUVAR(ILEVEL)
1.58      chs      1259:
1.103     maxv     1260:        /* The 32bit LWPs are handled differently. */
                   1261:        testl   $PCB_COMPAT32,PCB_FLAGS(%r14)
1.142     maxv     1262:        jnz     .Llwp_32bit
1.58      chs      1263:
1.142     maxv     1264: .Llwp_64bit:
1.134     maxv     1265:        /* Set default 64bit values in %ds, %es, %fs and %gs. */
                   1266:        movq    $GSEL(GUDATA_SEL, SEL_UPL),%rax
                   1267:        movw    %ax,%ds
                   1268:        movw    %ax,%es
1.103     maxv     1269:        xorq    %rax,%rax       /* %rax = 0: loaded into %fs/%gs and stored into the GDT slots below */
                   1270:        movw    %ax,%fs
1.58      chs      1271:        CLI(cx)
1.70      dsl      1272:        SWAPGS
1.103     maxv     1273:        movw    %ax,%gs
1.70      dsl      1274:        SWAPGS
1.58      chs      1275:        STI(cx)
                   1276:
1.107     maxv     1277:        /* Zero out GDT descriptors. */
1.58      chs      1278:        movq    CPUVAR(GDT),%rcx
1.103     maxv     1279:        movq    %rax,(GUFS_SEL*8)(%rcx)
                   1280:        movq    %rax,(GUGS_SEL*8)(%rcx)
1.58      chs      1281:
                   1282:        /* Reload 64-bit %fs/%gs MSRs. */
1.103     maxv     1283:        movl    $MSR_FSBASE,%ecx
                   1284:        movl    PCB_FS(%r14),%eax       /* low 32 bits */
                   1285:        movl    4+PCB_FS(%r14),%edx     /* high 32 bits */
1.58      chs      1286:        wrmsr
1.103     maxv     1287:        movl    $MSR_KERNELGSBASE,%ecx
                   1288:        movl    PCB_GS(%r14),%eax       /* low 32 bits */
                   1289:        movl    4+PCB_GS(%r14),%edx     /* high 32 bits */
1.58      chs      1290:        wrmsr
1.107     maxv     1291:
1.142     maxv     1292:        jmp     .Lswitch_return
1.58      chs      1293:
1.142     maxv     1294: .Llwp_32bit:
1.58      chs      1295:        /* Reload %fs/%gs GDT descriptors. */
                   1296:        movq    CPUVAR(GDT),%rcx
1.103     maxv     1297:        movq    PCB_FS(%r14),%rax       /* for a 32bit LWP, PCB_FS/PCB_GS are copied into the GDT as descriptors */
                   1298:        movq    %rax,(GUFS_SEL*8)(%rcx)
                   1299:        movq    PCB_GS(%r14),%rax
                   1300:        movq    %rax,(GUGS_SEL*8)(%rcx)
1.58      chs      1301:
1.143     maxv     1302:        /* Set default 32bit values in %ds, %es, %fs and %gs. */
1.103     maxv     1303:        movq    L_MD_REGS(%r12),%rbx
1.134     maxv     1304:        movq    $GSEL(GUDATA32_SEL, SEL_UPL),%rax
                   1305:        movw    %ax,%ds
                   1306:        movw    %ax,%es
1.135     maxv     1307:        movw    %ax,%fs
1.58      chs      1308:        CLI(ax)
1.70      dsl      1309:        SWAPGS
1.137     maxv     1310:        movw    %ax,%gs
1.70      dsl      1311:        SWAPGS
1.58      chs      1312:        STI(ax)
                   1313: #else
                   1314:        movq    %r12,%rdi       /* arg0 = newlwp */
                   1315:        callq   _C_LABEL(x86_64_tls_switch)
1.32      bouyer   1316: #endif
1.84      maxv     1317:
1.142     maxv     1318: .Lswitch_return:
1.22      yamt     1319:        /* Return to the new LWP, returning 'oldlwp' in %rax. */
1.190     maxv     1320:        KMSAN_INIT_RET(8)
1.103     maxv     1321:        movq    %r13,%rax
1.1       fvdl     1322:        popq    %r15            /* these pops read the switch frame on newlwp's stack */
                   1323:        popq    %r14
                   1324:        popq    %r13
                   1325:        popq    %r12
                   1326:        popq    %rbx
                   1327:        ret
1.71      uebayasi 1328: END(cpu_switchto)
1.1       fvdl     1329:
                   1330: /*
1.22      yamt     1331:  * void savectx(struct pcb *pcb);
                   1332:  *
1.1       fvdl     1333:  * Update pcb, saving current processor state.
                   1334:  */
                   1335: ENTRY(savectx)
                   1336:        /* Save stack pointers. */
                   1337:        movq    %rsp,PCB_RSP(%rdi)      /* %rdi = pcb (first argument) */
                   1338:        movq    %rbp,PCB_RBP(%rdi)
                   1339:        ret
1.71      uebayasi 1340: END(savectx)
1.1       fvdl     1341:
                   1342: /*
1.148     maxv     1343:  * Syscall handler.
1.1       fvdl     1344:  */
1.171     maxv     1345: ENTRY(handle_syscall)
1.127     maxv     1346:        STI(si)
1.1       fvdl     1347:
                   1348:        movq    CPUVAR(CURLWP),%r14     /* %r14 = curlwp, relied on by .Lsyscall_checkast */
1.84      maxv     1349:        incq    CPUVAR(NSYSCALL)        /* count it atomically */
                   1350:        movq    %rsp,L_MD_REGS(%r14)    /* save pointer to frame */
1.1       fvdl     1351:        movq    L_PROC(%r14),%r15       /* %r15 = curlwp's proc */
1.70      dsl      1352:        andl    $~MDL_IRET,L_MD_FLAGS(%r14)   /* Allow sysret return */
1.37      dsl      1353:        movq    %rsp,%rdi               /* Pass frame as arg0 */
1.1       fvdl     1354:        call    *P_MD_SYSCALL(%r15)     /* indirect call through the proc's syscall handler pointer */
1.29      yamt     1355: .Lsyscall_checkast:
1.70      dsl      1356:        /*
                   1357:         * Disable interrupts to avoid new ASTs (etc) being added and
                   1358:         * to ensure we don't take an interrupt with some of the user
                   1359:         * registers loaded.
                   1360:         */
                   1361:        CLI(si)
1.29      yamt     1362:        /* Check for ASTs on exit to user mode. */
1.107     maxv     1363:        movl    L_MD_ASTPENDING(%r14),%eax
                   1364:        orl     CPUVAR(WANT_PMAPLOAD),%eax      /* non-zero if either an AST or a pmap load is pending */
1.29      yamt     1365:        jnz     9f
1.107     maxv     1366:
1.1       fvdl     1367: #ifdef DIAGNOSTIC
                   1368:        cmpl    $IPL_NONE,CPUVAR(ILEVEL)
1.142     maxv     1369:        jne     .Lspl_error
1.1       fvdl     1370: #endif
1.107     maxv     1371:
1.189     maxv     1372:        HANDLE_DEFERRED_FPU
                   1373:
1.129     maxv     1374:        /*
1.134     maxv     1375:         * Decide if we need to take a slow path. That's the case when we
                   1376:         * want to reload %cs and %ss on a 64bit LWP (MDL_IRET set), or when
                   1377:         * we're returning to a 32bit LWP (MDL_COMPAT32 set).
                   1378:         *
                   1379:         * In either case, we jump into intrfastexit and return to userland
                   1380:         * with the iret instruction.
1.129     maxv     1381:         */
1.107     maxv     1382:        testl   $(MDL_IRET|MDL_COMPAT32),L_MD_FLAGS(%r14)
1.134     maxv     1383:        jnz     intrfastexit
                   1384:
1.148     maxv     1385:        jmp     syscall_sysret  /* fast path: return with sysret */
1.1       fvdl     1386:
                   1387: #ifdef DIAGNOSTIC
1.142     maxv     1388: .Lspl_error:
1.107     maxv     1389:        movabsq $4f,%rdi        /* arg0 = format string at label 4 below */
1.186     maxv     1390:        movl    CPUVAR(ILEVEL),%esi     /* arg1 = current ilevel, for the %x in the format */
                   1391:        call    _C_LABEL(panic)
                   1392: 4:     .asciz  "spl not lowered on syscall, ilevel=%x"
1.1       fvdl     1393: #endif
1.70      dsl      1394:
                   1395: /* AST pending or pmap load needed */
1.41      ad       1396: 9:
1.107     maxv     1397:        cmpl    $0,CPUVAR(WANT_PMAPLOAD)
1.41      ad       1398:        jz      10f             /* no pmap load wanted, so it must be an AST */
                   1399:        STI(si)
1.29      yamt     1400:        call    _C_LABEL(do_pmap_load)
                   1401:        jmp     .Lsyscall_checkast      /* re-check ASTs */
1.41      ad       1402: 10:
                   1403:        CLEAR_ASTPENDING(%r14)
                   1404:        STI(si)
                   1405:        /* Pushed T_ASTFLT into tf_trapno on entry. */
                   1406:        movq    %rsp,%rdi       /* arg0 = trapframe */
1.190     maxv     1407:        KMSAN_INIT_ARG(8)
1.41      ad       1408:        call    _C_LABEL(trap)
                   1409:        jmp     .Lsyscall_checkast      /* re-check ASTs */
1.148     maxv     1410: END(handle_syscall)
1.29      yamt     1411:
1.22      yamt     1412: /*
                   1413:  * void lwp_trampoline(void);
                   1414:  *
                   1415:  * This is a trampoline function pushed run by newly created LWPs
1.70      dsl      1416:  * in order to do additional setup in their context.
1.22      yamt     1417:  */
1.171     maxv     1418: ENTRY(lwp_trampoline)
1.22      yamt     1419:        movq    %rbp,%rsi       /* arg1 of lwp_startup = incoming %rbp */
1.41      ad       1420:        movq    %rbp,%r14       /* for .Lsyscall_checkast */
1.22      yamt     1421:        movq    %rax,%rdi       /* arg0 of lwp_startup = incoming %rax */
                   1422:        xorq    %rbp,%rbp       /* clear the frame pointer */
1.190     maxv     1423:        KMSAN_INIT_ARG(16)
1.22      yamt     1424:        call    _C_LABEL(lwp_startup)
1.1       fvdl     1425:        movq    %r13,%rdi       /* arg0 = value found in %r13 */
1.190     maxv     1426:        KMSAN_INIT_ARG(8)
1.1       fvdl     1427:        call    *%r12           /* call the function whose address is in %r12 */
1.41      ad       1428:        jmp     .Lsyscall_checkast      /* if it returns, leave through the syscall exit path */
1.71      uebayasi 1429: END(lwp_trampoline)
1.1       fvdl     1430:
                   1431: /*
1.148     maxv     1432:  * Entry points of the 'syscall' instruction, 64bit and 32bit mode.
                   1433:  */
                   1434:
1.156     maxv     1435: #define SP(x)  (x)-(TF_SS+8)(%rax)
1.152     maxv     1436:
                   1437: .macro SYSCALL_ENTRY   name,is_svs
                   1438: IDTVEC(\name)
1.176     cherry   1439: #ifndef XENPV
1.148     maxv     1440:        /*
                   1441:         * The user %rip is in %rcx and the user %rflags in %r11. The kernel %cs
                   1442:         * and %ss are loaded, but nothing else is.
                   1443:         *
                   1444:         * The 'swapgs' instruction gives us access to cpu-specific memory where
                   1445:         * we can save a user register and then read the LWP's kernel stack
                   1446:         * pointer.
                   1447:         *
                   1448:         * This code doesn't seem to set %ds, this may not matter since it is
                   1449:         * ignored in 64bit mode, OTOH the syscall instruction sets %ss and that
                   1450:         * is ignored as well.
                   1451:         */
                   1452:        swapgs
                   1453:
1.156     maxv     1454:        /* Get the LWP's kernel stack pointer in %rax */
1.152     maxv     1455:        .if     \is_svs
1.165     joerg    1456:                movabs  %rax,SVS_UTLS+UTLS_SCRATCH      /* SVS variant: absolute addresses, no CPUVAR() accesses */
                   1457:                movabs  SVS_UTLS+UTLS_RSP0,%rax
1.156     maxv     1458:        .else
                   1459:                movq    %rax,CPUVAR(SCRATCH)    /* stash the user %rax; restored below */
                   1460:                movq    CPUVAR(CURLWP),%rax
                   1461:                movq    L_PCB(%rax),%rax
                   1462:                movq    PCB_RSP0(%rax),%rax
                   1463:        .endif
1.152     maxv     1464:
1.156     maxv     1465:        /* Make stack look like an 'int nn' frame */
                   1466:        movq    $(LSEL(LUDATA_SEL, SEL_UPL)),SP(TF_SS)  /* user %ss */
                   1467:        movq    %rsp,SP(TF_RSP)                         /* user %rsp */
                   1468:        movq    %r11,SP(TF_RFLAGS)                      /* user %rflags */
                   1469:        movq    $(LSEL(LUCODE_SEL, SEL_UPL)),SP(TF_CS)  /* user %cs */
                   1470:        movq    %rcx,SP(TF_RIP)                         /* user %rip */
                   1471:        leaq    SP(0),%rsp              /* %rsp now valid after frame */
1.152     maxv     1472:
1.156     maxv     1473:        /* Restore %rax */
                   1474:        .if     \is_svs
1.165     joerg    1475:                movabs  SVS_UTLS+UTLS_SCRATCH,%rax
1.152     maxv     1476:        .else
1.156     maxv     1477:                movq    CPUVAR(SCRATCH),%rax
1.152     maxv     1478:        .endif
1.148     maxv     1479:
                   1480:        movq    $2,TF_ERR(%rsp)         /* syscall instruction size */
                   1481:        movq    $T_ASTFLT,TF_TRAPNO(%rsp)       /* handle_syscall relies on this when it calls trap() for an AST */
                   1482: #else
                   1483:        /* Xen already switched to kernel stack */
                   1484:        addq    $0x10,%rsp      /* gap to match cs:rip */
                   1485:        pushq   $2              /* error code */
                   1486:        pushq   $T_ASTFLT
                   1487:        subq    $TF_REGSIZE,%rsp
                   1488:        cld
                   1489: #endif
                   1490:        INTR_SAVE_GPRS
1.160     maxv     1491:        IBRS_ENTER
1.148     maxv     1492:        movw    $GSEL(GUDATA_SEL, SEL_UPL),TF_DS(%rsp)  /* the frame records fixed values for the segment registers */
                   1493:        movw    $GSEL(GUDATA_SEL, SEL_UPL),TF_ES(%rsp)
                   1494:        movw    $0,TF_FS(%rsp)
                   1495:        movw    $0,TF_GS(%rsp)
1.152     maxv     1496:        .if     \is_svs
                   1497:                SVS_ENTER
                   1498:        .endif
1.190     maxv     1499:        KMSAN_ENTER
1.148     maxv     1500:        jmp     handle_syscall  /* common syscall path, shared with osyscall */
1.152     maxv     1501: IDTVEC_END(\name)
                   1502: .endm
                   1503:
                   1504: SYSCALL_ENTRY  syscall,is_svs=0
                   1505:
                   1506:        TEXT_USER_BEGIN
                   1507:
1.155     maxv     1508: #ifdef SVS
1.152     maxv     1509: SYSCALL_ENTRY  syscall_svs,is_svs=1
1.155     maxv     1510: #endif
1.148     maxv     1511:
                   1512: IDTVEC(syscall32)
                   1513:        sysret          /* go away please: return at once, no syscall is performed */
                   1514: IDTVEC_END(syscall32)
                   1515:
                   1516:        TEXT_USER_END
                   1517:
                   1518: /*
1.22      yamt     1519:  * osyscall()
                   1520:  *
1.1       fvdl     1521:  * Trap gate entry for int $80 syscall, also used by sigreturn.
                   1522:  */
1.148     maxv     1523:        TEXT_USER_BEGIN
1.1       fvdl     1524: IDTVEC(osyscall)
1.176     cherry   1525: #ifdef XENPV
1.32      bouyer   1526:        movq (%rsp),%rcx        /* load the two words on top of the stack into %rcx/%r11 ... */
                   1527:        movq 8(%rsp),%r11
                   1528:        addq $0x10,%rsp         /* ... and drop them */
                   1529: #endif
1.84      maxv     1530:        pushq   $2              /* size of instruction for restart */
                   1531:        pushq   $T_ASTFLT       /* trap # for doing ASTs */
1.1       fvdl     1532:        INTRENTRY
1.148     maxv     1533:        jmp     handle_syscall  /* same handler as the 'syscall' instruction entry */
1.71      uebayasi 1534: IDTVEC_END(osyscall)
1.148     maxv     1535:        TEXT_USER_END
                   1536:
                   1537: /*
                   1538:  * Return to userland via 'sysret'.
                   1539:  */
                   1540:        TEXT_USER_BEGIN
                   1541:        _ALIGN_TEXT
                   1542: LABEL(syscall_sysret)
1.190     maxv     1543:        KMSAN_LEAVE
1.181     maxv     1544:        MDS_LEAVE
1.148     maxv     1545:        SVS_LEAVE
1.160     maxv     1546:        IBRS_LEAVE
1.148     maxv     1547:        INTR_RESTORE_GPRS
                   1548:        SWAPGS
1.176     cherry   1549: #ifndef XENPV
1.148     maxv     1550:        movq    TF_RIP(%rsp),%rcx       /* %rip for sysret */
                   1551:        movq    TF_RFLAGS(%rsp),%r11    /* %flags for sysret */
                   1552:        movq    TF_RSP(%rsp),%rsp       /* user %rsp, loaded last since the frame is addressed through %rsp */
                   1553:        sysretq
                   1554: #else
                   1555:        addq    $TF_RIP,%rsp    /* %rsp now points at the tf_rip slot */
                   1556:        pushq   $256    /* VGCF_IN_SYSCALL */
                   1557:        jmp     HYPERVISOR_iret
                   1558: #endif
                   1559: END(syscall_sysret)
                   1560:        TEXT_USER_END
1.26      ad       1561:
                   1562: /*
1.66      chs      1563:  * bool sse2_idlezero_page(void *pg)
1.26      ad       1564:  *
1.47      ad       1565:  * Zero a page without polluting the cache.  Preemption must be
                   1566:  * disabled by the caller. Abort if a preemption is pending.
1.66      chs      1567:  * Returns true if the page is zeroed, false if not.
1.26      ad       1568:  */
1.47      ad       1569: ENTRY(sse2_idlezero_page)
                   1570:        pushq   %rbp
                   1571:        movq    %rsp,%rbp
                   1572:        movl    $(PAGE_SIZE/64), %ecx   /* loop count: 64 bytes are zeroed per iteration */
1.26      ad       1573:        xorq    %rax, %rax              /* zero to store; also the 'false' return value */
                   1574:        .align  16
                   1575: 1:
1.191     ad       1576:        cmpl    $0, CPUVAR(RESCHED)     /* preemption pending? checked once per 64 bytes */
1.47      ad       1577:        jnz     2f                      /* yes: abort with %eax still 0 (false) */
1.26      ad       1578:        movnti  %rax, 0(%rdi)           /* %rdi = pg (first argument); non-temporal stores */
                   1579:        movnti  %rax, 8(%rdi)
                   1580:        movnti  %rax, 16(%rdi)
                   1581:        movnti  %rax, 24(%rdi)
                   1582:        movnti  %rax, 32(%rdi)
                   1583:        movnti  %rax, 40(%rdi)
                   1584:        movnti  %rax, 48(%rdi)
                   1585:        movnti  %rax, 56(%rdi)
1.47      ad       1586:        addq    $64, %rdi
                   1587:        decl    %ecx
1.26      ad       1588:        jnz     1b
                   1589:        sfence                          /* order the non-temporal stores before returning */
1.47      ad       1590:        incl    %eax                    /* whole page zeroed: return true (1) */
                   1591:        popq    %rbp
1.190     maxv     1592:        KMSAN_INIT_RET(1)
1.26      ad       1593:        ret
1.47      ad       1594: 2:
1.26      ad       1595:        sfence                          /* aborted: still fence the stores already issued */
1.47      ad       1596:        popq    %rbp
1.190     maxv     1597:        KMSAN_INIT_RET(1)
1.26      ad       1598:        ret
1.71      uebayasi 1599: END(sse2_idlezero_page)
1.66      chs      1600:
                   1601: /*
                   1602:  * void pagezero(vaddr_t va)
                   1603:  *
1.192     ad       1604:  * Zero a page.
1.66      chs      1605:  */
                   1606: ENTRY(pagezero)
1.192     ad       1607:        pushq   %rbp
                   1608:        movq    %rsp,%rbp
                   1609:        movq    $(PAGE_SIZE / 8),%rcx   /* number of 8-byte stores in a page */
1.66      chs      1610:        xorq    %rax,%rax               /* value to store: 0 */
1.192     ad       1611:        rep
                   1612:        stosq                           /* stores at %rdi = va (first argument), advancing it */
                   1613:        leave
1.66      chs      1614:        ret
1.71      uebayasi 1615: END(pagezero)
1.129     maxv     1616:
1.148     maxv     1617:        TEXT_USER_BEGIN
                   1618:
1.157     maxv     1619: /*
                   1620:  * In intrfastexit, we advance %rsp at the beginning. We then access the
                   1621:  * segment registers in the trapframe with TF_BACKW (backwards). See the
                   1622:  * documentation in amd64_trap.S for an explanation.
                   1623:  */
                   1624:
1.159     maxv     1625: #define TF_BACKW(val, reg)     (val - (TF_REGSIZE+16))(reg)
1.157     maxv     1626:
1.148     maxv     1627:        _ALIGN_TEXT
1.184     maxv     1628:        .type intrfastexit,@function
1.148     maxv     1629: LABEL(intrfastexit)
1.147     maxv     1630:        NOT_XEN(cli;)
1.190     maxv     1631:        KMSAN_LEAVE
1.185     maxv     1632:
                   1633:        testb   $SEL_UPL,TF_CS(%rsp)    /* does the saved %cs carry the user privilege level? */
                   1634:        jz      .Lkexit                 /* no: plain kernel return below */
                   1635:
1.181     maxv     1636:        MDS_LEAVE
1.147     maxv     1637:        SVS_LEAVE
1.160     maxv     1638:        IBRS_LEAVE
1.129     maxv     1639:        INTR_RESTORE_GPRS
1.159     maxv     1640:        addq    $(TF_REGSIZE+16),%rsp   /* iret frame */
1.185     maxv     1641:        SWAPGS
1.157     maxv     1642:
                   1643:        cmpw    $LSEL(LUCODE_SEL, SEL_UPL),TF_BACKW(TF_CS, %rsp)
1.185     maxv     1644:        je      do_iret                 /* for this %cs the segment register reloads are skipped */
1.157     maxv     1645:        cmpw    $GSEL(GUCODE_SEL, SEL_UPL),TF_BACKW(TF_CS, %rsp)
1.185     maxv     1646:        je      do_iret                 /* likewise */
1.176     cherry   1647: #ifdef XENPV
1.157     maxv     1648:        cmpw    $FLAT_RING3_CS64,TF_BACKW(TF_CS, %rsp)
1.185     maxv     1649:        je      do_iret                 /* likewise */
1.139     maxv     1650: #endif
1.129     maxv     1651:
1.138     maxv     1652: do_mov_es:
1.157     maxv     1653:        movw    TF_BACKW(TF_ES, %rsp),%es       /* any other %cs: reload the segment registers from the frame */
1.138     maxv     1654: do_mov_ds:
1.157     maxv     1655:        movw    TF_BACKW(TF_DS, %rsp),%ds
1.138     maxv     1656: do_mov_fs:
1.157     maxv     1657:        movw    TF_BACKW(TF_FS, %rsp),%fs
1.176     cherry   1658: #ifndef XENPV
1.138     maxv     1659: do_mov_gs:
1.157     maxv     1660:        movw    TF_BACKW(TF_GS, %rsp),%gs
1.137     maxv     1661: #endif
1.134     maxv     1662:
1.185     maxv     1663: do_iret:
                   1664:        iretq
1.129     maxv     1665:
                   1666: .Lkexit:
1.185     maxv     1667:        INTR_RESTORE_GPRS               /* kernel return: no MDS/SVS/IBRS leave, no swapgs, no segment reload */
                   1668:        addq    $(TF_REGSIZE+16),%rsp   /* iret frame */
1.129     maxv     1669:        iretq
                   1670: END(intrfastexit)
1.152     maxv     1671:
                   1672:        TEXT_USER_END
                   1673:
1.202     maxv     1674:        .section .rodata
                   1675:
1.203     maxv     1676:        /*
                   1677:         * Hotpatch templates.
                   1678:         */
                   1679:
                   1680: LABEL(hp_nolock)       /* each template is the bytes between X and X_end */
                   1681:        nop             /* NOTE(review): presumably patched over a 'lock' prefix; confirm at the hotpatch sites */
                   1682: LABEL(hp_nolock_end)
                   1683:
                   1684: LABEL(hp_retfence)
                   1685:        lfence
                   1686: LABEL(hp_retfence_end)
                   1687:
                   1688: LABEL(hp_clac)
                   1689:        clac            /* clear EFLAGS.AC */
                   1690: LABEL(hp_clac_end)
                   1691:
                   1692: LABEL(hp_stac)
                   1693:        stac            /* set EFLAGS.AC */
                   1694: LABEL(hp_stac_end)
                   1695:
1.152     maxv     1696: #ifdef SVS
                   1697: LABEL(svs_enter)
1.164     joerg    1698:        movabs  SVS_UTLS+UTLS_KPDIRPA,%rax      /* value read from a fixed address; presumably the kernel page directory PA */
1.152     maxv     1699:        movq    %rax,%cr3       /* switch page tables */
                   1700:        movq    CPUVAR(KRSP0),%rsp      /* then move to the stack in CPUVAR(KRSP0) */
                   1701: LABEL(svs_enter_end)
                   1702:
                   1703: LABEL(svs_enter_altstack)
                   1704:        testb   $SEL_UPL,TF_CS(%rsp)
                   1705:        jz      1234f           /* saved %cs is not user-level: leave %cr3 alone */
1.164     joerg    1706:        movabs  SVS_UTLS+UTLS_KPDIRPA,%rax
1.152     maxv     1707:        movq    %rax,%cr3       /* unlike svs_enter, %rsp is not changed */
                   1708: 1234:
                   1709: LABEL(svs_enter_altstack_end)
                   1710:
1.170     maxv     1711: LABEL(svs_enter_nmi)
                   1712:        movq    %cr3,%rax
                   1713:        movq    %rax,(FRAMESIZE+1*8)(%rsp)      /* nmistore->scratch */
                   1714:        movq    (FRAMESIZE+0*8)(%rsp),%rax      /* nmistore->cr3 */
                   1715:        movq    %rax,%cr3
                   1716: LABEL(svs_enter_nmi_end)
                   1717:
1.152     maxv     1718: LABEL(svs_leave)
                   1719:        movq    CPUVAR(URSP0),%rsp      /* move to the stack in CPUVAR(URSP0) first ... */
                   1720:        movq    CPUVAR(UPDIRPA),%rax
                   1721:        movq    %rax,%cr3       /* ... then switch page tables (reverse order of svs_enter) */
                   1722: LABEL(svs_leave_end)
                   1723:
                   1724: LABEL(svs_leave_altstack)
                   1725:        testb   $SEL_UPL,TF_CS(%rsp)
                   1726:        jz      1234f           /* saved %cs is not user-level: leave %cr3 alone */
                   1727:        movq    CPUVAR(UPDIRPA),%rax
                   1728:        movq    %rax,%cr3
                   1729: 1234:
                   1730: LABEL(svs_leave_altstack_end)
1.153     maxv     1731:
1.170     maxv     1732: LABEL(svs_leave_nmi)
                   1733:        movq    (FRAMESIZE+1*8)(%rsp),%rax      /* nmistore->scratch */
                   1734:        movq    %rax,%cr3       /* put back the %cr3 that svs_enter_nmi saved */
                   1735: LABEL(svs_leave_nmi_end)
1.152     maxv     1736: #endif
1.160     maxv     1737:
                   1738:        /* IBRS <- 1 */
                   1739: LABEL(ibrs_enter)
                   1740:        movl    $MSR_IA32_SPEC_CTRL,%ecx
1.185     maxv     1741:        rdmsr                   /* %edx:%eax = current MSR value */
                   1742:        orl     $IA32_SPEC_CTRL_IBRS,%eax       /* set the IBRS bit, keep the others */
1.160     maxv     1743:        wrmsr
                   1744: LABEL(ibrs_enter_end)
                   1745:
                   1746:        /* IBRS <- 0 */
                   1747: LABEL(ibrs_leave)
                   1748:        movl    $MSR_IA32_SPEC_CTRL,%ecx
1.185     maxv     1749:        rdmsr                   /* %edx:%eax = current MSR value */
                   1750:        andl    $~IA32_SPEC_CTRL_IBRS,%eax      /* clear the IBRS bit, keep the others */
1.160     maxv     1751:        wrmsr
                   1752: LABEL(ibrs_leave_end)
1.162     maxv     1753:
                   1754: LABEL(noibrs_enter)
                   1755:        NOIBRS_ENTER
                   1756: LABEL(noibrs_enter_end)
                   1757:
                   1758: LABEL(noibrs_leave)
                   1759:        NOIBRS_LEAVE
                   1760: LABEL(noibrs_leave_end)
1.181     maxv     1761:
                   1762: LABEL(mds_leave)
                   1763:        pushq   $GSEL(GDATA_SEL, SEL_KPL)       /* put a selector on the stack for verw's memory operand */
                   1764:        verw    (%rsp)
                   1765:        addq    $8,%rsp         /* drop it again */
                   1766: LABEL(mds_leave_end)
                   1767:
                   1768: LABEL(nomds_leave)
                   1769:        NOMDS_LEAVE
                   1770: LABEL(nomds_leave_end)

CVSweb <webmaster@jp.NetBSD.org>