/*	$NetBSD: locore.S,v 1.193 2019/12/10 02:06:07 manu Exp $	*/

/*
 * Copyright-o-rama!
 */

/*
 * Copyright (c) 1998, 2000, 2007, 2008, 2016 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Charles M. Hannum and by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2007 Manuel Bouyer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Copyright (c) 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Frank van der Linden for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)locore.s	7.3 (Berkeley) 5/13/91
 */

/* Override user-land alignment before including asm.h */
#define	ALIGN_DATA	.align	8
#define ALIGN_TEXT	.align 16,0x90
#define _ALIGN_TEXT	ALIGN_TEXT

#include <machine/asm.h>

#include "opt_copy_symtab.h"
#include "opt_ddb.h"
#include "opt_ddbparam.h"
#include "opt_modular.h"
#include "opt_realmem.h"

#include "opt_compat_netbsd.h"
#include "opt_compat_netbsd32.h"
#include "opt_multiboot.h"
#include "opt_xen.h"
#include "opt_svs.h"

#include "assym.h"
#include "lapic.h"
#include "ioapic.h"
#include "ksyms.h"

#include <sys/errno.h>
#include <sys/syscall.h>

#include <machine/pte.h>
#include <machine/segments.h>
#include <machine/specialreg.h>
#include <machine/trap.h>
#include <machine/bootinfo.h>
#include <machine/frameasm.h>
#include <machine/cputypes.h>

#ifndef XENPV
#include <arch/i386/include/multiboot.h>
#endif 

#define CODE_SEGMENT	0x08
#define DATA_SEGMENT	0x10

#if NLAPIC > 0
#include <machine/i82489reg.h>
#endif

/* Get definitions for IOM_BEGIN, IOM_END, and IOM_SIZE */
#include <dev/isa/isareg.h>

#define	_RELOC(x)	((x) - KERNBASE)
#define	RELOC(x)	_RELOC(_C_LABEL(x))
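
/*
 * For example, while we still execute from the low mapping, RELOC(esym)
 * yields the load (physical) address of 'esym' rather than its linked,
 * KERNBASE-relative virtual address.
 */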

/* 32bit version of PTE_NX */
#define PTE_NX32	0x80000000
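/* (PTE_NX is bit 63 of a 64bit PTE; in the upper 32bit word that is bit 31.) */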

#if L2_SLOT_KERNBASE > 0
#define TABLE_L2_ENTRIES (2 * (NKL2_KIMG_ENTRIES + 1))
#else
#define TABLE_L2_ENTRIES (NKL2_KIMG_ENTRIES + 1)
#endif

#if L3_SLOT_KERNBASE > 0
#define TABLE_L3_ENTRIES (2 * NKL3_KIMG_ENTRIES)
#else
#define TABLE_L3_ENTRIES NKL3_KIMG_ENTRIES
#endif

#define PROC0_PML4_OFF	0
#define PROC0_STK_OFF	(PROC0_PML4_OFF + 1 * PAGE_SIZE)
#define PROC0_PTP3_OFF	(PROC0_STK_OFF + UPAGES * PAGE_SIZE)
#define PROC0_PTP2_OFF	(PROC0_PTP3_OFF + NKL4_KIMG_ENTRIES * PAGE_SIZE)
#define PROC0_PTP1_OFF	(PROC0_PTP2_OFF + TABLE_L3_ENTRIES * PAGE_SIZE)
#define TABLESIZE \
  ((NKL4_KIMG_ENTRIES + TABLE_L3_ENTRIES + TABLE_L2_ENTRIES + 1 + UPAGES) \
    * PAGE_SIZE)

/* Amount of VA used to map the kernel, the syms and the preloaded modules */
#define BOOTMAP_VA_SIZE \
	(NKL2_KIMG_ENTRIES * (1 << L2_SHIFT) - TABLESIZE - IOM_SIZE)

/*
 * fillkpt - Fill in a kernel page table
 *	eax = pte (page frame | control | status)
 *	ebx = page table address
 *	ecx = number of pages to map
 *
 * Each entry is 8 (PDE_SIZE) bytes long: we must set the 4 upper bytes to 0.
 */
#define fillkpt	\
	cmpl	$0,%ecx			;	/* zero-sized? */	\
	je 	2f			; \
1:	movl	$0,(PDE_SIZE-4)(%ebx)	;	/* upper 32 bits: 0 */	\
	movl	%eax,(%ebx)		;	/* store phys addr */	\
	addl	$PDE_SIZE,%ebx		;	/* next PTE/PDE */	\
	addl	$PAGE_SIZE,%eax		;	/* next phys page */	\
	loop	1b			; \
2:					;
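
/*
 * Illustrative C sketch of what fillkpt does (names are ad-hoc). The upper
 * half of each entry stays zero; the real macro runs in 32bit mode and thus
 * writes each 64bit entry as two 32bit halves:
 *
 *	void
 *	fillkpt(uint64_t *pt, uint64_t pte, uint32_t npages)
 *	{
 *		while (npages-- > 0) {
 *			*pt++ = pte;
 *			pte += PAGE_SIZE;
 *		}
 *	}
 */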

/*
 * fillkpt_nox - Same as fillkpt, but sets the NX/XD bit.
 */
#define fillkpt_nox \
	cmpl	$0,%ecx			;	/* zero-sized? */	\
	je 	2f			; \
	pushl	%ebp			; \
	movl	RELOC(nox_flag),%ebp	; \
1:	movl	%ebp,(PDE_SIZE-4)(%ebx)	;	/* upper 32 bits: NX */ \
	movl	%eax,(%ebx)		;	/* store phys addr */	\
	addl	$PDE_SIZE,%ebx		;	/* next PTE/PDE */	\
	addl	$PAGE_SIZE,%eax		;	/* next phys page */	\
	loop	1b			; \
	popl	%ebp			; \
2:					;

/*
 * fillkpt_blank - Fill in a kernel page table with blank entries
 *	ebx = page table address
 *	ecx = number of pages to map
 */
#define fillkpt_blank	\
	cmpl	$0,%ecx			;	/* zero-sized? */	\
	je 	2f			; \
1:	movl	$0,(PDE_SIZE-4)(%ebx)	;	/* upper 32 bits: 0 */	\
	movl	$0,(%ebx)		;	/* lower 32 bits: 0 */	\
	addl	$PDE_SIZE,%ebx		;	/* next PTE/PDE */	\
	loop	1b			; \
2:					;

/*
 * killkpt - Destroy a kernel page table (long mode)
 *	rbx = page table address
 *	rcx = number of pages to destroy
 */
#define killkpt \
1:	movq	$0,(%rbx)	; \
	addq	$PDE_SIZE,%rbx	; \
	loop	1b		;


#ifdef XEN
#define __ASSEMBLY__
#include <xen/include/public/elfnote.h>
#include <xen/include/public/xen.h>
#endif	/* XEN */

#ifdef XENPV	
#define ELFNOTE(name, type, desctype, descdata...) \
.pushsection .note.name			;	\
  .align 4				;	\
  .long 2f - 1f		/* namesz */	;	\
  .long 4f - 3f		/* descsz */	;	\
  .long type				;	\
1:.asciz #name				;	\
2:.align 4				;	\
3:desctype descdata			;	\
4:.align 4				;	\
.popsection

/*
 * Xen guest identifier and loader selection
 */
.section __xen_guest
	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz, "NetBSD")
	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION,  .asciz, "4.99")
	ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION,    .asciz, "xen-3.0")
	ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE,      .quad,  KERNBASE)
	ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET,   .quad,  KERNBASE)
	ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,          .quad,  start)
	ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .quad,  hypercall_page)
	ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW,   .quad,  HYPERVISOR_VIRT_START)
	ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,       .asciz, "")
	ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz, "yes")
	ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,   .long,  PTE_P, PTE_P)
	ELFNOTE(Xen, XEN_ELFNOTE_LOADER,         .asciz, "generic")
	ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long,  0)
#if NKSYMS > 0 || defined(DDB) || defined(MODULAR)
	ELFNOTE(Xen, XEN_ELFNOTE_BSD_SYMTAB,     .asciz, "yes")
#endif
#endif  /* XENPV */	

/*
 * Initialization
 */
	.data

	.globl	_C_LABEL(tablesize)
	.globl	_C_LABEL(nox_flag)
	.globl	_C_LABEL(cputype)
	.globl	_C_LABEL(cpuid_level)
	.globl	_C_LABEL(esym)
	.globl	_C_LABEL(eblob)
	.globl	_C_LABEL(atdevbase)
	.globl	_C_LABEL(PDPpaddr)
	.globl	_C_LABEL(boothowto)
	.globl	_C_LABEL(bootinfo)
	.globl	_C_LABEL(biosbasemem)
	.globl	_C_LABEL(biosextmem)
	.globl	_C_LABEL(lwp0uarea)
	.globl	do_mov_es
	.globl	do_mov_ds
	.globl	do_mov_fs
	.globl	do_mov_gs
	.globl	do_iret

	.type	_C_LABEL(tablesize), @object
_C_LABEL(tablesize):	.long	TABLESIZE
END(tablesize)
	.type	_C_LABEL(nox_flag), @object
LABEL(nox_flag)		.long	0	/* 32bit NOX flag, set if supported */
END(nox_flag)
	.type	_C_LABEL(cputype), @object
LABEL(cputype)		.long	0	/* are we 80486, Pentium, or.. */
END(cputype)
	.type	_C_LABEL(cpuid_level), @object
LABEL(cpuid_level)	.long	-1	/* max. level accepted by cpuid instr */
END(cpuid_level)
	.type	_C_LABEL(esym), @object
LABEL(esym)		.quad	0	/* ptr to end of syms */
END(esym)
	.type	_C_LABEL(eblob), @object
LABEL(eblob)		.quad	0	/* ptr to end of modules */
END(eblob)
	.type	_C_LABEL(atdevbase), @object
LABEL(atdevbase)	.quad	0	/* location of start of iomem in virt */
END(atdevbase)
	.type	_C_LABEL(PDPpaddr), @object
LABEL(PDPpaddr)		.quad	0	/* paddr of PTD, for libkvm */
END(PDPpaddr)
	.type	_C_LABEL(biosbasemem), @object
#ifndef REALBASEMEM
LABEL(biosbasemem)	.long	0	/* base memory reported by BIOS */
#else
LABEL(biosbasemem)	.long	REALBASEMEM
#endif
END(biosbasemem)
	.type	_C_LABEL(biosextmem), @object
#ifndef REALEXTMEM
LABEL(biosextmem)	.long	0	/* extended memory reported by BIOS */
#else
LABEL(biosextmem)	.long	REALEXTMEM
#endif
END(biosextmem)
	.type	_C_LABEL(lwp0uarea), @object
LABEL(lwp0uarea)	.quad	0
END(lwp0uarea)

#ifndef XENPV
	.globl	gdt64_lo
	.globl	gdt64_hi

#define GDT64_LIMIT gdt64_end-gdt64_start-1
/* Temporary gdt64, with base address in low memory */
	.type	_C_LABEL(gdt64_lo), @object
LABEL(gdt64_lo)
	.word	GDT64_LIMIT
	.quad	_RELOC(gdt64_start)
END(gdt64_lo)
.align 64

/* Temporary gdt64, with base address in high memory */
	.type	_C_LABEL(gdt64_hi), @object
LABEL(gdt64_hi)
	.word	GDT64_LIMIT
	.quad	gdt64_start
END(gdt64_hi)
.align 64
#undef GDT64_LIMIT

	.type	_C_LABEL(gdt64_start), @object
_C_LABEL(gdt64_start):
	.quad 0x0000000000000000	/* always empty */
	.quad 0x00af9a000000ffff	/* kernel CS */
	.quad 0x00cf92000000ffff	/* kernel DS */
END(gdt64_start)
gdt64_end:

	.type	_C_LABEL(farjmp64), @object
_C_LABEL(farjmp64):
	.long	_RELOC(longmode)
	.word	GSEL(GCODE_SEL, SEL_KPL)
END(farjmp64)

#endif	/* !XENPV */

	/* Space for the temporary stack */
	.size	tmpstk, tmpstk - .
	.space	512
tmpstk:
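
/*
 * tmpstk labels the high end of the area: the stack grows downwards into
 * the 512 bytes reserved above.
 */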

.section multiboot,"ax",@progbits
#if defined(MULTIBOOT)
	.align	8
	.globl	Multiboot2_Header
_C_LABEL(Multiboot2_Header):
	.int	MULTIBOOT2_HEADER_MAGIC
	.int	MULTIBOOT2_ARCHITECTURE_I386
	.int	Multiboot2_Header_end - Multiboot2_Header
	.int	-(MULTIBOOT2_HEADER_MAGIC + MULTIBOOT2_ARCHITECTURE_I386 \
		+ (Multiboot2_Header_end - Multiboot2_Header))

	.int	1	/* MULTIBOOT_HEADER_TAG_INFORMATION_REQUEST */
	.int	12	/* sizeof(multiboot_header_tag_information_request) */
			/* + sizeof(uint32_t) * requests */
	.int	4	/* MULTIBOOT_TAG_TYPE_BASIC_MEMINFO */
	.align	8

	.int	3	/* MULTIBOOT_HEADER_TAG_ENTRY_ADDRESS */
	.int	16	/* sizeof(struct multiboot_tag_efi64) */
	.quad	(multiboot2_entry - KERNBASE)
	.align	8

	.int	9	/* MULTIBOOT_HEADER_TAG_ENTRY_ADDRESS_EFI64 */
	.int	16	/* sizeof(struct multiboot_tag_efi64) */
	.quad	(multiboot2_entry - KERNBASE)
	.align	8

#if notyet
	/*
	 * Could be used to get an early console for debug,
	 * but this is broken.
	 */
	.int	7	/* MULTIBOOT_HEADER_TAG_EFI_BS */
	.int	8	/* sizeof(struct multiboot_tag) */
	.align	8
#endif

	.int	0	/* MULTIBOOT_HEADER_TAG_END */
	.int	8	/* sizeof(struct multiboot_tag) */
	.align	8
	.globl	Multiboot2_Header_end
_C_LABEL(Multiboot2_Header_end):
#endif	/* MULTIBOOT */

/*
 * Some hackage to deal with 64bit symbols in 32 bit mode.
 * This may not be needed if things are cleaned up a little.
 */

	.text
	.globl	_C_LABEL(kernel_text)
	.set	_C_LABEL(kernel_text),KERNTEXTOFF

ENTRY(start)
#ifndef XENPV
	.code32

	/* Warm boot */
	movw	$0x1234,0x472

#if defined(MULTIBOOT)
	jmp	.Lnative_loader


multiboot2_entry:
	.code64
	/*
	 * multiboot2 entry point. We are left here without
	 * stack and with no idea of where we were loaded in memory.
	 * The only inputs are
	 * %eax MULTIBOOT2_BOOTLOADER_MAGIC
	 * %ebx pointer to multiboot_info
	 *
	 * Here we will:
	 * - copy the kernel to 0x200000 (KERNTEXTOFF - KERNBASE)
	 *	as almost all the code in locore.S assumes it is there.
	 *	This is derived from 
	 *	src/sys/arch/i386/stand/efiboot/bootx64/startprog64.S
	 * - copy multiboot_info, as done in multiboot_pre_reloc() from
	 *	src/sys/arch/x86/x86/multiboot2.c
	 *	Unfortunately we cannot call that function as there is 
	 *	no simple way to build it as 32 bit code in a 64 bit kernel.
	 * - Copy ELF symbols, also as in multiboot_pre_reloc()
	 */

	cli

	/*
	 * Discover our load address and use it to get start address
	 */
	mov	$_RELOC(tmpstk),%rsp
	call	next
next:	pop	%r8
	sub	$(next - start), %r8
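	/*
	 * The call/pop pair above captured the runtime address of 'next';
	 * subtracting the link-time offset (next - start) leaves the
	 * physical address we were loaded at in %r8.
	 */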

	/*
	 * Save multiboot_info for later. We cannot use the
	 * temporary stack for that since we are going to
	 * overwrite it.
	 */
	movl	%ebx, (multiboot2_info_ptr - start)(%r8)

	/*
	 * Get relocated multiboot2_loader entry point in %r9
	 */
	mov	$(KERNTEXTOFF - KERNBASE), %r9
	add	$(multiboot2_loader - kernel_text), %r9

	/* Copy kernel */
	mov	$(KERNTEXTOFF - KERNBASE), %rdi			/* dest */
	mov	%r8, %rsi		
	sub	$(start - kernel_text), %rsi			/* src */
	mov	$(__kernel_end - kernel_text), %rcx		/* size */
	mov	%rcx, %r12		
	movq	%rdi, %r11		/* for misaligned check */

#if !defined(NO_OVERLAP)
	movq	%rdi, %r13
	subq	%rsi, %r13
#endif

	shrq	$3, %rcx		/* count for copy by words */
	jz	8f			/* j if less than 8 bytes */

	lea	-8(%rdi, %r12), %r14	/* target address of last 8 */
	mov	-8(%rsi, %r12), %r15	/* get last word */
#if !defined(NO_OVERLAP)
	cmpq	%r12, %r13		/* overlapping? */
	jb	10f
#endif

/*
 * Non-overlapping, copy forwards.
 * Newer Intel cpus (Nehalem) will do 16byte read/write transfers
 * if %ecx is more than 76.
 * AMD might do something similar some day.
 */
	and	$7, %r11		/* destination misaligned ? */
	jnz	12f
	rep
	movsq
	mov	%r15, (%r14)		/* write last word */
	jmp	.Lcopy_done

/*
 * Destination misaligned
 * AMD say it is better to align the destination (not the source).
 * This will also re-align copies if the source and dest are both
 * misaligned by the same amount.
 * (I think Nehalem will use its accelerated copy if the source
 * and destination have the same alignment.)
 */
12:
	lea	-9(%r11, %r12), %rcx	/* post re-alignment count */
	neg	%r11			/* now -1 .. -7 */
	mov	(%rsi), %r12		/* get first word */
	mov	%rdi, %r13		/* target for first word */
	lea	8(%rsi, %r11), %rsi
	lea	8(%rdi, %r11), %rdi
	shr	$3, %rcx
	rep
	movsq
	mov	%r12, (%r13)		/* write first word */
	mov	%r15, (%r14)		/* write last word */
	jmp	.Lcopy_done

#if !defined(NO_OVERLAP)
/* Must copy backwards.
 * Reverse copy is probably easy to code faster than 'rep movsq'
 * since that requires (IIRC) an extra clock every 3 iterations (AMD).
 * However I don't suppose anything cares that much!
 * The big cost is the std/cld pair - reputedly 50+ cycles on Netburst P4.
 * The copy is aligned with the buffer start (more likely to
 * be a multiple of 8 than the end).
 */
10:
	lea	-8(%rsi, %rcx, 8), %rsi
	lea	-8(%rdi, %rcx, 8), %rdi
	std
	rep
	movsq
	cld
	mov	%r15, (%r14)	/* write last bytes */
	jmp	.Lcopy_done
#endif

/* Less than 8 bytes to copy, copy by bytes */
/* Intel Nehalem optimise 'rep movsb' for <= 7 bytes (9-15 clocks).
 * For longer transfers it is 50+ !
 */
8:	mov	%r12, %rcx

#if !defined(NO_OVERLAP)
	cmpq	%r12, %r13	/* overlapping? */
	jb	81f
#endif

	/* nope, copy forwards. */
	rep
	movsb
	jmp	.Lcopy_done

#if !defined(NO_OVERLAP)
/* Must copy backwards */
81:
	lea	-1(%rsi, %rcx), %rsi
	lea	-1(%rdi, %rcx), %rdi
	std
	rep
	movsb
	cld
#endif
	/* End of copy kernel */
.Lcopy_done:

	mov	%r8, %rdi	/* %rdi: loaded start address */
	mov	%r9, %rsi	/* %rsi: kernel entry address */

	/* Prepare jump address */
	lea	(multiboot2_loader32a - start)(%rdi), %rax
	movl	%eax, (multiboot2_loader32r - start)(%rdi)

	/* Setup GDT */
	lea	(gdt - start)(%rdi), %rax
	mov	%rax, (gdtrr - start)(%rdi)
	lgdt	(gdtr - start)(%rdi)

	/* Jump to set %cs */
	ljmp	*(multiboot2_loader32r - start)(%rdi)

	.align	4
	.code32
multiboot2_loader32a:
	movl	$DATA_SEGMENT, %eax
	movw	%ax, %ds
	movw	%ax, %es
	movw	%ax, %fs
	movw	%ax, %gs
	movw	%ax, %ss

	/* Already set new stack pointer */
	movl	%esp, %ebp

	/* Disable Paging in CR0 */
	movl	%cr0, %eax
	andl	$(~CR0_PG), %eax
	movl	%eax, %cr0

	/* Disable PAE in CR4 */
	movl	%cr4, %eax
	andl	$(~CR4_PAE), %eax
	movl	%eax, %cr4

	jmp	multiboot2_loader32b

	.align	4
multiboot2_loader32b:
	xor	%eax, %eax

	/*
	 * Reload multiboot info from target location.
	 */
	movl	_RELOC(multiboot2_info_ptr), %ebx
	call	*%esi

	.align	16
multiboot2_loader32r:
	.long	0
	.long	CODE_SEGMENT
	.align	16
gdt:
	.long	0, 0
	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x9f, 0xcf, 0x00
	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x93, 0xcf, 0x00
gdtr:
	.word	gdtr - gdt
gdtrr:
	.quad	0

multiboot2_info_ptr:
	.long	0
	
	.align 16
multiboot2_loader:
	/*
	 * Here we would like to call multiboot2_pre_reloc() but
	 * we do not yet run in long mode, which means we need
	 * a 32 bit version of that function. Unfortunately,
	 * mixing 32-bit and 64-bit object files at link time
	 * does not work. As a result, we need to do the job
	 * of multiboot2_pre_reloc() here in assembly.
	 */
#if multiboot2_pre_reloc_would_be_built_as_ia32
	movl	$_RELOC(tmpstk),%esp
	mov	%ebx,%edi	/* Address of Multiboot information */
	call	_C_LABEL(multiboot2_pre_reloc)
#else
	/*
	 * Copy multiboot_info
	 */
	movl	$_RELOC(multiboot_info),%edi
	movl	%ebx,%esi
	movl	(%ebx),%ecx
	shr	$2,%ecx
	rep
	movsl

	/*
	 * Set multiboot2_enabled
	 */
	movl	$1,%eax
	movl	%eax,RELOC(multiboot2_enabled)

	/*
	 * Look for MULTIBOOT_TAG_TYPE_ELF_SECTIONS
	 */
	movl	$_RELOC(multiboot_info),%esi
	movl	(%esi),%ecx	/* multiboot_info size */
	movl	%esi,%edx
	addl	%ecx,%edx	/* %edx: end of multiboot_info */
	addl	$8,%esi		/* skip two words of multiboot_info header */
mbt_loop:
	movl	(%esi),%ebx	/* mbt->type */
	cmpl	$9,%ebx		/* 9 for MULTIBOOT_TAG_TYPE_ELF_SECTIONS */
	je	found_elf_sections

	movl	4(%esi),%eax	/* mbt->size */
	addl	%eax,%esi
	addl	$7,%esi		/* roundup(%esi,8) */
	andl	$~7,%esi

	cmpl	%edx,%esi
	jle	mbt_loop
	jmp	elf_sections_done

found_elf_sections:
	movl	$0,%eax
	movl	%esp,%ebp			/* %ebp is esymp */
	push	%eax
	push	$KERNBASE_LO			/* kernbase */
	push	$_RELOC(end)			/* void *end */
	push	%ebp				/* int **esymp */
	push	$_RELOC(has_syms)		/* bool *has_symsp */
	push	$_RELOC(Multiboot_Symbols)	/* struct multiboot_symbol *ms */
	push	%esi		/* struct multiboot_tag_elf_sections *mbt_elf */
	call	multiboot2_copy_syms32

	/* Adjust esym as a 64 bit pointer if esymp was set */
	movl	(%ebp),%eax
	testl	%eax,%eax		/* esymp = NULL? */
	jz	elf_sections_done

	movl	$RELOC(esym),%ebp
	movl	%eax,(%ebp)
	movl	$KERNBASE_HI,4(%ebp)

	jmp	elf_sections_done

	/*
	 * This is multiboot2_copy_syms() from 
	 * src/sys/arch/x86/x86/multiboot2.c
	 * built with -m32 -mcmodel=32 -D_LOCORE_64
	 */
multiboot2_copy_syms32:
	push	%ebp
	mov	%esp,%ebp
	push	%edi
	push	%esi
	push	%ebx
	sub	$0x20,%esp
	mov	0x8(%ebp),%esi
	/* for (i = 0; i < mbt_elf->num && symtabp == NULL && */
	mov	0x8(%esi),%ebx
	test	%ebx,%ebx
	je	copy_syms_4ce
	add	$0x14,%esi
	mov	%esi,%eax
	xor	%edx,%edx
	jmp	copy_syms_3a0
copy_syms_395:
	cmp	%edx,%ebx
	jbe	copy_syms_4ce
copy_syms_39d:
	add	$0x40,%eax
copy_syms_3a0:
	add	$0x1,%edx
	/* 	if ((shdrp->sh_type == SHT_SYMTAB) && */
	cmpl	$0x2,0x4(%eax)
	jne	copy_syms_395
	/* 		shdrp->sh_link != SHN_UNDEF) { */
	mov	0x28(%eax),%ecx
	/* 	if ((shdrp->sh_type == SHT_SYMTAB) && */
	test	%ecx,%ecx
	je	copy_syms_395
	/* 			[shdrp->sh_link]; */
	shl	$0x6,%ecx
	/* 		shdrp2 = &((locore_Elf_Shdr *)mbt_elf->sections) */
	add	%esi,%ecx
	/* 		if (shdrp2->sh_type == SHT_STRTAB) { */
	cmpl	$0x3,0x4(%ecx)
	jne	copy_syms_395
	/* for (i = 0; i < mbt_elf->num && symtabp == NULL && */
	cmp	%ebx,%edx
	jae	copy_syms_6d1
	test	%eax,%eax
	je	copy_syms_608
	/* if (symtabp == NULL || strtabp == NULL) */
copy_syms_3cb:
	test	%ecx,%ecx
	lea	0x0(%esi),%esi
	je	copy_syms_4ce
	/* symaddr = symtabp->sh_addr; */
	mov	0x10(%eax),%edi
	mov	%edi,-0x10(%ebp)
	mov	0x14(%eax),%ebx
	mov	%ebx,-0x18(%ebp)
	/* straddr = strtabp->sh_addr; */
	mov	0x10(%ecx),%esi
	mov	%esi,-0x14(%ebp)
	mov	0x14(%ecx),%ebx
	mov	%ebx,-0x20(%ebp)
	/* symsize = symtabp->sh_size; */
	mov	0x20(%eax),%ebx
	/* strsize = strtabp->sh_size; */
	mov	0x20(%ecx),%eax
	mov	%eax,-0x1c(%ebp)
	cmp	0x18(%ebp),%edi
	jae	copy_syms_4d6
	cmp	%esi,0x18(%ebp)
	ja	copy_syms_4e0
	jae	copy_syms_54d
	/* cp2dst = (locore_Elf_Addr)(uintptr_t)endp + cp1size; */
copy_syms_40f:
	mov	-0x1c(%ebp),%ecx
	mov	%ecx,%eax
	xor	%edx,%edx
	/* cp1dst = (locore_Elf_Addr)(uintptr_t)endp; */
	mov	0x18(%ebp),%esi
	xor	%edi,%edi
	/* cp2dst = (locore_Elf_Addr)(uintptr_t)endp + cp1size; */
	add	%esi,%eax
	adc	%edi,%edx
	mov	%eax,-0x2c(%ebp)
	mov	%edx,-0x28(%ebp)
	/* (void)memcpy((void *)(uintptr_t)cp1dst, */
	mov	%ecx,%eax
	mov	0x18(%ebp),%edi
	mov	-0x14(%ebp),%esi
	cmp	$0x4,%ecx
	jae	copy_syms_5e8
copy_syms_436:
	test	$0x2,%al
	je	copy_syms_43c
	movsw	%ds:(%esi),%es:(%edi)
copy_syms_43c:
	test	$0x1,%al
	je	copy_syms_441
	movsb	%ds:(%esi),%es:(%edi)
	/* (void)memcpy((void *)(uintptr_t)cp2dst, */
copy_syms_441:
	mov	%ebx,%eax
	mov	0x18(%ebp),%edi
	mov	-0x1c(%ebp),%esi
	add	%esi,%edi
	mov	-0x10(%ebp),%esi
	cmp	$0x4,%ebx
	jae	copy_syms_5c4
copy_syms_457:
	test	$0x2,%al
	je	copy_syms_45d
	movsw	%ds:(%esi),%es:(%edi)
copy_syms_45d:
	test	$0x1,%al
	je	copy_syms_462
	movsb	%ds:(%esi),%es:(%edi)
	/* symstart = (cp1src == symaddr) ? cp1dst : cp2dst; */
copy_syms_462:
	mov	-0x18(%ebp),%edx
	mov	-0x20(%ebp),%edi
	xor	%edi,%edx
	mov	-0x10(%ebp),%eax
	mov	-0x14(%ebp),%ecx
	xor	%ecx,%eax
	or	%eax,%edx
	je	copy_syms_6ba
	mov	-0x2c(%ebp),%eax
	mov	%eax,-0x24(%ebp)
	mov	%ecx,-0x10(%ebp)
	mov	%edi,-0x18(%ebp)
	/* strstart = (cp1src == straddr) ? cp1dst : cp2dst; */
copy_syms_486:
	mov	-0x20(%ebp),%edx
	xor	-0x18(%ebp),%edx
	mov	-0x14(%ebp),%eax
	xor	-0x10(%ebp),%eax
	or	%eax,%edx
	je	copy_syms_545
copy_syms_49a:
	mov	-0x2c(%ebp),%esi
	/* ms->s_symstart = symstart + kernbase; */
copy_syms_49d:
	mov	-0x24(%ebp),%eax
	add	0x1c(%ebp),%eax
	mov	0xc(%ebp),%edi
	mov	%eax,(%edi)
	/* ms->s_symsize	= symsize; */
	mov	%edi,%eax
	mov	%ebx,0x4(%edi)
	/* ms->s_strstart = strstart + kernbase; */
	add	0x1c(%ebp),%esi
	mov	%esi,0x8(%edi)
	/* ms->s_strsize	= strsize; */
	mov	-0x1c(%ebp),%edi
	mov	%edi,0xc(%eax)
	/* *has_symsp = true; */
	mov	0x10(%ebp),%eax
	movb	$0x1,(%eax)
	/* *esymp = (int *)((uintptr_t)endp + symsize + strsize + kernbase); */
	mov	0x18(%ebp),%eax
	add	0x1c(%ebp),%eax
	add	%eax,%ebx
	add	%edi,%ebx
	mov	0x14(%ebp),%eax
	mov	%ebx,(%eax)
copy_syms_4ce:
	add	$0x20,%esp
	pop	%ebx
	pop	%esi
	pop	%edi
	pop	%ebp
	ret	
copy_syms_4d6:
	jbe	copy_syms_54d
	mov	-0x14(%ebp),%eax
	cmp	%eax,0x18(%ebp)
	jbe	copy_syms_54d
	/* cp1dst = (locore_Elf_Addr)(uintptr_t)endp; */
copy_syms_4e0:
	mov	0x18(%ebp),%eax
	mov	%eax,-0x24(%ebp)
	/* cp2dst = (locore_Elf_Addr)(uintptr_t)endp + cp1size; */
	mov	%ebx,%eax
	xor	%edx,%edx
	/* cp1dst = (locore_Elf_Addr)(uintptr_t)endp; */
	mov	0x18(%ebp),%esi
	xor	%edi,%edi
	/* cp2dst = (locore_Elf_Addr)(uintptr_t)endp + cp1size; */
	add	%esi,%eax
	adc	%edi,%edx
	mov	%eax,-0x2c(%ebp)
	mov	%edx,-0x28(%ebp)
	/* (void)memcpy((void *)(uintptr_t)cp1dst, */
	mov	%ebx,%eax
	mov	0x18(%ebp),%edi
	mov	-0x10(%ebp),%esi
	cmp	$0x4,%ebx
	jae	copy_syms_5a8
copy_syms_50a:
	test	$0x2,%al
	jne	copy_syms_57b
	test	$0x1,%al
	jne	copy_syms_578
	/* (void)memcpy((void *)(uintptr_t)cp2dst, */
copy_syms_512:
	mov	-0x1c(%ebp),%ecx
	mov	%ecx,%eax
	mov	0x18(%ebp),%edi
	add	%ebx,%edi
	mov	-0x14(%ebp),%esi
	cmp	$0x4,%ecx
	jae	copy_syms_584
copy_syms_524:
	test	$0x2,%al
	jne	copy_syms_56c
	test	$0x1,%al
	je	copy_syms_486
copy_syms_530:
	movsb	%ds:(%esi),%es:(%edi)
	/* strstart = (cp1src == straddr) ? cp1dst : cp2dst; */
	mov	-0x20(%ebp),%edx
	xor	-0x18(%ebp),%edx
	mov	-0x14(%ebp),%eax
	xor	-0x10(%ebp),%eax
	or	%eax,%edx
	jne	copy_syms_49a
copy_syms_545:
	mov	0x18(%ebp),%esi
	jmp	copy_syms_49d
	/* 	if (symaddr < straddr) { */
copy_syms_54d:
	mov	-0x20(%ebp),%edi
	cmp	%edi,-0x18(%ebp)
	jb	copy_syms_4e0
	ja	copy_syms_40f
	mov	-0x14(%ebp),%edi
	cmp	%edi,-0x10(%ebp)
	jb	copy_syms_4e0
	jmp	copy_syms_40f
	/* (void)memcpy((void *)(uintptr_t)cp2dst, */
copy_syms_56c:
	movsw	%ds:(%esi),%es:(%edi)
	test	$0x1,%al
	je	copy_syms_486
	jmp	copy_syms_530
	/* (void)memcpy((void *)(uintptr_t)cp1dst, */
copy_syms_578:
	movsb	%ds:(%esi),%es:(%edi)
	jmp	copy_syms_512
copy_syms_57b:
	movsw	%ds:(%esi),%es:(%edi)
	test	$0x1,%al
	nop
	je	copy_syms_512
	jmp	copy_syms_578
	/* (void)memcpy((void *)(uintptr_t)cp2dst, */
copy_syms_584:
	test	$0x1,%edi
	jne	copy_syms_650
copy_syms_590:
	test	$0x2,%edi
	jne	copy_syms_63c
copy_syms_59c:
	mov	%eax,%ecx
	shr	$0x2,%ecx
	rep movsl %ds:(%esi),%es:(%edi)
	jmp	copy_syms_524
	/* (void)memcpy((void *)(uintptr_t)cp1dst, */
copy_syms_5a8:
	test	$0x1,%edi
	jne	copy_syms_626
copy_syms_5b0:
	test	$0x2,%edi
	jne	copy_syms_615
copy_syms_5b8:
	mov	%eax,%ecx
	shr	$0x2,%ecx
	rep movsl %ds:(%esi),%es:(%edi)
	jmp	copy_syms_50a
	/* (void)memcpy((void *)(uintptr_t)cp2dst, */
copy_syms_5c4:
	test	$0x1,%edi
	jne	copy_syms_666
copy_syms_5d0:
	test	$0x2,%edi
	jne	copy_syms_6a6
copy_syms_5dc:
	mov	%eax,%ecx
	shr	$0x2,%ecx
	rep movsl %ds:(%esi),%es:(%edi)
	jmp	copy_syms_457
	/* (void)memcpy((void *)(uintptr_t)cp1dst, */
copy_syms_5e8:
	test	$0x1,%edi
	jne	copy_syms_68d
copy_syms_5f4:
	test	$0x2,%edi
	jne	copy_syms_679
copy_syms_5fc:
	mov	%eax,%ecx
	shr	$0x2,%ecx
	rep movsl %ds:(%esi),%es:(%edi)
	jmp	copy_syms_436
	/* for (i = 0; i < mbt_elf->num && symtabp == NULL && */
copy_syms_608:
	test	%ecx,%ecx
	jne	copy_syms_4ce
	jmp	copy_syms_39d
	/* (void)memcpy((void *)(uintptr_t)cp1dst, */
copy_syms_615:
	movzwl (%esi),%edx
	mov	%dx,(%edi)
	add	$0x2,%edi
	add	$0x2,%esi
	sub	$0x2,%eax
	jmp	copy_syms_5b8
copy_syms_626:
	movzbl (%esi),%eax
	mov	%al,(%edi)
	mov	0x18(%ebp),%eax
	lea	0x1(%eax),%edi
	add	$0x1,%esi
	lea	-0x1(%ebx),%eax
	jmp	copy_syms_5b0
	/* (void)memcpy((void *)(uintptr_t)cp2dst, */
copy_syms_63c:
	movzwl (%esi),%edx
	mov	%dx,(%edi)
	add	$0x2,%edi
	add	$0x2,%esi
	sub	$0x2,%eax
	jmp	copy_syms_59c
copy_syms_650:
	movzbl (%esi),%eax
	mov	%al,(%edi)
	add	$0x1,%edi
	add	$0x1,%esi
	mov	-0x1c(%ebp),%eax
	sub	$0x1,%eax
	jmp	copy_syms_590
copy_syms_666:
	movzbl (%esi),%eax
	mov	%al,(%edi)
	add	$0x1,%edi
	add	$0x1,%esi
	lea	-0x1(%ebx),%eax
	jmp	copy_syms_5d0
	/* (void)memcpy((void *)(uintptr_t)cp1dst, */
copy_syms_679:
	movzwl (%esi),%edx
	mov	%dx,(%edi)
	add	$0x2,%edi
	add	$0x2,%esi
	sub	$0x2,%eax
	jmp	copy_syms_5fc
copy_syms_68d:
	movzbl (%esi),%eax
	mov	%al,(%edi)
	mov	0x18(%ebp),%eax
	lea	0x1(%eax),%edi
	add	$0x1,%esi
	mov	-0x1c(%ebp),%eax
	sub	$0x1,%eax
	jmp	copy_syms_5f4
	/* (void)memcpy((void *)(uintptr_t)cp2dst, */
copy_syms_6a6:
	movzwl (%esi),%edx
	mov	%dx,(%edi)
	add	$0x2,%edi
	add	$0x2,%esi
	sub	$0x2,%eax
	jmp	copy_syms_5dc
copy_syms_6ba:
	mov	-0x14(%ebp),%eax
	mov	%eax,-0x10(%ebp)
	mov	-0x20(%ebp),%eax
	mov	%eax,-0x18(%ebp)
	/* cp1dst = (locore_Elf_Addr)(uintptr_t)endp; */
	mov	0x18(%ebp),%eax
	mov	%eax,-0x24(%ebp)
	jmp	copy_syms_486
	/* if (symtabp == NULL || strtabp == NULL) */
copy_syms_6d1:
	test	%eax,%eax
	jne	copy_syms_3cb
	jmp	copy_syms_4ce
elf_sections_done:
#endif

	jmp	.Lbegin


#endif /* MULTIBOOT */

.Lnative_loader:
	/*
	 * Load parameters from the stack (32 bits):
	 *     boothowto, [bootdev], bootinfo, esym, biosextmem, biosbasemem
	 * We are not interested in 'bootdev'.
	 */
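
	/*
	 * In C terms, the boot program did roughly (illustrative only):
	 *	start(boothowto, bootdev, bootinfo, esym,
	 *	    biosextmem, biosbasemem);
	 * so the 32bit arguments sit at 4(%esp), 8(%esp), 12(%esp), ...
	 */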

	/* Load 'boothowto' */
	movl	4(%esp),%eax
	movl	%eax,RELOC(boothowto)

	/* Load 'bootinfo' */
	movl	12(%esp),%eax
	testl	%eax,%eax		/* bootinfo = NULL? */
	jz	.Lbootinfo_finished

	movl	(%eax),%ebx		/* bootinfo::bi_nentries */
	movl	$RELOC(bootinfo),%ebp
	movl	%ebp,%edx
	addl	$BOOTINFO_MAXSIZE,%ebp
	movl	%ebx,(%edx)
	addl	$4,%edx

.Lbootinfo_entryloop:
	testl	%ebx,%ebx		/* no remaining entries? */
	jz	.Lbootinfo_finished

	addl	$4,%eax
	movl	(%eax),%ecx		/* address of entry */
	pushl	%edi
	pushl	%esi
	pushl	%eax

	movl	(%ecx),%eax		/* btinfo_common::len (size of entry) */
	movl	%edx,%edi
	addl	%eax,%edx		/* update dest pointer */
	cmpl	%ebp,%edx		/* beyond bootinfo+BOOTINFO_MAXSIZE? */
	jg	.Lbootinfo_overflow

	movl	%ecx,%esi
	movl	%eax,%ecx

	/*
	 * If any modules were loaded, record where they end. 'eblob' is used
	 * later to compute the initial bootstrap tables.
	 */
	cmpl	$BTINFO_MODULELIST,4(%esi) /* btinfo_common::type */
	jne	.Lbootinfo_copy

	/* Skip the modules if we won't have enough VA to map them */
	movl	12(%esi),%eax		/* btinfo_modulelist::endpa */
	addl	$PGOFSET,%eax		/* roundup to a page */
	andl	$~PGOFSET,%eax
	cmpl	$BOOTMAP_VA_SIZE,%eax
	jg	.Lbootinfo_skip
	movl	%eax,RELOC(eblob)
	addl	$KERNBASE_LO,RELOC(eblob)
	adcl	$KERNBASE_HI,RELOC(eblob)+4

.Lbootinfo_copy:
	rep
	movsb				/* copy esi -> edi */
	jmp	.Lbootinfo_next

.Lbootinfo_skip:
	subl	%ecx,%edx		/* revert dest pointer */

.Lbootinfo_next:
	popl	%eax
	popl	%esi
	popl	%edi
	subl	$1,%ebx			/* decrement the # of entries */
	jmp	.Lbootinfo_entryloop

.Lbootinfo_overflow:
	/*
	 * Cleanup for overflow case. Pop the registers, and correct the number
	 * of entries.
	 */
	popl	%eax
	popl	%esi
	popl	%edi
	movl	$RELOC(bootinfo),%ebp
	movl	%ebp,%edx
	subl	%ebx,(%edx)		/* correct the number of entries */
.Lbootinfo_finished:

	/* Load 'esym' */
	movl	16(%esp),%eax
	testl	%eax,%eax		/* esym = NULL? */
	jz	1f

	addl	$KERNBASE_LO,%eax

1:
	movl	$RELOC(esym),%ebp
	movl	%eax,(%ebp)
	movl	$KERNBASE_HI,4(%ebp)

	/* Load 'biosextmem' */
	movl	$RELOC(biosextmem),%ebp
	movl	(%ebp),%eax
	testl	%eax,%eax		/* already set? */
	jnz	.Lbiosextmem_finished

	movl	20(%esp),%eax
	movl	%eax,(%ebp)

.Lbiosextmem_finished:
	/* Load 'biosbasemem' */
	movl	$RELOC(biosbasemem),%ebp
	movl	(%ebp),%eax
	testl	%eax,%eax		/* already set? */
	jnz	.Lbiosbasemem_finished

	movl	24(%esp),%eax
	movl	%eax,(%ebp)

.Lbiosbasemem_finished:
	/*
	 * Done with the parameters!
	 */

.Lbegin:
	/* First, reset the PSL. */
	pushl	$PSL_MBO
	popfl

	xorl	%eax,%eax
	cpuid
	movl	%eax,RELOC(cpuid_level)

	/*
	 * Finished with old stack; load new %esp now instead of later so we
	 * can trace this code without having to worry about the trace trap
	 * clobbering the memory test or the zeroing of the bss+bootstrap page
	 * tables.
	 *
	 * The boot program should check:
	 *	text+data <= &stack_variable - more_space_for_stack
	 *	text+data+bss+pad+space_for_page_tables <= end_of_memory
	 *
	 * XXX: the gdt is in the carcass of the boot program so clearing
	 * the rest of memory is still not possible.
	 */
	movl	$RELOC(tmpstk),%esp

	/*
	 * Retrieve the NX/XD flag. We use the 32bit version of PTE_NX.
	 */
	movl	$0x80000001,%eax
	cpuid
	andl	$CPUID_NOX,%edx
	jz	.Lno_NOX
	movl	$PTE_NX32,RELOC(nox_flag)
.Lno_NOX:

/*
 * There are four levels of pages in amd64: PML4 -> PDP -> PD -> PT. They will
 * be referred to as: L4 -> L3 -> L2 -> L1.
 *
 * Virtual address space of the kernel:
 * +------+--------+------+-----+--------+---------------------+----------
 * | TEXT | RODATA | DATA | BSS | [SYMS] | [PRELOADED MODULES] | L4 ->
 * +------+--------+------+-----+--------+---------------------+----------
 *                             (1)      (2)                   (3)
 *
 * --------------+-----+-----+----+-------------+
 * -> PROC0 STK -> L3 -> L2 -> L1 | ISA I/O MEM |
 * --------------+-----+-----+----+-------------+
 *                               (4)
 *
 * PROC0 STK is obviously not linked as a page level. It just happens to be
 * caught between L4 and L3.
 *
 * (PROC0 STK + L4 + L3 + L2 + L1) is later referred to as BOOTSTRAP TABLES.
 *
 * ISA I/O MEM has no physical page allocated here, just virtual addresses.
 *
 * Important note: the kernel segments are properly 4k-aligned
 * (see kern.ldscript), so there's no need to enforce alignment.
 */

	/* Find end of kernel image; brings us on (1). */
	movl	$RELOC(__kernel_end),%edi

#if (NKSYMS || defined(DDB) || defined(MODULAR)) && !defined(makeoptions_COPY_SYMTAB)
	/* Save the symbols (if loaded); brings us on (2). */
	movl	RELOC(esym),%eax
	testl	%eax,%eax
	jz	1f
	subl	$KERNBASE_LO,%eax	/* XXX */
	movl	%eax,%edi
1:
#endif
	/* Skip over any modules/blobs; brings us on (3). */
	movl	RELOC(eblob),%eax
	testl	%eax,%eax
	jz	1f
	subl	$KERNBASE_LO,%eax	/* XXX */
	movl	%eax,%edi
1:

	/* We are on (3). Align up for BOOTSTRAP TABLES. */
	movl	%edi,%esi
	addl	$PGOFSET,%esi
	andl	$~PGOFSET,%esi

	/* We are on the BOOTSTRAP TABLES. Save L4's physical address. */
	movl	$RELOC(PDPpaddr),%ebp
	movl	%esi,(%ebp)
	movl	$0,4(%ebp)

	/* Now, zero out the BOOTSTRAP TABLES (before filling them in). */
	movl	%esi,%edi
	xorl	%eax,%eax
	cld
	movl	$TABLESIZE,%ecx
	shrl	$2,%ecx
	rep
	stosl				/* copy eax -> edi */

/*
 * Build the page tables and levels. We go from L1 to L4, and link the levels
 * together. Note: RELOC computes &addr - KERNBASE in 32 bits; the value can't
 * be > 4G, or we can't deal with it anyway, since we are in 32bit mode.
 */
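
/*
 * A "link" from a level-N entry to a level-(N-1) table is simply the
 * physical address of that table OR'ed with PTE_P|PTE_W, which is what
 * the fillkpt invocations below store in the upper-level entries.
 */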
	/*
	 * Build L1.
	 */
	leal	(PROC0_PTP1_OFF)(%esi),%ebx

	/* Skip the area below the kernel text. */
	movl	$(KERNTEXTOFF_LO - KERNBASE_LO),%ecx
	shrl	$PGSHIFT,%ecx
	fillkpt_blank

	/* Map the kernel text RX. */
	movl	$(KERNTEXTOFF_LO - KERNBASE_LO),%eax	/* start of TEXT */
	movl	$RELOC(__rodata_start),%ecx
	subl	%eax,%ecx
	shrl	$PGSHIFT,%ecx
	orl	$(PTE_P),%eax
	fillkpt

	/* Map the kernel rodata R. */
	movl	$RELOC(__rodata_start),%eax
	movl	$RELOC(__data_start),%ecx
	subl	%eax,%ecx
	shrl	$PGSHIFT,%ecx
	orl	$(PTE_P),%eax
	fillkpt_nox

	/* Map the kernel data+bss RW. */
	movl	$RELOC(__data_start),%eax
	movl	$RELOC(__kernel_end),%ecx
	subl	%eax,%ecx
	shrl	$PGSHIFT,%ecx
	orl	$(PTE_P|PTE_W),%eax
	fillkpt_nox

	/* Map [SYMS]+[PRELOADED MODULES] RW. */
	movl	$RELOC(__kernel_end),%eax
	movl	%esi,%ecx		/* start of BOOTSTRAP TABLES */
	subl	%eax,%ecx
	shrl	$PGSHIFT,%ecx
	orl	$(PTE_P|PTE_W),%eax
	fillkpt_nox

	/* Map the BOOTSTRAP TABLES RW. */
	movl	%esi,%eax		/* start of BOOTSTRAP TABLES */
	movl	$TABLESIZE,%ecx		/* length of BOOTSTRAP TABLES */
	shrl	$PGSHIFT,%ecx
	orl	$(PTE_P|PTE_W),%eax
	fillkpt_nox

	/* We are on (4). Map ISA I/O MEM RW. */
	movl	$IOM_BEGIN,%eax
	movl	$IOM_SIZE,%ecx	/* size of ISA I/O MEM */
	shrl	$PGSHIFT,%ecx
	orl	$(PTE_P|PTE_W/*|PTE_PCD*/),%eax
	fillkpt_nox

	/*
	 * Build L2. Linked to L1.
	 */
	leal	(PROC0_PTP2_OFF)(%esi),%ebx
	leal	(PROC0_PTP1_OFF)(%esi),%eax
	orl	$(PTE_P|PTE_W),%eax
	movl	$(NKL2_KIMG_ENTRIES+1),%ecx
	fillkpt

#if L2_SLOT_KERNBASE > 0
	/* If needed, set up level 2 entries for actual kernel mapping */
	leal	(PROC0_PTP2_OFF + L2_SLOT_KERNBASE * PDE_SIZE)(%esi),%ebx
	leal	(PROC0_PTP1_OFF)(%esi),%eax
	orl	$(PTE_P|PTE_W),%eax
	movl	$(NKL2_KIMG_ENTRIES+1),%ecx
	fillkpt
#endif

	/*
	 * Build L3. Linked to L2.
	 */
	leal	(PROC0_PTP3_OFF)(%esi),%ebx
	leal	(PROC0_PTP2_OFF)(%esi),%eax
	orl	$(PTE_P|PTE_W),%eax
	movl	$NKL3_KIMG_ENTRIES,%ecx
	fillkpt

#if L3_SLOT_KERNBASE > 0
	/* If needed, set up level 3 entries for actual kernel mapping */
	leal	(PROC0_PTP3_OFF + L3_SLOT_KERNBASE * PDE_SIZE)(%esi),%ebx
	leal	(PROC0_PTP2_OFF)(%esi),%eax
	orl	$(PTE_P|PTE_W),%eax
	movl	$NKL3_KIMG_ENTRIES,%ecx
	fillkpt
#endif

	/*
	 * Build L4 for identity mapping. Linked to L3.
	 */
	leal	(PROC0_PML4_OFF)(%esi),%ebx
	leal	(PROC0_PTP3_OFF)(%esi),%eax
	orl	$(PTE_P|PTE_W),%eax
	movl	$NKL4_KIMG_ENTRIES,%ecx
	fillkpt

	/* Set up L4 entries for actual kernel mapping */
	leal	(PROC0_PML4_OFF + L4_SLOT_KERNBASE * PDE_SIZE)(%esi),%ebx
	leal	(PROC0_PTP3_OFF)(%esi),%eax
	orl	$(PTE_P|PTE_W),%eax
	movl	$NKL4_KIMG_ENTRIES,%ecx
	fillkpt

	/*
	 * Startup checklist:
	 * 1. Enable PAE (and SSE while here).
	 */
	movl	%cr4,%eax
	orl	$(CR4_PAE|CR4_OSFXSR|CR4_OSXMMEXCPT),%eax
	movl	%eax,%cr4

	/*
	 * 2. Set Long Mode Enable in EFER. Also enable the syscall extensions,
	 *    and NOX if available.
	 */
	movl	$MSR_EFER,%ecx
	rdmsr
	xorl	%eax,%eax	/* XXX */
	orl	$(EFER_LME|EFER_SCE),%eax
	movl	RELOC(nox_flag),%ebx
	cmpl	$0,%ebx
	je 	.Lskip_NOX
	orl	$(EFER_NXE),%eax
.Lskip_NOX:
	wrmsr

	/*
	 * 3. Load %cr3 with pointer to PML4.
	 */
	movl	%esi,%eax
	movl	%eax,%cr3

	/*
	 * 4. Enable paging and the rest of it.
	 */
	movl	%cr0,%eax
	orl	$(CR0_PE|CR0_PG|CR0_NE|CR0_TS|CR0_MP|CR0_WP|CR0_AM),%eax
	movl	%eax,%cr0
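	/* The near jump flushes prefetched instructions now that paging is on. */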
	jmp	compat
compat:

	/*
	 * 5. Not quite done yet, we're now in a compatibility segment, in
	 *    legacy mode. We must jump to a long mode segment. Need to set up
	 *    a temporary GDT with a long mode segment in it to do that.
	 */
	movl	$RELOC(gdt64_lo),%eax
	lgdt	(%eax)
	movl	$RELOC(farjmp64),%eax
	ljmp	*(%eax)

	.code64
longmode:
	/*
	 * 6. Finally, we're in long mode. However, we're still in the identity
	 *    mapped area (could not jump out of that earlier because it would
	 *    have been a > 32bit jump). We can do that now, so here we go.
	 */
	movabsq	$longmode_hi,%rax
	jmp	*%rax

longmode_hi:

	/*
	 * We left the identity mapped area. Base address of
	 * the temporary gdt64 should now be in high memory.
	 */
	movq	$RELOC(gdt64_hi),%rax
	lgdt	(%rax)

	/*
	 * We have arrived. There's no need anymore for the identity mapping
	 * in low memory; remove it.
	 */
	movq	$KERNBASE,%r8

#if L2_SLOT_KERNBASE > 0
	movq	$(NKL2_KIMG_ENTRIES+1),%rcx
	leaq	(PROC0_PTP2_OFF)(%rsi),%rbx	/* old, phys address */
	addq	%r8,%rbx			/* new, virt address */
	killkpt
#endif

#if L3_SLOT_KERNBASE > 0
	movq	$NKL3_KIMG_ENTRIES,%rcx
	leaq	(PROC0_PTP3_OFF)(%rsi),%rbx	/* old, phys address */
	addq	%r8,%rbx			/* new, virt address */
	killkpt
#endif

	movq	$NKL4_KIMG_ENTRIES,%rcx
	leaq	(PROC0_PML4_OFF)(%rsi),%rbx	/* old, phys address of PML4 */
	addq	%r8,%rbx			/* new, virt address of PML4 */
	killkpt

	/* Relocate atdevbase. */
	movq	$(TABLESIZE+KERNBASE),%rdx
	addq	%rsi,%rdx
	movq	%rdx,_C_LABEL(atdevbase)(%rip)

	/* Set up bootstrap stack. */
	leaq	(PROC0_STK_OFF)(%rsi),%rax
	addq	%r8,%rax
	movq	%rax,_C_LABEL(lwp0uarea)(%rip)
	leaq	(USPACE-FRAMESIZE)(%rax),%rsp
	xorq	%rbp,%rbp			/* mark end of frames */

#if defined(MULTIBOOT)
	/*
	 * It is now safe to parse the Multiboot information structure
	 * we saved before from C code. Note that we cannot delay its
	 * parsing any more because initgdt (called below) needs to make
	 * use of this information.
	 */
	pushq	%rsi
	call	_C_LABEL(multiboot2_post_reloc)
	popq	%rsi
#endif 
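	/* Clear segment registers. */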
	xorw	%ax,%ax
	movw	%ax,%gs
	movw	%ax,%fs

	/* The first physical page available. */
	leaq	(TABLESIZE)(%rsi),%rdi

#else	/* XENPV */
	/* First, reset the PSL. */
	pushq	$2
	popfq

	cld

	/*
	 * Xen info:
	 * - %rsi -> start_info struct
	 * - %rsp -> stack, *theoretically* the last used page by Xen bootstrap
	 */
	movq	%rsi,%rbx

	/* Clear BSS. */
	xorq	%rax,%rax
	movq	$_C_LABEL(__bss_start),%rdi
	movq	$_C_LABEL(_end),%rcx
	subq	%rdi,%rcx
	rep
	stosb

	/* Copy start_info to a safe place. */
	movq	%rbx,%rsi
	movq	$_C_LABEL(start_info_union),%rdi
	movq	$64,%rcx
	rep
	movsq

	/*
	 * Memory layout at start of the day:
	 * - Kernel image
	 * - Page frames list
	 * - start_info struct. We copied it, so it can be recycled.
	 * - xenstore
	 * - console
	 * - Xen bootstrap page tables
	 * - kernel stack, provided by Xen
	 * - guaranteed 512kB padding
	 *
	 * As we want to rebuild our page tables and place our stack
	 * in proc0 struct, all data starting from after console can be
	 * discarded after we've done a little setup.
	 */

	/*
	 * We want our own page tables, and will rebuild them. We will reclaim
	 * the Xen space later, INCLUDING the stack. So we need to switch to a
	 * temporary one now.
	 */
	movq	$tmpstk,%rax
	subq	$8,%rax
	movq	%rax,%rsp

	xorl	%eax,%eax
	cpuid
	movl	%eax,_C_LABEL(cpuid_level)

	movq	$cpu_info_primary,%rdi
	movq	%rdi,CPU_INFO_SELF(%rdi) /* ci->ci_self = ci */
	movq	$1,%rsi
	call	cpu_init_msrs	/* cpu_init_msrs(ci, true); */

	call	xen_locore

	/*
	 * The first VA available is returned by xen_locore in %rax. We
	 * use it as the UAREA, and set up the stack here.
	 */
	movq	%rax,%rsi
	movq	%rsi,_C_LABEL(lwp0uarea)(%rip)
	leaq	(USPACE-FRAMESIZE)(%rsi),%rsp
	xorq	%rbp,%rbp

	/* Clear segment registers. */
	xorw	%ax,%ax
	movw	%ax,%gs
	movw	%ax,%fs

	/* Set first_avail after the DUMMY PAGE (see xen_locore). */
	movq	%rsi,%rdi
	addq	$(USPACE+PAGE_SIZE),%rdi
	subq	$KERNBASE,%rdi	/* init_x86_64 wants a physical address */
#endif	/* XENPV */

	pushq	%rdi
	call	_C_LABEL(init_bootspace)
	call	_C_LABEL(init_slotspace)
	popq	%rdi
	call	_C_LABEL(init_x86_64)
	call 	_C_LABEL(main)
END(start)

#if defined(XEN)
/* space for the hypercall call page */
#define HYPERCALL_PAGE_OFFSET 0x1000
.align HYPERCALL_PAGE_OFFSET
ENTRY(hypercall_page) /* Returns -1, on HYPERVISOR_xen_version() */
.skip	(__HYPERVISOR_xen_version*32), 0x90
	movq	$-1, %rax
	retq
.align HYPERCALL_PAGE_OFFSET, 0x90
END(hypercall_page)
#endif /* XEN */

/*
 * int setjmp(label_t *)
 *
 * Used primarily by DDB.
 */
ENTRY(setjmp)
	/*
	 * Only save registers that must be preserved across function
	 * calls according to the ABI (%rbx, %rsp, %rbp, %r12-%r15)
	 * and %rip.
	 */
	movq	%rdi,%rax
	movq	%rbx,(%rax)
	movq	%rsp,8(%rax)
	movq	%rbp,16(%rax)
	movq	%r12,24(%rax)
	movq	%r13,32(%rax)
	movq	%r14,40(%rax)
	movq	%r15,48(%rax)
	movq	(%rsp),%rdx
	movq	%rdx,56(%rax)
	xorl	%eax,%eax
	ret
END(setjmp)

/*
 * int longjmp(label_t *)
 *
 * Used primarily by DDB.
 */
ENTRY(longjmp)
	movq	%rdi,%rax
	movq	(%rax),%rbx
	movq	8(%rax),%rsp
	movq	16(%rax),%rbp
	movq	24(%rax),%r12
	movq	32(%rax),%r13
	movq	40(%rax),%r14
	movq	48(%rax),%r15
	movq	56(%rax),%rdx
	movq	%rdx,(%rsp)
	movl	$1,%eax
	ret
END(longjmp)
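
/*
 * Illustrative (hypothetical) C usage of the pair, in the style of DDB
 * error recovery; riskyop() and recover() are made-up names:
 *
 *	label_t jb;
 *
 *	if (setjmp(&jb) == 0) {
 *		riskyop();		(first pass: setjmp returned 0)
 *	} else {
 *		recover();		(a longjmp(&jb) landed here as 1)
 *	}
 */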

/*
 * void dumpsys(void)
 *
 * Mimic cpu_switchto() for postmortem debugging.
 */
ENTRY(dumpsys)
	/* Build a fake switch frame. */
	pushq	%rbx
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	/* Save a context. */
	movq	$dumppcb, %rax
	movq	%rsp, PCB_RSP(%rax)
	movq	%rbp, PCB_RBP(%rax)

	call	_C_LABEL(dodumpsys)

	addq	$(5*8), %rsp	/* sizeof(switchframe) - sizeof(%rip) */
	ret
END(dumpsys)

/*
 * struct lwp *cpu_switchto(struct lwp *oldlwp, struct lwp *newlwp,
 *     bool returning)
 *
 *	1. if (oldlwp != NULL), save its context.
 *	2. then, restore context of newlwp.
 *
 * Note that the stack frame layout is known to "struct switchframe" in
 * <machine/frame.h> and to the code in cpu_lwp_fork() which initializes
 * it for a new lwp.
 */
ENTRY(cpu_switchto)
	pushq	%rbx
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	movq	%rdi,%r13	/* oldlwp */
	movq	%rsi,%r12	/* newlwp */

	testq	%r13,%r13	/* oldlwp = NULL ? */
	jz	.Lskip_save

	/* Save old context. */
	movq	L_PCB(%r13),%rax
	movq	%rsp,PCB_RSP(%rax)
	movq	%rbp,PCB_RBP(%rax)
.Lskip_save:

	/* Switch to newlwp's stack. */
	movq	L_PCB(%r12),%r14
	movq	PCB_RSP(%r14),%rsp
	movq	PCB_RBP(%r14),%rbp

	/*
	 * Set curlwp.  This must be globally visible in order to permit
	 * non-interlocked mutex release.
	 */
	movq	%r12,%rcx
	xchgq	%rcx,CPUVAR(CURLWP)

	/* Skip the rest if returning to a pinned LWP. */
	testb	%dl,%dl		/* returning = true ? */
	jnz	.Lswitch_return

#ifdef SVS
	movb	_C_LABEL(svs_enabled),%dl
	testb	%dl,%dl
	jz	.Lskip_svs
	callq	_C_LABEL(svs_lwp_switch)
.Lskip_svs:
#endif

#ifndef XEN
	movq	%r13,%rdi
	movq	%r12,%rsi
	callq	_C_LABEL(speculation_barrier)
#endif

	/* Switch ring0 stack */
#ifdef SVS
	movb	_C_LABEL(svs_enabled),%al
	testb	%al,%al
	jz	.Lno_svs_switch

	movq	CPUVAR(RSP0),%rax
	movq	CPUVAR(TSS),%rdi
	movq	%rax,TSS_RSP0(%rdi)
	jmp	.Lring0_switched

.Lno_svs_switch:
#endif

#if !defined(XENPV)
	movq	PCB_RSP0(%r14),%rax
	movq	CPUVAR(TSS),%rdi
	movq	%rax,TSS_RSP0(%rdi)
#else
	movq	%r14,%rdi
	callq	_C_LABEL(x86_64_switch_context)
#endif
.Lring0_switched:

	/* Switch the dbregs. */
	movq	%r13,%rdi
	movq	%r12,%rsi
	callq	_C_LABEL(x86_dbregs_switch)

	/* Switch the FPU. */
	movq	%r13,%rdi
	movq	%r12,%rsi
	callq	_C_LABEL(fpu_switch)

	/* Don't bother with the rest if switching to a system process. */
	testl	$LW_SYSTEM,L_FLAG(%r12)
	jnz	.Lswitch_return

	/* Is this process using RAS (restartable atomic sequences)? */
	movq	L_PROC(%r12),%rdi
	cmpq	$0,P_RASLIST(%rdi)
	je	.Lno_RAS

	/* Handle restartable atomic sequences (RAS). */
	movq	L_MD_REGS(%r12),%rbx
	movq	TF_RIP(%rbx),%rsi
	call	_C_LABEL(ras_lookup)
	cmpq	$-1,%rax
	je	.Lno_RAS
	movq	%rax,TF_RIP(%rbx)
.Lno_RAS:

#ifndef XENPV
	/* Raise the IPL to IPL_HIGH. Dropping the priority is deferred until
	 * mi_switch(), when cpu_switchto() returns. XXX Still needed? */
	movl	$IPL_HIGH,CPUVAR(ILEVEL)

	/* The 32bit LWPs are handled differently. */
	testl	$PCB_COMPAT32,PCB_FLAGS(%r14)
	jnz	.Llwp_32bit

.Llwp_64bit:
	/* Set default 64bit values in %ds, %es, %fs and %gs. */
	movq	$GSEL(GUDATA_SEL, SEL_UPL),%rax
	movw	%ax,%ds
	movw	%ax,%es
	xorq	%rax,%rax
	movw	%ax,%fs
	CLI(cx)
	SWAPGS
	movw	%ax,%gs
	SWAPGS
	STI(cx)

	/* Zero out GDT descriptors. */
	movq	CPUVAR(GDT),%rcx
	movq	%rax,(GUFS_SEL*8)(%rcx)
	movq	%rax,(GUGS_SEL*8)(%rcx)

	/* Reload 64-bit %fs/%gs MSRs. */
	movl	$MSR_FSBASE,%ecx
	movl	PCB_FS(%r14),%eax
	movl	4+PCB_FS(%r14),%edx
	wrmsr
	movl	$MSR_KERNELGSBASE,%ecx
	movl	PCB_GS(%r14),%eax
	movl	4+PCB_GS(%r14),%edx
	wrmsr

	jmp	.Lswitch_return

.Llwp_32bit:
	/* Reload %fs/%gs GDT descriptors. */
	movq	CPUVAR(GDT),%rcx
	movq	PCB_FS(%r14),%rax
	movq	%rax,(GUFS_SEL*8)(%rcx)
	movq	PCB_GS(%r14),%rax
	movq	%rax,(GUGS_SEL*8)(%rcx)

	/* Set default 32bit values in %ds, %es, %fs and %gs. */
	movq	L_MD_REGS(%r12),%rbx
	movq	$GSEL(GUDATA32_SEL, SEL_UPL),%rax
	movw	%ax,%ds
	movw	%ax,%es
	movw	%ax,%fs
	CLI(ax)
	SWAPGS
	movw	%ax,%gs
	SWAPGS
	STI(ax)
#else
	movq	%r12,%rdi
	callq	_C_LABEL(x86_64_tls_switch)
#endif

.Lswitch_return:
	/* Return to the new LWP, returning 'oldlwp' in %rax. */
	KMSAN_INIT_RET(8)
	movq	%r13,%rax
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbx
	ret
END(cpu_switchto)

/*
 * void savectx(struct pcb *pcb);
 *
 * Update pcb, saving current processor state.
 */
ENTRY(savectx)
	/* Save stack pointers. */
	movq	%rsp,PCB_RSP(%rdi)
	movq	%rbp,PCB_RBP(%rdi)
	ret
END(savectx)

/*
 * Syscall handler.
 */
ENTRY(handle_syscall)
	STI(si)

	movq	CPUVAR(CURLWP),%r14
	incq	CPUVAR(NSYSCALL)	/* count it atomically */
	movq	%rsp,L_MD_REGS(%r14)	/* save pointer to frame */
	movq	L_PROC(%r14),%r15
	andl	$~MDL_IRET,L_MD_FLAGS(%r14)   /* Allow sysret return */
	movq	%rsp,%rdi		/* Pass frame as arg0 */
	call	*P_MD_SYSCALL(%r15)
.Lsyscall_checkast:
	/*
	 * Disable interrupts to avoid new ASTs (etc) being added and
	 * to ensure we don't take an interrupt with some of the user
	 * registers loaded.
	 */
	CLI(si)
	/* Check for ASTs on exit to user mode. */
	movl	L_MD_ASTPENDING(%r14),%eax
	orl	CPUVAR(WANT_PMAPLOAD),%eax
	jnz	9f

#ifdef DIAGNOSTIC
	cmpl	$IPL_NONE,CPUVAR(ILEVEL)
	jne	.Lspl_error
#endif

	HANDLE_DEFERRED_FPU

	/*
	 * Decide if we need to take a slow path. That's the case when we
	 * want to reload %cs and %ss on a 64bit LWP (MDL_IRET set), or when
	 * we're returning to a 32bit LWP (MDL_COMPAT32 set).
	 *
	 * In either case, we jump into intrfastexit and return to userland
	 * with the iret instruction.
	 */
	testl	$(MDL_IRET|MDL_COMPAT32),L_MD_FLAGS(%r14)
	jnz	intrfastexit

	jmp	syscall_sysret

#ifdef DIAGNOSTIC
.Lspl_error:
	movabsq	$4f,%rdi
	movl	CPUVAR(ILEVEL),%esi
	call	_C_LABEL(panic)
4:	.asciz	"spl not lowered on syscall, ilevel=%x"
#endif

/* AST pending or pmap load needed */
9:
	cmpl	$0,CPUVAR(WANT_PMAPLOAD)
	jz	10f
	STI(si)
	call	_C_LABEL(do_pmap_load)
	jmp	.Lsyscall_checkast	/* re-check ASTs */
10:
	CLEAR_ASTPENDING(%r14)
	STI(si)
	/* Pushed T_ASTFLT into tf_trapno on entry. */
	movq	%rsp,%rdi
	KMSAN_INIT_ARG(8)
	call	_C_LABEL(trap)
	jmp	.Lsyscall_checkast	/* re-check ASTs */
END(handle_syscall)

/*
 * void lwp_trampoline(void);
 *
 * This is a trampoline function run by newly created LWPs
 * in order to do additional setup in their context.
 */
ENTRY(lwp_trampoline)
	movq	%rbp,%rsi
	movq	%rbp,%r14	/* for .Lsyscall_checkast */
	movq	%rax,%rdi
	xorq	%rbp,%rbp
	KMSAN_INIT_ARG(16)
	call	_C_LABEL(lwp_startup)
	movq	%r13,%rdi
	KMSAN_INIT_ARG(8)
	call	*%r12
	jmp	.Lsyscall_checkast
END(lwp_trampoline)

/*
 * Entry points of the 'syscall' instruction, 64bit and 32bit mode.
 */

#define SP(x)	(x)-(TF_SS+8)(%rax)
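
/*
 * Here %rax holds the top of the LWP's kernel stack (rsp0); the frame is
 * built at the very top of that stack, so trapframe field 'x' lives at
 * offset x - (TF_SS + 8) from %rax. SP(TF_SS), for instance, is the
 * topmost slot, at -8(%rax).
 */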

.macro	SYSCALL_ENTRY	name,is_svs
IDTVEC(\name)
#ifndef XENPV
	/*
	 * The user %rip is in %rcx and the user %rflags in %r11. The kernel %cs
	 * and %ss are loaded, but nothing else is.
	 *
	 * The 'swapgs' instruction gives us access to cpu-specific memory where
	 * we can save a user register and then read the LWP's kernel stack
	 * pointer.
	 *
	 * This code doesn't seem to set %ds; this may not matter since it is
	 * ignored in 64bit mode. OTOH, the syscall instruction sets %ss, and
	 * that is ignored as well.
	 */
	swapgs

	/* Get the LWP's kernel stack pointer in %rax */
	.if	\is_svs
		movabs	%rax,SVS_UTLS+UTLS_SCRATCH
		movabs	SVS_UTLS+UTLS_RSP0,%rax
	.else
		movq	%rax,CPUVAR(SCRATCH)
		movq	CPUVAR(CURLWP),%rax
		movq	L_PCB(%rax),%rax
		movq	PCB_RSP0(%rax),%rax
	.endif

	/* Make stack look like an 'int nn' frame */
	movq	$(LSEL(LUDATA_SEL, SEL_UPL)),SP(TF_SS)	/* user %ss */
	movq	%rsp,SP(TF_RSP)				/* user %rsp */
	movq	%r11,SP(TF_RFLAGS)			/* user %rflags */
	movq	$(LSEL(LUCODE_SEL, SEL_UPL)),SP(TF_CS)	/* user %cs */
	movq	%rcx,SP(TF_RIP)				/* user %rip */
	leaq	SP(0),%rsp		/* %rsp now valid after frame */

	/* Restore %rax */
	.if	\is_svs
		movabs	SVS_UTLS+UTLS_SCRATCH,%rax
	.else
		movq	CPUVAR(SCRATCH),%rax
	.endif

	movq	$2,TF_ERR(%rsp)		/* syscall instruction size */
	movq	$T_ASTFLT,TF_TRAPNO(%rsp)
#else
	/* Xen already switched to kernel stack */
	addq	$0x10,%rsp	/* gap to match cs:rip */
	pushq	$2		/* error code */
	pushq	$T_ASTFLT
	subq	$TF_REGSIZE,%rsp
	cld
#endif
	INTR_SAVE_GPRS
	IBRS_ENTER
	movw	$GSEL(GUDATA_SEL, SEL_UPL),TF_DS(%rsp)
	movw	$GSEL(GUDATA_SEL, SEL_UPL),TF_ES(%rsp)
	movw	$0,TF_FS(%rsp)
	movw	$0,TF_GS(%rsp)
	.if	\is_svs
		SVS_ENTER
	.endif
	KMSAN_ENTER
	jmp	handle_syscall
IDTVEC_END(\name)
.endm

SYSCALL_ENTRY	syscall,is_svs=0

	TEXT_USER_BEGIN

#ifdef SVS
SYSCALL_ENTRY	syscall_svs,is_svs=1
#endif

IDTVEC(syscall32)
	sysret		/* go away please */
IDTVEC_END(syscall32)

	TEXT_USER_END

/*
 * osyscall()
 *
 * Trap gate entry for int $80 syscall, also used by sigreturn.
 */
	TEXT_USER_BEGIN
IDTVEC(osyscall)
#ifdef XENPV
	movq (%rsp),%rcx
	movq 8(%rsp),%r11
	addq $0x10,%rsp
#endif
	pushq	$2		/* size of instruction for restart */
	pushq	$T_ASTFLT	/* trap # for doing ASTs */
	INTRENTRY
	jmp	handle_syscall
IDTVEC_END(osyscall)
	TEXT_USER_END

/*
 * Return to userland via 'sysret'.
 */
	TEXT_USER_BEGIN
	_ALIGN_TEXT
LABEL(syscall_sysret)
	KMSAN_LEAVE
	MDS_LEAVE
	SVS_LEAVE
	IBRS_LEAVE
	INTR_RESTORE_GPRS
	SWAPGS
#ifndef XENPV
	movq	TF_RIP(%rsp),%rcx	/* %rip for sysret */
	movq	TF_RFLAGS(%rsp),%r11	/* %flags for sysret */
	movq	TF_RSP(%rsp),%rsp
	sysretq
#else
	addq	$TF_RIP,%rsp
	pushq	$256	/* VGCF_IN_SYSCALL */
	jmp	HYPERVISOR_iret
#endif
END(syscall_sysret)
	TEXT_USER_END

/*
 * bool sse2_idlezero_page(void *pg)
 *
 * Zero a page without polluting the cache.  Preemption must be
 * disabled by the caller. Abort if a preemption is pending.
 * Returns true if the page is zeroed, false if not.
 */
ENTRY(sse2_idlezero_page)
	pushq	%rbp
	movq	%rsp,%rbp
	movl	$(PAGE_SIZE/64), %ecx
	xorq	%rax, %rax
	.align	16
1:
	cmpl	$0, CPUVAR(RESCHED)
	jnz	2f
	movnti	%rax, 0(%rdi)
	movnti	%rax, 8(%rdi)
	movnti	%rax, 16(%rdi)
	movnti	%rax, 24(%rdi)
	movnti	%rax, 32(%rdi)
	movnti	%rax, 40(%rdi)
	movnti	%rax, 48(%rdi)
	movnti	%rax, 56(%rdi)
	addq	$64, %rdi
	decl	%ecx
	jnz	1b
	sfence
	incl	%eax
	popq	%rbp
	KMSAN_INIT_RET(1)
	ret
2:
	sfence
	popq	%rbp
	KMSAN_INIT_RET(1)
	ret
END(sse2_idlezero_page)

/*
 * void pagezero(vaddr_t va)
 *
 * Zero a page.
 */
ENTRY(pagezero)
	pushq	%rbp
	movq	%rsp,%rbp
	movq	$(PAGE_SIZE / 8),%rcx
	xorq	%rax,%rax
	rep
	stosq
	leave
	ret
END(pagezero)

	TEXT_USER_BEGIN

/*
 * In intrfastexit, we advance %rsp at the beginning. We then access the
 * segment registers in the trapframe with TF_BACKW (backwards). See the
 * documentation in amd64_trap.S for an explanation.
 */

#define TF_BACKW(val, reg)	(val - (TF_REGSIZE+16))(reg)

	_ALIGN_TEXT
	.type intrfastexit,@function
LABEL(intrfastexit)
	NOT_XEN(cli;)
	KMSAN_LEAVE

	testb	$SEL_UPL,TF_CS(%rsp)
	jz	.Lkexit

	MDS_LEAVE
	SVS_LEAVE
	IBRS_LEAVE
	INTR_RESTORE_GPRS
	addq	$(TF_REGSIZE+16),%rsp	/* iret frame */
	SWAPGS

	cmpw	$LSEL(LUCODE_SEL, SEL_UPL),TF_BACKW(TF_CS, %rsp)
	je	do_iret
	cmpw	$GSEL(GUCODE_SEL, SEL_UPL),TF_BACKW(TF_CS, %rsp)
	je	do_iret
#ifdef XENPV
	cmpw	$FLAT_RING3_CS64,TF_BACKW(TF_CS, %rsp)
	je	do_iret
#endif

do_mov_es:
	movw	TF_BACKW(TF_ES, %rsp),%es
do_mov_ds:
	movw	TF_BACKW(TF_DS, %rsp),%ds
do_mov_fs:
	movw	TF_BACKW(TF_FS, %rsp),%fs
#ifndef XENPV
do_mov_gs:
	movw	TF_BACKW(TF_GS, %rsp),%gs
#endif

do_iret:
	iretq

.Lkexit:
	INTR_RESTORE_GPRS
	addq	$(TF_REGSIZE+16),%rsp	/* iret frame */
	iretq
END(intrfastexit)

	TEXT_USER_END

#ifdef SVS
	.globl	svs_enter, svs_enter_end
	.globl	svs_enter_altstack, svs_enter_altstack_end
	.globl	svs_leave, svs_leave_end
	.globl	svs_leave_altstack, svs_leave_altstack_end

LABEL(svs_enter)
	movabs	SVS_UTLS+UTLS_KPDIRPA,%rax
	movq	%rax,%cr3
	movq	CPUVAR(KRSP0),%rsp
LABEL(svs_enter_end)

LABEL(svs_enter_altstack)
	testb	$SEL_UPL,TF_CS(%rsp)
	jz	1234f
	movabs	SVS_UTLS+UTLS_KPDIRPA,%rax
	movq	%rax,%cr3
1234:
LABEL(svs_enter_altstack_end)

LABEL(svs_enter_nmi)
	movq	%cr3,%rax
	movq	%rax,(FRAMESIZE+1*8)(%rsp)	/* nmistore->scratch */
	movq	(FRAMESIZE+0*8)(%rsp),%rax	/* nmistore->cr3 */
	movq	%rax,%cr3
LABEL(svs_enter_nmi_end)

LABEL(svs_leave)
	movq	CPUVAR(URSP0),%rsp
	movq	CPUVAR(UPDIRPA),%rax
	movq	%rax,%cr3
LABEL(svs_leave_end)

LABEL(svs_leave_altstack)
	testb	$SEL_UPL,TF_CS(%rsp)
	jz	1234f
	movq	CPUVAR(UPDIRPA),%rax
	movq	%rax,%cr3
1234:
LABEL(svs_leave_altstack_end)

LABEL(svs_leave_nmi)
	movq	(FRAMESIZE+1*8)(%rsp),%rax	/* nmistore->scratch */
	movq	%rax,%cr3
LABEL(svs_leave_nmi_end)
#endif

	.globl	ibrs_enter, ibrs_enter_end
	.globl	ibrs_leave, ibrs_leave_end

	/* IBRS <- 1 */
LABEL(ibrs_enter)
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	orl	$IA32_SPEC_CTRL_IBRS,%eax
	wrmsr
LABEL(ibrs_enter_end)

	/* IBRS <- 0 */
LABEL(ibrs_leave)
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	andl	$~IA32_SPEC_CTRL_IBRS,%eax
	wrmsr
LABEL(ibrs_leave_end)

LABEL(noibrs_enter)
	NOIBRS_ENTER
LABEL(noibrs_enter_end)

LABEL(noibrs_leave)
	NOIBRS_LEAVE
LABEL(noibrs_leave_end)

	.globl	mds_leave, mds_leave_end

LABEL(mds_leave)
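	/*
	 * On CPUs with the MD_CLEAR feature, VERW on a valid selector also
	 * flushes the microarchitectural buffers (MDS mitigation).
	 */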
	pushq	$GSEL(GDATA_SEL, SEL_KPL)
	verw	(%rsp)
	addq	$8,%rsp
LABEL(mds_leave_end)

LABEL(nomds_leave)
	NOMDS_LEAVE
LABEL(nomds_leave_end)