File: [cvs.NetBSD.org] / src / sys / arch / powerpc / include / cpu.h (download)
Revision 1.119, Sat Aug 14 17:51:19 2021 UTC (2 years, 7 months ago) by ryo
Branch: MAIN
Changes since 1.118: +5 -1
lines
Improved the performance of kernel profiling on MULTIPROCESSOR, and made it possible to get profiling data for each CPU.
In the current implementation, locks are acquired at the entrance of the mcount
internal function, so the higher the number of cores, the more lock contention
occurs, making profiling in a MULTIPROCESSOR environment unusably
slow. Profiling buffers have been changed to be reserved for each CPU,
improving profiling performance in MP by several to several dozen times.
- Eliminated cpu_simple_lock in mcount internal function, using per-CPU buffers.
- Add ci_gmon member to struct cpu_info of each MP arch.
- Add kern.profiling.percpu node in sysctl tree.
- Add new -c <cpuid> option to kgmon(8) to specify the cpuid, like OpenBSD.
For compatibility, if the -c option is not specified, the entire system can be
operated as before, and the -p option will get the total profiling data for
all CPUs.
|
/* $NetBSD: cpu.h,v 1.119 2021/08/14 17:51:19 ryo Exp $ */
/*
* Copyright (C) 1999 Wolfgang Solfrank.
* Copyright (C) 1999 TooLs GmbH.
* Copyright (C) 1995-1997 Wolfgang Solfrank.
* Copyright (C) 1995-1997 TooLs GmbH.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by TooLs GmbH.
* 4. The name of TooLs GmbH may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _POWERPC_CPU_H_
#define _POWERPC_CPU_H_
/*
 * Cache description: a size and a line size for the data cache and for
 * the instruction cache.  struct cpu_info embeds one of these as ci_ci.
 * NOTE(review): the units are presumably bytes -- confirm against the
 * code that fills these fields in.
 */
struct cache_info {
int dcache_size;
int dcache_line_size;
int icache_size;
int icache_line_size;
};
#if defined(_KERNEL) || defined(_KMEMUSER)
#if defined(_KERNEL_OPT)
#include "opt_gprof.h"
#include "opt_modular.h"
#include "opt_multiprocessor.h"
#include "opt_ppcarch.h"
#endif
#ifdef _KERNEL
#include <sys/intr.h>
#include <sys/device_if.h>
#include <sys/evcnt.h>
#include <sys/param.h>
#include <sys/kernel.h>
#endif
#include <sys/cpu_data.h>
#ifdef _KERNEL
/*
 * Start of each save area, as a multiple of CPUSAVE_LEN.  CPUSAVE_LEN is
 * defined after these macros, which is fine because a macro body is only
 * expanded where the macro is used.
 */
#define CI_SAVETEMP (0*CPUSAVE_LEN)
#define CI_SAVEDDB (1*CPUSAVE_LEN)
#define CI_SAVEIPKDB (2*CPUSAVE_LEN) /* obsolete */
#define CI_SAVEMMU (3*CPUSAVE_LEN)
#define CI_SAVEMAX (4*CPUSAVE_LEN)
/* Number of entries in one save area; matches the CPUSAVE_* indices below. */
#define CPUSAVE_LEN 8
/*
 * CPUSAVE_SIZE is the element count of ci_savearea in struct cpu_info.
 * NOTE(review): CI_SAVEMAX already contains a factor of CPUSAVE_LEN, so the
 * generic definition evaluates to 4*8*8 = 256 -- confirm that is intended.
 */
#if defined(PPC_BOOKE) && !defined(MODULAR) && !defined(_MODULE)
#define CPUSAVE_SIZE 128
#else
#define CPUSAVE_SIZE (CI_SAVEMAX*CPUSAVE_LEN)
CTASSERT(CPUSAVE_SIZE >= 128);
#endif
/*
 * Index of each saved register within a single save area.  DEAR/DAR and
 * ESR/DSISR deliberately share an index: one name per CPU family.
 */
#define CPUSAVE_R28 0 /* where r28 gets saved */
#define CPUSAVE_R29 1 /* where r29 gets saved */
#define CPUSAVE_R30 2 /* where r30 gets saved */
#define CPUSAVE_R31 3 /* where r31 gets saved */
#define CPUSAVE_DEAR 4 /* where IBM4XX SPR_DEAR gets saved */
#define CPUSAVE_DAR 4 /* where OEA SPR_DAR gets saved */
#define CPUSAVE_ESR 5 /* where IBM4XX SPR_ESR gets saved */
#define CPUSAVE_DSISR 5 /* where OEA SPR_DSISR gets saved */
#define CPUSAVE_SRR0 6 /* where SRR0 gets saved */
#define CPUSAVE_SRR1 7 /* where SRR1 gets saved */
#endif /* _KERNEL */
/*
 * Per-CPU state.
 *
 * The structure is visible to _KERNEL and _KMEMUSER consumers, but every
 * member after ci_data exists only when _KERNEL is defined.  Several
 * members are further conditional on the CPU family (PPC_OEA*, PPC_BOOKE,
 * PPC_IBM4XX) or on MODULAR/_MODULE, so the layout depends on the kernel
 * configuration.
 */
struct cpu_info {
struct cpu_data ci_data; /* MI per-cpu data */
#ifdef _KERNEL
device_t ci_dev; /* device of corresponding cpu */
struct cpu_softc *ci_softc; /* private cpu info */
struct lwp *ci_curlwp; /* current owner of the processor */
struct lwp *ci_onproc; /* current user LWP / kthread */
struct pcb *ci_curpcb;
struct pmap *ci_curpm;
#if defined(PPC_OEA) || defined(PPC_OEA601) || defined(PPC_OEA64) || \
defined(PPC_OEA64_BRIDGE) || defined(MODULAR) || defined(_MODULE)
void *ci_battable; /* BAT table in use by this CPU */
#endif
struct lwp *ci_softlwps[SOFTINT_COUNT];
int ci_cpuid; /* from SPR_PIR */
int ci_want_resched;
volatile uint64_t ci_lastintr;
volatile u_long ci_lasttb;
volatile int ci_tickspending;
volatile int ci_cpl;
volatile int ci_iactive;
volatile int ci_idepth;
/*
 * un1_pad64 keeps this union at least 64 bits wide whether or not
 * un1_ipending is compiled in.
 */
union {
#if !defined(PPC_BOOKE) && !defined(_MODULE)
volatile imask_t un1_ipending;
#define ci_ipending ci_un1.un1_ipending
#endif
uint64_t un1_pad64;
} ci_un1;
volatile uint32_t ci_pending_ipis;
int ci_mtx_oldspl;
int ci_mtx_count;
#if defined(PPC_IBM4XX) || \
((defined(MODULAR) || defined(_MODULE)) && !defined(_LP64))
char *ci_intstk;
#endif
/* Register save areas; see the CI_SAVE* and CPUSAVE_* macros. */
register_t ci_savearea[CPUSAVE_SIZE];
#if defined(PPC_BOOKE) || \
((defined(MODULAR) || defined(_MODULE)) && !defined(_LP64))
uint32_t ci_pmap_asid_cur;
union pmap_segtab *ci_pmap_segtabs[2];
#define ci_pmap_kern_segtab ci_pmap_segtabs[0]
#define ci_pmap_user_segtab ci_pmap_segtabs[1]
struct pmap_tlb_info *ci_tlb_info;
#endif /* PPC_BOOKE || ((MODULAR || _MODULE) && !_LP64) */
struct cache_info ci_ci;
void *ci_sysmon_cookie;
void (*ci_idlespin)(void);
uint32_t ci_khz;
/* Event counters, one per trap/interrupt category. */
struct evcnt ci_ev_clock; /* clock intrs */
struct evcnt ci_ev_statclock; /* stat clock */
struct evcnt ci_ev_traps; /* calls to trap() */
struct evcnt ci_ev_kdsi; /* kernel DSI traps */
struct evcnt ci_ev_udsi; /* user DSI traps */
struct evcnt ci_ev_udsi_fatal; /* user DSI trap failures */
struct evcnt ci_ev_kisi; /* kernel ISI traps */
struct evcnt ci_ev_isi; /* user ISI traps */
struct evcnt ci_ev_isi_fatal; /* user ISI trap failures */
struct evcnt ci_ev_pgm; /* user PGM traps */
struct evcnt ci_ev_debug; /* user debug traps */
struct evcnt ci_ev_fpu; /* FPU traps */
struct evcnt ci_ev_fpusw; /* FPU context switch */
struct evcnt ci_ev_ali; /* Alignment traps */
struct evcnt ci_ev_ali_fatal; /* Alignment fatal trap */
struct evcnt ci_ev_scalls; /* system call traps */
struct evcnt ci_ev_vec; /* Altivec traps */
struct evcnt ci_ev_vecsw; /* Altivec context switches */
struct evcnt ci_ev_umchk; /* user MCHK events */
struct evcnt ci_ev_ipi; /* IPIs received */
struct evcnt ci_ev_tlbmiss_soft; /* tlb miss (no trap) */
struct evcnt ci_ev_dtlbmiss_hard; /* data tlb miss (trap) */
struct evcnt ci_ev_itlbmiss_hard; /* instruction tlb miss (trap) */
/* Present only in profiling (GPROF) kernels built with MULTIPROCESSOR. */
#if defined(GPROF) && defined(MULTIPROCESSOR)
struct gmonparam *ci_gmon; /* MI per-cpu GPROF */
#endif
#endif /* _KERNEL */
};
#endif /* _KERNEL || _KMEMUSER */
#ifdef _KERNEL
#if defined(MULTIPROCESSOR) && !defined(_MODULE)
/*
 * Register values and bookkeeping handed to the md_setup_trampoline() and
 * md_*_timebase() hooks declared later in this header, which take it
 * through a volatile pointer.
 * NOTE(review): presumably filled in by the boot CPU and consumed by a
 * secondary CPU while it starts up -- confirm against cpu_spinup().
 *
 * Which hatch_* members exist, and the width of hatch_hid0, depends on the
 * CPU family selected at build time.
 */
struct cpu_hatch_data {
int hatch_running;
device_t hatch_self;
struct cpu_info *hatch_ci;
uint32_t hatch_tbu;
uint32_t hatch_tbl;
#if defined(PPC_OEA64_BRIDGE) || defined (_ARCH_PPC64)
uint64_t hatch_hid0;
uint64_t hatch_hid1;
uint64_t hatch_hid4;
uint64_t hatch_hid5;
#else
uint32_t hatch_hid0;
#endif
uint32_t hatch_pir;
#if defined(PPC_OEA) || defined(PPC_OEA64_BRIDGE)
uintptr_t hatch_asr;
uintptr_t hatch_sdr1;
uint32_t hatch_sr[16];
uintptr_t hatch_ibatu[8], hatch_ibatl[8];
uintptr_t hatch_dbatu[8], hatch_dbatl[8];
#endif
#if defined(PPC_BOOKE)
vaddr_t hatch_sp;
u_int hatch_tlbidx;
#endif
};
/*
 * One kcpuset_t per CPU state (running, hatched, paused, resumed, halted).
 * A single global instance named cpuset_info is declared below; it is
 * defined outside this header.
 */
struct cpuset_info {
kcpuset_t *cpus_running;
kcpuset_t *cpus_hatched;
kcpuset_t *cpus_paused;
kcpuset_t *cpus_resumed;
kcpuset_t *cpus_halted;
};
extern struct cpuset_info cpuset_info;
#endif /* MULTIPROCESSOR && !_MODULE */
/*
 * CPU enumeration helpers.
 *
 * CPU_INFO_FOREACH(cii, ci) expands to the three clauses of a for
 * statement, i.e. it is used as "for (CPU_INFO_FOREACH(cii, ci)) ...".
 *
 * MULTIPROCESSOR or module build: cpu_number() reads ci_index from
 * curcpu() (the "+ 0" makes the expansion an rvalue), the primary CPU is
 * the one whose ci_cpuid is 0, and the iterator walks cpu_info[] for ncpu
 * entries, or one entry while ncpu is still 0.
 *
 * Otherwise: there is a single CPU, numbered 0 and always primary, and the
 * iterator visits curcpu() exactly once without using cii.
 */
#if defined(MULTIPROCESSOR) || defined(_MODULE)
#define cpu_number() (curcpu()->ci_index + 0)
#define CPU_IS_PRIMARY(ci) ((ci)->ci_cpuid == 0)
#define CPU_INFO_ITERATOR int
#define CPU_INFO_FOREACH(cii, ci) \
cii = 0, ci = &cpu_info[0]; cii < (ncpu ? ncpu : 1); cii++, ci++
#else
#define cpu_number() 0
#define CPU_IS_PRIMARY(ci) true
#define CPU_INFO_ITERATOR int
#define CPU_INFO_FOREACH(cii, ci) \
(void)cii, ci = curcpu(); ci != NULL; ci = NULL
#endif /* MULTIPROCESSOR || _MODULE */
/* Array of per-CPU structures; defined outside this header. */
extern struct cpu_info cpu_info[];
/*
 * curcpu: return the struct cpu_info pointer that is kept in SPRG0.
 *
 * The prototype carries the __pure attribute; the definition follows it.
 */
static __inline struct cpu_info * curcpu(void) __pure;
static __inline struct cpu_info *
curcpu(void)
{
	struct cpu_info *self;

	__asm volatile ("mfsprg0 %[self]" : [self] "=r"(self));
	return self;
}
/*
 * Accessors for the current LWP, PCB and pmap.
 *
 * With clang, curlwp is read from curcpu()->ci_curlwp.  With other
 * compilers it is a global register variable bound to r13.
 * curpcb and curpm always go through curcpu().
 */
#ifdef __clang__
#define curlwp (curcpu()->ci_curlwp)
#else
register struct lwp *powerpc_curlwp __asm("r13");
#define curlwp powerpc_curlwp
#endif
#define curpcb (curcpu()->ci_curpcb)
#define curpm (curcpu()->ci_curpm)
/*
 * mfmsr: read the Machine State Register and return its value.
 */
static __inline register_t
mfmsr(void)
{
	register_t cur_msr;

	__asm volatile ("mfmsr %[msr]" : [msr] "=r"(cur_msr));
	return cur_msr;
}
/*
 * mtmsr: write "msr" to the Machine State Register.
 */
static __inline void
mtmsr(register_t msr)
{
	/*
	 * Sanity checks that are disabled in this revision:
	 *	KASSERT(msr & PSL_CE);
	 *	KASSERT(msr & PSL_DE);
	 */
	__asm volatile ("mtmsr %[val]" : : [val] "r"(msr));
}
#if !defined(_MODULE)
/*
 * mftbl: return the lower 32 bits of the time base.
 *
 * The instruction is chosen at build time: mftblo on PPC_IBM403, a read of
 * SPR 268 on PPC_BOOKE, and mftbl everywhere else.
 */
static __inline uint32_t
mftbl(void)
{
	uint32_t lower;

#if defined(PPC_IBM403)
	__asm volatile ("mftblo %[lo]" : [lo] "=r"(lower));
#elif defined(PPC_BOOKE)
	__asm volatile ("mfspr %[lo],268" : [lo] "=r"(lower));
#else
	__asm volatile ("mftbl %[lo]" : [lo] "=r"(lower));
#endif
	return lower;
}
/*
 * mftb: return the full 64-bit time base.
 *
 * On _ARCH_PPC64 a single mftb is used.  Otherwise the upper half is read,
 * then the lower half, then the upper half again into tmp; if the two
 * upper reads differ the sequence is retried, so the returned halves
 * belong to the same 64-bit value.  %[tb] names the upper-half register
 * of the 64-bit operand and %L[tb] the lower-half one.
 */
static __inline uint64_t
mftb(void)
{
uint64_t tb;
#ifdef _ARCH_PPC64
__asm volatile ("mftb %0" : "=r"(tb));
#else
int tmp;
__asm volatile (
#ifdef PPC_IBM403
"1: mftbhi %[tb]" "\n"
" mftblo %L[tb]" "\n"
" mftbhi %[tmp]" "\n"
#elif defined(PPC_BOOKE)
"1: mfspr %[tb],269" "\n"
" mfspr %L[tb],268" "\n"
" mfspr %[tmp],269" "\n"
#else
"1: mftbu %[tb]" "\n"
" mftb %L[tb]" "\n"
" mftbu %[tmp]" "\n"
#endif
/* Retry if the upper half changed while the lower half was being read. */
" cmplw %[tb],%[tmp]" "\n"
" bne- 1b" "\n"
: [tb] "=r" (tb), [tmp] "=r"(tmp)
:: "cr0");
#endif
return tb;
}
/*
 * mfrtcl: read the RTCL register (lower half of the real-time clock) and
 * return its value.
 */
static __inline uint32_t
mfrtcl(void)
{
	uint32_t lower;

	__asm volatile ("mfrtcl %[lo]" : [lo] "=r"(lower));
	return lower;
}
/*
 * mfrtc: read both halves of the real-time clock.
 *
 * Stores RTCU in rtcp[0] and RTCL in rtcp[1]; rtcp must therefore point to
 * at least two uint32_t.  RTCU is read a second time into tmp and the
 * whole sequence is retried if it changed, so the two stored halves are
 * consistent with each other.
 */
static __inline void
mfrtc(uint32_t *rtcp)
{
uint32_t tmp;
__asm volatile (
"1: mfrtcu %[rtcu]" "\n"
" mfrtcl %[rtcl]" "\n"
" mfrtcu %[tmp]" "\n"
" cmplw %[rtcu],%[tmp]" "\n"
" bne- 1b"
: [rtcu] "=r"(rtcp[0]), [rtcl] "=r"(rtcp[1]), [tmp] "=r"(tmp)
:: "cr0");
}
/*
 * rtc_nanosecs: fold the two halves of the 601 real-time clock into one
 * 64-bit value.
 *
 * Returns RTCU * 10^9 + (RTCL >> 7).
 *
 * The multiplication is performed in 64 bits.  Multiplying the 32-bit RTCU
 * value by an int constant would be done in 32 bits and wrap as soon as
 * RTCU reaches 5 (5 * 10^9 > 2^32), before being widened for the result.
 */
static __inline uint64_t
rtc_nanosecs(void)
{
	/*
	 * 601 RTC/DEC registers share clock of 7.8125 MHz, 128 ns per tick.
	 * DEC has max of 25 bits, FFFFFF => 2.14748352 seconds.
	 * RTCU is seconds, 32 bits.
	 * RTCL is nano-seconds, 23 bit counter from 0 - 999,999,872
	 * (999,999,999 - 128 ns)
	 */
	uint32_t rtc[2];	/* [0] = RTCU, [1] = RTCL */

	mfrtc(rtc);

	/*
	 * NOTE(review): RTCL >> 7 is a count of 128 ns ticks rather than a
	 * nanosecond value; left unchanged here -- confirm what callers
	 * expect before touching it.
	 */
	return (uint64_t)rtc[0] * 1000000000 + (rtc[1] >> 7);
}
#endif /* !_MODULE */
/*
 * mfpvr: read the Processor Version Register and return its value.
 */
static __inline uint32_t
mfpvr(void)
{
	uint32_t version;

	__asm volatile ("mfpvr %[pvr]" : [pvr] "=r"(version));
	return version;
}
#ifdef _MODULE
/*
 * In modules, CPU_MAXNUM is taken from the low 16 bits of the ADDRESS of
 * the __CPU_MAXNUM symbol; the symbol's contents are never read.
 */
extern const char __CPU_MAXNUM;
/*
 * Mask with 0xffff to force a R_PPC_ADDR16_LO without the
 * corresponding R_PPC_ADDR16_HI relocation.
 */
#define CPU_MAXNUM (((uintptr_t)&__CPU_MAXNUM)&0xffff)
#endif /* _MODULE */
#if !defined(_MODULE)
/*
 * Kernel-proper (non-module) globals and entry points.  These are
 * declarations only; the definitions live outside this header.
 */
extern char *booted_kernel;
extern int powersave;
extern int cpu_timebase;
extern int cpu_printfataltraps;
/* CPU attachment and identification. */
struct cpu_info *
cpu_attach_common(device_t, int);
void cpu_setup(device_t, struct cpu_info *);
void cpu_identify(char *, size_t);
void cpu_probe_cache(void);
/*
 * Cache operations: the *_page variants take only an address, the others
 * take an address and a length.
 */
void dcache_wb_page(vaddr_t);
void dcache_wbinv_page(vaddr_t);
void dcache_inv_page(vaddr_t);
void dcache_zero_page(vaddr_t);
void icache_inv_page(vaddr_t);
void dcache_wb(vaddr_t, vsize_t);
void dcache_wbinv(vaddr_t, vsize_t);
void dcache_inv(vaddr_t, vsize_t);
void icache_inv(vaddr_t, vsize_t);
/* Device memory mapping. */
void * mapiodev(paddr_t, psize_t, bool);
void unmapiodev(vaddr_t, vsize_t);
int emulate_mxmsr(struct lwp *, struct trapframe *, uint32_t);
#ifdef MULTIPROCESSOR
/*
 * Hooks that receive the struct cpu_hatch_data block through a volatile
 * pointer.
 * NOTE(review): the md_ prefix presumably marks them as supplied by the
 * individual port -- confirm.
 */
int md_setup_trampoline(volatile struct cpu_hatch_data *,
struct cpu_info *);
void md_presync_timebase(volatile struct cpu_hatch_data *);
void md_start_timebase(volatile struct cpu_hatch_data *);
void md_sync_timebase(volatile struct cpu_hatch_data *);
void md_setup_interrupts(void);
/* Secondary CPU start-up. */
int cpu_spinup(device_t, struct cpu_info *);
register_t
cpu_hatch(void);
void cpu_spinup_trampoline(void);
void cpu_boot_secondary_processors(void);
/* Halting, pausing and resuming CPUs. */
void cpu_halt(void);
void cpu_halt_others(void);
void cpu_pause(struct trapframe *);
void cpu_pause_others(void);
void cpu_resume(cpuid_t);
void cpu_resume_others(void);
int cpu_is_paused(int);
void cpu_debug_dump(void);
#endif /* MULTIPROCESSOR */
#endif /* !_MODULE */
/* cpu_proc_fork() expands to nothing on this architecture. */
#define cpu_proc_fork(p1, p2)
/*
 * DELAY(n) forwards to delay(n).  Defining __HIDE_DELAY before including
 * this header suppresses both the macro and the prototype.
 */
#ifndef __HIDE_DELAY
#define DELAY(n) delay(n)
void delay(unsigned int);
#endif /* __HIDE_DELAY */
/*
 * The CLKF_* clock-frame macros and LWP_PC() are thin wrappers around the
 * functions declared right after them.
 */
#define CLKF_USERMODE(cf) cpu_clkf_usermode(cf)
#define CLKF_PC(cf) cpu_clkf_pc(cf)
#define CLKF_INTR(cf) cpu_clkf_intr(cf)
bool cpu_clkf_usermode(const struct clockframe *);
vaddr_t cpu_clkf_pc(const struct clockframe *);
bool cpu_clkf_intr(const struct clockframe *);
#define LWP_PC(l) cpu_lwp_pc(l)
vaddr_t cpu_lwp_pc(struct lwp *);
/* Remaining kernel entry points; definitions are outside this header. */
void cpu_ast(struct lwp *, struct cpu_info *);
void * cpu_uarea_alloc(bool);
bool cpu_uarea_free(void *);
void cpu_signotify(struct lwp *);
void cpu_need_proftick(struct lwp *);
void cpu_fixup_stubs(void);
/*
 * Declarations that are hidden on PPC_IBM4XX and PPC_BOOKE kernels and in
 * modules.
 * NOTE(review): the oea_ prefix suggests these belong to the OEA CPU
 * family -- confirm.
 */
#if !defined(PPC_IBM4XX) && !defined(PPC_BOOKE) && !defined(_MODULE)
int cpu_get_dfs(void);
void cpu_set_dfs(int);
void oea_init(void (*)(void));
void oea_startup(const char *);
void oea_dumpsys(void);
void oea_install_extint(void (*)(void));
paddr_t kvtop(void *);
extern paddr_t msgbuf_paddr;
extern int cpu_altivec;
#endif
#endif /* _KERNEL */
/* XXX The below breaks unified pmap on ppc32 */
/*
 * Compile-time cache line size, chosen by CPU family.  It is only provided
 * when nothing has defined CACHELINESIZE already, outside modules, and for
 * _KERNEL or _STANDALONE builds.  Both macros always get the same value:
 * 16 on PPC_IBM403, 128 on PPC_OEA64_BRIDGE, 32 otherwise.
 */
#if !defined(CACHELINESIZE) && !defined(_MODULE) \
&& (defined(_KERNEL) || defined(_STANDALONE))
#if defined(PPC_IBM403)
#define CACHELINESIZE 16
#define MAXCACHELINESIZE 16
#elif defined (PPC_OEA64_BRIDGE)
#define CACHELINESIZE 128
#define MAXCACHELINESIZE 128
#else
#define CACHELINESIZE 32
#define MAXCACHELINESIZE 32
#endif /* PPC_IBM403 / PPC_OEA64_BRIDGE */
#endif /* !CACHELINESIZE && !_MODULE && (_KERNEL || _STANDALONE) */
/*
 * Declared outside every _KERNEL guard, so it is visible to any includer.
 * NOTE(review): presumably synchronizes the instruction cache over the
 * given address/length range -- confirm against the implementation.
 */
void __syncicache(void *, size_t);
/*
 * CTL_MACHDEP definitions.
 *
 * Numeric identifiers 1 through 12.  These are visible to any includer,
 * not only the kernel, so existing values should not be renumbered.
 */
#define CPU_CACHELINE 1
#define CPU_TIMEBASE 2
#define CPU_CPUTEMP 3
#define CPU_PRINTFATALTRAPS 4
#define CPU_CACHEINFO 5
#define CPU_ALTIVEC 6
#define CPU_MODEL 7
#define CPU_POWERSAVE 8 /* int: use CPU powersave mode */
#define CPU_BOOTED_DEVICE 9 /* string: device we booted from */
#define CPU_BOOTED_KERNEL 10 /* string: kernel we booted */
#define CPU_EXECPROT 11 /* bool: PROT_EXEC works */
#define CPU_FPU 12
#endif /* _POWERPC_CPU_H_ */