version 1.4, 2007/08/04 11:03:02
version 1.4.2.1, 2007/11/06 23:32:08
__KERNEL_RCSID(0, "$NetBSD$");

/* ... unchanged lines elided ... */

#include <sys/kauth.h>
#include <sys/lockdebug.h>
#include <sys/kmem.h>
#include <sys/intr.h>

#include <uvm/uvm_extern.h>

/*
 * Run queues.
 *
 * We maintain bitmasks of non-empty queues in order to speed up finding
 * the first runnable process.  Since there can be (by definition) few
 * real time LWPs in the system, we maintain them on a linked list,
 * sorted by priority.
 */

#define	PPB_SHIFT	5
#define	PPB_MASK	31

#define	NUM_Q	(NPRI_KERNEL + NPRI_USER)
#define	NUM_PPB	(1 << PPB_SHIFT)
#define	NUM_B	(NUM_Q / NUM_PPB)
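
/*
 * Worked example of the bitmap encoding used by the run queues below
 * (illustrative, derived from the enqueue/dequeue code): an LWP at
 * priority 53 sits on rq_queue[53]; its bitmap word is 53 >> PPB_SHIFT
 * = 1 and its bit is 0x80000000U >> (53 & PPB_MASK) = 0x80000000U >> 21.
 * Higher priorities map to lower-order bits within a word, so ffs() on
 * the highest-numbered non-empty word yields the best priority directly
 * (see runqueue_nextlwp()).
 */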

typedef struct runqueue {
	TAILQ_HEAD(, lwp) rq_fixedpri;		/* realtime, kthread */
	u_int rq_count;				/* total # jobs */
	uint32_t rq_bitmap[NUM_B];		/* bitmap of queues */
	TAILQ_HEAD(, lwp) rq_queue[NUM_Q];	/* user+kernel */
} runqueue_t;

static runqueue_t global_queue;

static void updatepri(struct lwp *);
static void resetpriority(struct lwp *);

fixpt_t	decay_cpu(fixpt_t, fixpt_t);

extern unsigned int sched_pstats_ticks; /* defined in kern_synch.c */

/* ... unchanged lines elided ... */

kmutex_t sched_mutex;

/* Number of hardclock ticks per sched_tick() */
int rrticks;

const int schedppq = 1;

/*
 * Force switch among equal priority processes every 100ms.
 * Called from hardclock every hz/10 == rrticks hardclock ticks.
 *
 * There's no need to lock anywhere in this routine, as it's
 * CPU-local and runs at IPL_SCHED (called from clock interrupt).
 */
/* ARGSUSED */
void
sched_tick(struct cpu_info *ci)
{
	/* ... declarations elided ... */

	spc->spc_ticks = rrticks;

	if (CURCPU_IDLE_P())
		return;

	if (spc->spc_flags & SPCF_SEENRR) {
		/*
		 * The process has already been through a roundrobin
		 * without switching and may be hogging the CPU.
		 * Indicate that the process should yield.
		 */
		spc->spc_flags |= SPCF_SHOULDYIELD;
	} else
		spc->spc_flags |= SPCF_SEENRR;

	cpu_need_resched(ci, 0);
}
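
/*
 * Illustrative figures for the round-robin above: with hz = 100 (an
 * assumed, common setting), rrticks = hz / 10 = 10, so sched_tick()
 * runs every 10 hardclock ticks, i.e. every 100ms.  The first such
 * tick spent on the same LWP sets SPCF_SEENRR; if it is still running
 * at the next one, SPCF_SHOULDYIELD asks it to yield.
 */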

/*
 * Why PRIO_MAX - 2? From setpriority(2):
 *
 *	prio is a value in the range -20 to 20.  The default priority is
 *	0; lower priorities cause more favorable scheduling.  A value of
 *	19 or 20 will schedule a process only when nothing at priority <=
 *	0 is runnable.
 *
 * This gives estcpu influence over 18 priority levels, and leaves nice
 * with 40 levels.  One way to think about it is that nice has 20 levels
 * either side of estcpu's 18.
 */
#define	ESTCPU_SHIFT	11
#define	ESTCPU_MAX	((PRIO_MAX - 2) << ESTCPU_SHIFT)
#define	ESTCPU_ACCUM	(1 << (ESTCPU_SHIFT - 1))
#define	ESTCPULIM(e)	min((e), ESTCPU_MAX)
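
/*
 * Worked numbers for the definitions above (PRIO_MAX is 20): ESTCPU_MAX
 * is (20 - 2) << ESTCPU_SHIFT, so l_estcpu >> ESTCPU_SHIFT contributes
 * at most 18 priority levels, the "18 levels" mentioned above, while
 * nice ranges over -20..20, i.e. the roughly 40 levels either side of it.
 */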

/*
 * Constants for digital decay and forget:
 *	90% of (l_estcpu) usage in 5 * loadav time
 *	95% of (l_pctcpu) usage in 60 seconds (load insensitive)
 *	    Note that, as ps(1) mentions, this can let percentages
 *	    total over 100% (I've seen 137.9% for 3 processes).
 *
 * Note that hardclock updates l_estcpu and l_cpticks independently.
 *
 * We wish to decay away 90% of l_estcpu in (5 * loadavg) seconds.
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 * 	for (i = 0; i < (5 * loadavg); i++)
 * 		l_estcpu *= decay;
 * will compute
 * 	l_estcpu *= 0.1;
 * for all values of loadavg:
 *
 * Mathematically this loop can be expressed by saying:
 *
 * ... (remainder of this comment elided) ...
 */
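
/*
 * Sanity check of the 90% target (illustrative arithmetic, assuming the
 * classic 4.4BSD decay factor 2*loadavg / (2*loadavg + 1), whose
 * derivation is elided above): with loadavg = 1 the factor is 2/3, and
 * (2/3)^5 = 32/243, about 0.13; for large loadavg the product over
 * 5*loadavg steps tends to e^-2.5, about 0.08.  Both sit close to the
 * 0.1 ("decay away 90%") target.
 */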
/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define	loadfactor(loadav)	(2 * (loadav))

fixpt_t
decay_cpu(fixpt_t loadfac, fixpt_t estcpu)
{
	/* ... body elided ... */
}

/*
 * For all load averages >= 1 and max l_estcpu of (255 << ESTCPU_SHIFT),
 * sleeping for at least seven times the loadfactor will decay l_estcpu to
 * less than (1 << ESTCPU_SHIFT).
 *
 * note that our ESTCPU_MAX is actually much smaller than (255 << ESTCPU_SHIFT).
 */
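
/*
 * Quick check of that claim (illustrative, using the same decay factor
 * noted earlier): with loadavg = 1 the factor is 2/3 and the loadfactor
 * is 2, so seven loadfactors is 14 steps; (2/3)^14 is about 0.0034, and
 * 255 * 0.0034 is about 0.87, i.e. already below 1 once scaled against
 * (1 << ESTCPU_SHIFT).  Larger load averages decay more slowly per step
 * but get correspondingly more steps, and the product stays below 1.
 */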

/* ... decay_cpu_batch() elided ... */

/*
 * Periodically called from sched_pstats(); used to recalculate priorities.
 */
void
sched_pstats_hook(struct lwp *l)
{
	fixpt_t loadfac;
	int sleeptm;

	/*
	 * If the LWP has slept an entire second, stop recalculating
	 * its priority until it wakes up.
	 */
	if (l->l_stat == LSSLEEP || l->l_stat == LSSTOP ||
	    l->l_stat == LSSUSPENDED) {
		l->l_slptime++;
		sleeptm = 1;
	} else {
		sleeptm = 0x7fffffff;
	}

	if (l->l_slptime <= sleeptm) {
		loadfac = 2 * (averunnable.ldavg[0]);
		l->l_estcpu = decay_cpu(loadfac, l->l_estcpu);
		resetpriority(l);
	}
}

/* ... unchanged lines elided ... */

static void
updatepri(struct lwp *l)
{
	fixpt_t loadfac;

	KASSERT(lwp_locked(l, NULL));
	/* ... lines elided ... */

	loadfac = loadfactor(averunnable.ldavg[0]);

	l->l_slptime--;	/* the first time was done in sched_pstats */
	l->l_estcpu = decay_cpu_batch(loadfac, l->l_estcpu, l->l_slptime);
	resetpriority(l);
}

static void
runqueue_init(runqueue_t *rq)
{
	int i;

	for (i = 0; i < NUM_Q; i++)
		TAILQ_INIT(&rq->rq_queue[i]);
	for (i = 0; i < NUM_B; i++)
		rq->rq_bitmap[i] = 0;
	TAILQ_INIT(&rq->rq_fixedpri);
	rq->rq_count = 0;
}

static void
runqueue_enqueue(runqueue_t *rq, struct lwp *l)
{
	pri_t pri;
	lwp_t *l2;

	KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));

	pri = lwp_eprio(l);
	rq->rq_count++;

	if (pri >= PRI_KTHREAD) {
		TAILQ_FOREACH(l2, &rq->rq_fixedpri, l_runq) {
			if (lwp_eprio(l2) < pri) {
				TAILQ_INSERT_BEFORE(l2, l, l_runq);
				return;
			}
		}
		TAILQ_INSERT_TAIL(&rq->rq_fixedpri, l, l_runq);
		return;
	}

	rq->rq_bitmap[pri >> PPB_SHIFT] |=
	    (0x80000000U >> (pri & PPB_MASK));
	TAILQ_INSERT_TAIL(&rq->rq_queue[pri], l, l_runq);
}

static void
runqueue_dequeue(runqueue_t *rq, struct lwp *l)
{
	pri_t pri;

	KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));

	pri = lwp_eprio(l);
	rq->rq_count--;

	if (pri >= PRI_KTHREAD) {
		TAILQ_REMOVE(&rq->rq_fixedpri, l, l_runq);
		return;
	}

	TAILQ_REMOVE(&rq->rq_queue[pri], l, l_runq);
	if (TAILQ_EMPTY(&rq->rq_queue[pri]))
		rq->rq_bitmap[pri >> PPB_SHIFT] ^=
		    (0x80000000U >> (pri & PPB_MASK));
}

#if (NUM_B != 3) || (NUM_Q != 96)
#error adjust runqueue_nextlwp
#endif

static struct lwp *
runqueue_nextlwp(runqueue_t *rq)
{
	pri_t pri;

	KASSERT(rq->rq_count != 0);

	if (!TAILQ_EMPTY(&rq->rq_fixedpri))
		return TAILQ_FIRST(&rq->rq_fixedpri);

	if (rq->rq_bitmap[2] != 0)
		pri = 96 - ffs(rq->rq_bitmap[2]);
	else if (rq->rq_bitmap[1] != 0)
		pri = 64 - ffs(rq->rq_bitmap[1]);
	else
		pri = 32 - ffs(rq->rq_bitmap[0]);
	return TAILQ_FIRST(&rq->rq_queue[pri]);
}

#if defined(DDB)
static void
runqueue_print(const runqueue_t *rq, void (*pr)(const char *, ...))
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	lwp_t *l;
	int i;

	printf("PID\tLID\tPRI\tIPRI\tEPRI\tLWP\t\t NAME\n");

	TAILQ_FOREACH(l, &rq->rq_fixedpri, l_runq) {
		(*pr)("%d\t%d\t%d\t%d\t%d\t%016lx %s\n",
		    l->l_proc->p_pid, l->l_lid, (int)l->l_priority,
		    (int)l->l_inheritedprio, lwp_eprio(l),
		    (long)l, l->l_proc->p_comm);
	}

	for (i = NUM_Q - 1; i >= 0; i--) {
		TAILQ_FOREACH(l, &rq->rq_queue[i], l_runq) {
			(*pr)("%d\t%d\t%d\t%d\t%d\t%016lx %s\n",
			    l->l_proc->p_pid, l->l_lid, (int)l->l_priority,
			    (int)l->l_inheritedprio, lwp_eprio(l),
			    (long)l, l->l_proc->p_comm);
		}
	}

	printf("CPUIDX\tRESCHED\tCURPRI\tFLAGS\n");
	for (CPU_INFO_FOREACH(cii, ci)) {
		printf("%d\t%d\t%d\t%04x\n", (int)ci->ci_index,
		    (int)ci->ci_want_resched,
		    (int)ci->ci_schedstate.spc_curpriority,
		    (int)ci->ci_schedstate.spc_flags);
	}

	printf("NEXTLWP\n%016lx\n", (long)sched_nextlwp());
}
#endif /* defined(DDB) */

/*
 * Initialize the (doubly-linked) run queues
 * ... (remainder of this comment and the following functions elided) ...
 */

bool
sched_curcpu_runnable_p(void)
{
	struct schedstate_percpu *spc;
	struct cpu_info *ci;
	int bits;

	ci = curcpu();
	spc = &ci->ci_schedstate;
#ifndef __HAVE_FAST_SOFTINTS
	bits = ci->ci_data.cpu_softints;
	bits |= ((runqueue_t *)spc->spc_sched_info)->rq_count;
#else
	bits = ((runqueue_t *)spc->spc_sched_info)->rq_count;
#endif
	if (__predict_true((spc->spc_flags & SPCF_OFFLINE) == 0))
		bits |= global_queue.rq_count;
	return bits != 0;
}

void
sched_nice(struct proc *p, int n)
{
	struct lwp *l;

	KASSERT(mutex_owned(&p->p_smutex));

	p->p_nice = n;
	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
		lwp_lock(l);
		resetpriority(l);
		lwp_unlock(l);
	}
}

/*
 * Recompute the priority of an LWP.  Arrange to reschedule if
 * the resulting priority is better than that of the current LWP.
 */
static void
resetpriority(struct lwp *l)
{
	pri_t pri;
	struct proc *p = l->l_proc;

	KASSERT(lwp_locked(l, NULL));

	if (l->l_class != SCHED_OTHER)
		return;

	/* See comments above ESTCPU_SHIFT definition. */
	pri = (PRI_KERNEL - 1) - (l->l_estcpu >> ESTCPU_SHIFT) - p->p_nice;
	pri = imax(pri, 0);
	if (pri != l->l_priority)
		lwp_changepri(l, pri);
}
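
/*
 * Example of the formula above (symbolic, since the PRI_* values live
 * elsewhere): an SCHED_OTHER LWP with l_estcpu = 4 << ESTCPU_SHIFT and
 * p_nice = 0 ends up at PRI_KERNEL - 1 - 4; every further full level of
 * accumulated estcpu, or unit of nice, pushes it one level lower,
 * clamped at 0.
 */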

/*
 * We adjust the priority of the current process.  The priority of a process
 * gets worse as it accumulates CPU time.  The CPU usage estimator (l_estcpu)
 * is increased here.  The formula for computing priorities (in kern_synch.c)
 * will compute a different value each time l_estcpu increases. This can
 * cause a switch, but unless the priority crosses a PPQ boundary the actual
 * queue will not change.  The CPU usage estimator ramps up quite quickly
 * when the process is running (linearly), and decays away exponentially, at
 * ... (remainder of this comment elided) ...
 */
void
sched_schedclock(struct lwp *l)
{

	if (l->l_class != SCHED_OTHER)
		return;

	KASSERT(!CURCPU_IDLE_P());
	l->l_estcpu = ESTCPULIM(l->l_estcpu + ESTCPU_ACCUM);
	lwp_lock(l);
	resetpriority(l);
	lwp_unlock(l);
}
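
/*
 * Rough scale of the linear ramp-up (illustrative): each
 * sched_schedclock() call adds ESTCPU_ACCUM = 1 << (ESTCPU_SHIFT - 1),
 * i.e. half a priority level, so two such charges cost an SCHED_OTHER
 * LWP one priority level in resetpriority(), until l_estcpu saturates
 * at ESTCPU_MAX.
 */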

/* ... unchanged lines elided ... */

void
sched_proc_fork(struct proc *parent, struct proc *child)
{
	lwp_t *pl;

	KASSERT(mutex_owned(&parent->p_smutex));

	pl = LIST_FIRST(&parent->p_lwps);
	child->p_estcpu_inherited = pl->l_estcpu;
	child->p_forktime = sched_pstats_ticks;
}

/* ... unchanged lines elided ... */

void
sched_proc_exit(struct proc *parent, struct proc *child)
{
	fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
	fixpt_t estcpu;
	lwp_t *pl, *cl;

	/* XXX Only if parent != init?? */

	mutex_enter(&parent->p_smutex);
	pl = LIST_FIRST(&parent->p_lwps);
	cl = LIST_FIRST(&child->p_lwps);
	estcpu = decay_cpu_batch(loadfac, child->p_estcpu_inherited,
	    sched_pstats_ticks - child->p_forktime);
	if (cl->l_estcpu > estcpu) {
		lwp_lock(pl);
		pl->l_estcpu = ESTCPULIM(pl->l_estcpu + cl->l_estcpu - estcpu);
		lwp_unlock(pl);
	}
	mutex_exit(&parent->p_smutex);
}

/* ... unchanged lines elided ... */

struct lwp *
sched_nextlwp(void)
{
	struct schedstate_percpu *spc;
	runqueue_t *rq;
	lwp_t *l1, *l2;

	spc = &curcpu()->ci_schedstate;

	/* For now, just pick the highest priority LWP. */
	rq = spc->spc_sched_info;
	l1 = NULL;
	if (rq->rq_count != 0)
		l1 = runqueue_nextlwp(rq);

	rq = &global_queue;
	if (__predict_false((spc->spc_flags & SPCF_OFFLINE) != 0) ||
	    rq->rq_count == 0)
		return l1;
	l2 = runqueue_nextlwp(rq);

	if (l1 == NULL)
		return l2;
	if (l2 == NULL)
		return l1;
	if (lwp_eprio(l2) > lwp_eprio(l1))
		return l2;
	else
		return l1;
}

struct cpu_info *
sched_takecpu(struct lwp *l)
{

	return l->l_cpu;
}

void
sched_wakeup(struct lwp *l)
{

}

void
sched_slept(struct lwp *l)
{

}

void
sched_lwp_fork(struct lwp *l1, struct lwp *l2)
{

	l2->l_estcpu = l1->l_estcpu;
}

void
sched_lwp_exit(struct lwp *l)
{

}

void
sched_lwp_collect(struct lwp *t)
{
	lwp_t *l;

	/* Absorb estcpu value of collected LWP. */
	l = curlwp;
	lwp_lock(l);
	l->l_estcpu += t->l_estcpu;
	lwp_unlock(l);
}

/*
 * sysctl setup.  XXX This should be split with kern_synch.c.
 */
SYSCTL_SETUP(sysctl_sched_setup, "sysctl kern.sched subtree setup")
{
	const struct sysctlnode *node = NULL;

	/* ... unchanged lines elided ... */
	    NULL, 0, NULL, 0,
	    CTL_KERN, CTL_CREATE, CTL_EOL);

	KASSERT(node != NULL);

	sysctl_createv(clog, 0, &node, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_STRING, "name", NULL,
	    NULL, 0, __UNCONST("4.4BSD"), 0,
	    CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
	    CTLFLAG_READWRITE,
	    CTLTYPE_INT, "timesoftints",
	    SYSCTL_DESCR("Track CPU time for soft interrupts"),
	    NULL, 0, &softint_timing, 0,
	    CTL_CREATE, CTL_EOL);
}

#if defined(DDB)