version 1.4, 2007/08/04 11:03:02
version 1.4.2.1, 2007/11/06 23:32:08
__KERNEL_RCSID(0, "$NetBSD$");

/* ... unchanged lines elided ... */

#include <sys/kauth.h>
#include <sys/lockdebug.h>
#include <sys/kmem.h>
#include <sys/intr.h>

#include <uvm/uvm_extern.h>

/*
 * Run queues.
 *
 * We maintain bitmasks of non-empty queues in order to speed up finding
 * the first runnable process.  Since there can be (by definition) few
 * real time LWPs in the system, we maintain them on a linked list,
 * sorted by priority.
 */

#define	PPB_SHIFT	5
#define	PPB_MASK	31

#define	NUM_Q	(NPRI_KERNEL + NPRI_USER)
#define	NUM_PPB	(1 << PPB_SHIFT)
#define	NUM_B	(NUM_Q / NUM_PPB)
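
/*
 * Worked example of the bitmap encoding used by the run queues below
 * (illustrative, derived from the enqueue/dequeue code): an LWP at
 * priority 53 sits on rq_queue[53]; its bitmap word is 53 >> PPB_SHIFT
 * = 1 and its bit is 0x80000000U >> (53 & PPB_MASK) = 0x80000000U >> 21.
 * Higher priorities map to lower-order bits within a word, so ffs() on
 * the highest-numbered non-empty word yields the best priority directly
 * (see runqueue_nextlwp()).
 */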

typedef struct runqueue {
	TAILQ_HEAD(, lwp) rq_fixedpri;		/* realtime, kthread */
	u_int rq_count;				/* total # jobs */
	uint32_t rq_bitmap[NUM_B];		/* bitmap of queues */
	TAILQ_HEAD(, lwp) rq_queue[NUM_Q];	/* user+kernel */
} runqueue_t;

static runqueue_t global_queue;

static void updatepri(struct lwp *);
static void resetpriority(struct lwp *);

fixpt_t	decay_cpu(fixpt_t, fixpt_t);

extern unsigned int sched_pstats_ticks; /* defined in kern_synch.c */

/* ... unchanged lines elided ... */

kmutex_t sched_mutex;

/* Number of hardclock ticks per sched_tick() */
int rrticks;

const int schedppq = 1;

/*
 * Force switch among equal priority processes every 100ms.
 * Called from hardclock every hz/10 == rrticks hardclock ticks.
 *
 * There's no need to lock anywhere in this routine, as it's
 * CPU-local and runs at IPL_SCHED (called from clock interrupt).
 */
/* ARGSUSED */
void
sched_tick(struct cpu_info *ci)
{
	/* ... declarations elided ... */

	spc->spc_ticks = rrticks;

	if (CURCPU_IDLE_P())
		return;

	if (spc->spc_flags & SPCF_SEENRR) {
		/*
		 * The process has already been through a roundrobin
		 * without switching and may be hogging the CPU.
		 * Indicate that the process should yield.
		 */
		spc->spc_flags |= SPCF_SHOULDYIELD;
	} else
		spc->spc_flags |= SPCF_SEENRR;

	cpu_need_resched(ci, 0);
}
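
/*
 * Illustrative figures for the round-robin above: with hz = 100 (an
 * assumed, common setting), rrticks = hz / 10 = 10, so sched_tick()
 * runs every 10 hardclock ticks, i.e. every 100ms.  The first such
 * tick spent on the same LWP sets SPCF_SEENRR; if it is still running
 * at the next one, SPCF_SHOULDYIELD asks it to yield.
 */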

/*
 * Why PRIO_MAX - 2? From setpriority(2):
 *
 *	prio is a value in the range -20 to 20.  The default priority is
 *	0; lower priorities cause more favorable scheduling.  A value of
 *	19 or 20 will schedule a process only when nothing at priority <=
 *	0 is runnable.
 *
 * This gives estcpu influence over 18 priority levels, and leaves nice
 * with 40 levels.  One way to think about it is that nice has 20 levels
 * either side of estcpu's 18.
 */
#define	ESTCPU_SHIFT	11
#define	ESTCPU_MAX	((PRIO_MAX - 2) << ESTCPU_SHIFT)
#define	ESTCPU_ACCUM	(1 << (ESTCPU_SHIFT - 1))
#define	ESTCPULIM(e)	min((e), ESTCPU_MAX)
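
/*
 * Worked numbers for the definitions above (PRIO_MAX is 20): ESTCPU_MAX
 * is (20 - 2) << ESTCPU_SHIFT, so l_estcpu >> ESTCPU_SHIFT contributes
 * at most 18 priority levels, the "18 levels" mentioned above, while
 * nice ranges over -20..20, i.e. the roughly 40 levels either side of it.
 */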

/*
 * Constants for digital decay and forget:
 *	90% of (l_estcpu) usage in 5 * loadav time
 *	95% of (l_pctcpu) usage in 60 seconds (load insensitive)
 *	    Note that, as ps(1) mentions, this can let percentages
 *	    total over 100% (I've seen 137.9% for 3 processes).
 *
 * Note that hardclock updates l_estcpu and l_cpticks independently.
 *
 * We wish to decay away 90% of l_estcpu in (5 * loadavg) seconds.
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 * 	for (i = 0; i < (5 * loadavg); i++)
 * 		l_estcpu *= decay;
 * will compute
 * 	l_estcpu *= 0.1;
 * for all values of loadavg:
 *
 * Mathematically this loop can be expressed by saying:
 *
 * ... (remainder of this comment elided) ...
 */
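
/*
 * Sanity check of the 90% target (illustrative arithmetic, assuming the
 * classic 4.4BSD decay factor 2*loadavg / (2*loadavg + 1), whose
 * derivation is elided above): with loadavg = 1 the factor is 2/3, and
 * (2/3)^5 = 32/243, about 0.13; for large loadavg the product over
 * 5*loadavg steps tends to e^-2.5, about 0.08.  Both sit close to the
 * 0.1 ("decay away 90%") target.
 */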
/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define	loadfactor(loadav)	(2 * (loadav))

fixpt_t
decay_cpu(fixpt_t loadfac, fixpt_t estcpu)
{
	/* ... body elided ... */
}

/*
 * For all load averages >= 1 and max l_estcpu of (255 << ESTCPU_SHIFT),
 * sleeping for at least seven times the loadfactor will decay l_estcpu to
 * less than (1 << ESTCPU_SHIFT).
 *
 * note that our ESTCPU_MAX is actually much smaller than (255 << ESTCPU_SHIFT).
 */
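
/*
 * Quick check of that claim (illustrative, using the same decay factor
 * noted earlier): with loadavg = 1 the factor is 2/3 and the loadfactor
 * is 2, so seven loadfactors is 14 steps; (2/3)^14 is about 0.0034, and
 * 255 * 0.0034 is about 0.87, i.e. already below 1 once scaled against
 * (1 << ESTCPU_SHIFT).  Larger load averages decay more slowly per step
 * but get correspondingly more steps, and the product stays below 1.
 */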

/* ... decay_cpu_batch() elided ... */

/*
 * Periodically called from sched_pstats(); used to recalculate priorities.
 */
void
sched_pstats_hook(struct lwp *l)
{
	fixpt_t loadfac;
	int sleeptm;

	/*
	 * If the LWP has slept an entire second, stop recalculating
	 * its priority until it wakes up.
	 */
	if (l->l_stat == LSSLEEP || l->l_stat == LSSTOP ||
	    l->l_stat == LSSUSPENDED) {
		l->l_slptime++;
		sleeptm = 1;
	} else {
		sleeptm = 0x7fffffff;
	}

	if (l->l_slptime <= sleeptm) {
		loadfac = 2 * (averunnable.ldavg[0]);
		l->l_estcpu = decay_cpu(loadfac, l->l_estcpu);
		resetpriority(l);
	}
}

/* ... unchanged lines elided ... */

static void
updatepri(struct lwp *l)
{
	fixpt_t loadfac;

	KASSERT(lwp_locked(l, NULL));
	/* ... lines elided ... */

	loadfac = loadfactor(averunnable.ldavg[0]);

	l->l_slptime--;	/* the first time was done in sched_pstats */
	l->l_estcpu = decay_cpu_batch(loadfac, l->l_estcpu, l->l_slptime);
	resetpriority(l);
}

static void
runqueue_init(runqueue_t *rq)
{
	int i;

	for (i = 0; i < NUM_Q; i++)
		TAILQ_INIT(&rq->rq_queue[i]);
	for (i = 0; i < NUM_B; i++)
		rq->rq_bitmap[i] = 0;
	TAILQ_INIT(&rq->rq_fixedpri);
	rq->rq_count = 0;
}

static void
runqueue_enqueue(runqueue_t *rq, struct lwp *l)
{
	pri_t pri;
	lwp_t *l2;

	KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));

	pri = lwp_eprio(l);
	rq->rq_count++;

	if (pri >= PRI_KTHREAD) {
		TAILQ_FOREACH(l2, &rq->rq_fixedpri, l_runq) {
			if (lwp_eprio(l2) < pri) {
				TAILQ_INSERT_BEFORE(l2, l, l_runq);
				return;
			}
		}
		TAILQ_INSERT_TAIL(&rq->rq_fixedpri, l, l_runq);
		return;
	}

	rq->rq_bitmap[pri >> PPB_SHIFT] |=
	    (0x80000000U >> (pri & PPB_MASK));
	TAILQ_INSERT_TAIL(&rq->rq_queue[pri], l, l_runq);
}

static void
runqueue_dequeue(runqueue_t *rq, struct lwp *l)
{
	pri_t pri;

	KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));

	pri = lwp_eprio(l);
	rq->rq_count--;

	if (pri >= PRI_KTHREAD) {
		TAILQ_REMOVE(&rq->rq_fixedpri, l, l_runq);
		return;
	}

	TAILQ_REMOVE(&rq->rq_queue[pri], l, l_runq);
	if (TAILQ_EMPTY(&rq->rq_queue[pri]))
		rq->rq_bitmap[pri >> PPB_SHIFT] ^=
		    (0x80000000U >> (pri & PPB_MASK));
}

#if (NUM_B != 3) || (NUM_Q != 96)
#error adjust runqueue_nextlwp
#endif

static struct lwp *
runqueue_nextlwp(runqueue_t *rq)
{
	pri_t pri;

	KASSERT(rq->rq_count != 0);

	if (!TAILQ_EMPTY(&rq->rq_fixedpri))
		return TAILQ_FIRST(&rq->rq_fixedpri);

	if (rq->rq_bitmap[2] != 0)
		pri = 96 - ffs(rq->rq_bitmap[2]);
	else if (rq->rq_bitmap[1] != 0)
		pri = 64 - ffs(rq->rq_bitmap[1]);
	else
		pri = 32 - ffs(rq->rq_bitmap[0]);
	return TAILQ_FIRST(&rq->rq_queue[pri]);
}

#if defined(DDB)
static void
runqueue_print(const runqueue_t *rq, void (*pr)(const char *, ...))
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	lwp_t *l;
	int i;

	printf("PID\tLID\tPRI\tIPRI\tEPRI\tLWP\t\t NAME\n");

	TAILQ_FOREACH(l, &rq->rq_fixedpri, l_runq) {
		(*pr)("%d\t%d\t%d\t%d\t%d\t%016lx %s\n",
		    l->l_proc->p_pid, l->l_lid, (int)l->l_priority,
		    (int)l->l_inheritedprio, lwp_eprio(l),
		    (long)l, l->l_proc->p_comm);
	}

	for (i = NUM_Q - 1; i >= 0; i--) {
		TAILQ_FOREACH(l, &rq->rq_queue[i], l_runq) {
			(*pr)("%d\t%d\t%d\t%d\t%d\t%016lx %s\n",
			    l->l_proc->p_pid, l->l_lid, (int)l->l_priority,
			    (int)l->l_inheritedprio, lwp_eprio(l),
			    (long)l, l->l_proc->p_comm);
		}
	}

	printf("CPUIDX\tRESCHED\tCURPRI\tFLAGS\n");
	for (CPU_INFO_FOREACH(cii, ci)) {
		printf("%d\t%d\t%d\t%04x\n", (int)ci->ci_index,
		    (int)ci->ci_want_resched,
		    (int)ci->ci_schedstate.spc_curpriority,
		    (int)ci->ci_schedstate.spc_flags);
	}

	printf("NEXTLWP\n%016lx\n", (long)sched_nextlwp());
}
#endif /* defined(DDB) */

/*
 * Initialize the (doubly-linked) run queues
 * ... (remainder of this comment and the following functions elided) ...
 */

bool
sched_curcpu_runnable_p(void)
{
	struct schedstate_percpu *spc;
	struct cpu_info *ci;
	int bits;

	ci = curcpu();
	spc = &ci->ci_schedstate;
#ifndef __HAVE_FAST_SOFTINTS
	bits = ci->ci_data.cpu_softints;
	bits |= ((runqueue_t *)spc->spc_sched_info)->rq_count;
#else
	bits = ((runqueue_t *)spc->spc_sched_info)->rq_count;
#endif
	if (__predict_true((spc->spc_flags & SPCF_OFFLINE) == 0))
		bits |= global_queue.rq_count;
	return bits != 0;
}

void
sched_nice(struct proc *p, int n)
{
	struct lwp *l;

	KASSERT(mutex_owned(&p->p_smutex));

	p->p_nice = n;
	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
		lwp_lock(l);
		resetpriority(l);
		lwp_unlock(l);
	}
}

/*
 * Recompute the priority of an LWP.  Arrange to reschedule if
 * the resulting priority is better than that of the current LWP.
 */
static void
resetpriority(struct lwp *l)
{
	pri_t pri;
	struct proc *p = l->l_proc;

	KASSERT(lwp_locked(l, NULL));

	if (l->l_class != SCHED_OTHER)
		return;

	/* See comments above ESTCPU_SHIFT definition. */
	pri = (PRI_KERNEL - 1) - (l->l_estcpu >> ESTCPU_SHIFT) - p->p_nice;
	pri = imax(pri, 0);
	if (pri != l->l_priority)
		lwp_changepri(l, pri);
}
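
/*
 * Example of the formula above (symbolic, since the PRI_* values live
 * elsewhere): an SCHED_OTHER LWP with l_estcpu = 4 << ESTCPU_SHIFT and
 * p_nice = 0 ends up at PRI_KERNEL - 1 - 4; every further full level of
 * accumulated estcpu, or unit of nice, pushes it one level lower,
 * clamped at 0.
 */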

/*
 * We adjust the priority of the current process.  The priority of a process
 * gets worse as it accumulates CPU time.  The CPU usage estimator (l_estcpu)
 * is increased here.  The formula for computing priorities (in kern_synch.c)
 * will compute a different value each time l_estcpu increases. This can
 * cause a switch, but unless the priority crosses a PPQ boundary the actual
 * queue will not change.  The CPU usage estimator ramps up quite quickly
 * when the process is running (linearly), and decays away exponentially, at
 * ... (remainder of this comment elided) ...
 */
void
sched_schedclock(struct lwp *l)
{

	if (l->l_class != SCHED_OTHER)
		return;

	KASSERT(!CURCPU_IDLE_P());
	l->l_estcpu = ESTCPULIM(l->l_estcpu + ESTCPU_ACCUM);
	lwp_lock(l);
	resetpriority(l);
	lwp_unlock(l);
}
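
/*
 * Rough scale of the linear ramp-up (illustrative): each
 * sched_schedclock() call adds ESTCPU_ACCUM = 1 << (ESTCPU_SHIFT - 1),
 * i.e. half a priority level, so two such charges cost an SCHED_OTHER
 * LWP one priority level in resetpriority(), until l_estcpu saturates
 * at ESTCPU_MAX.
 */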

/* ... unchanged lines elided ... */

void
sched_proc_fork(struct proc *parent, struct proc *child)
{
	lwp_t *pl;

	KASSERT(mutex_owned(&parent->p_smutex));

	pl = LIST_FIRST(&parent->p_lwps);
	child->p_estcpu_inherited = pl->l_estcpu;
	child->p_forktime = sched_pstats_ticks;
}

/* ... unchanged lines elided ... */

void
sched_proc_exit(struct proc *parent, struct proc *child)
{
	fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
	fixpt_t estcpu;
	lwp_t *pl, *cl;

	/* XXX Only if parent != init?? */

	mutex_enter(&parent->p_smutex);
	pl = LIST_FIRST(&parent->p_lwps);
	cl = LIST_FIRST(&child->p_lwps);
	estcpu = decay_cpu_batch(loadfac, child->p_estcpu_inherited,
	    sched_pstats_ticks - child->p_forktime);
	if (cl->l_estcpu > estcpu) {
		lwp_lock(pl);
		pl->l_estcpu = ESTCPULIM(pl->l_estcpu + cl->l_estcpu - estcpu);
		lwp_unlock(pl);
	}
	mutex_exit(&parent->p_smutex);
}

/* ... unchanged lines elided ... */

struct lwp *
sched_nextlwp(void)
{
	struct schedstate_percpu *spc;
	runqueue_t *rq;
	lwp_t *l1, *l2;

	spc = &curcpu()->ci_schedstate;

	/* For now, just pick the highest priority LWP. */
	rq = spc->spc_sched_info;
	l1 = NULL;
	if (rq->rq_count != 0)
		l1 = runqueue_nextlwp(rq);

	rq = &global_queue;
	if (__predict_false((spc->spc_flags & SPCF_OFFLINE) != 0) ||
	    rq->rq_count == 0)
		return l1;
	l2 = runqueue_nextlwp(rq);

	if (l1 == NULL)
		return l2;
	if (l2 == NULL)
		return l1;
	if (lwp_eprio(l2) > lwp_eprio(l1))
		return l2;
	else
		return l1;
}

struct cpu_info *
sched_takecpu(struct lwp *l)
{

	return l->l_cpu;
}

void
sched_wakeup(struct lwp *l)
{

}

void
sched_slept(struct lwp *l)
{

}

void
sched_lwp_fork(struct lwp *l1, struct lwp *l2)
{

	l2->l_estcpu = l1->l_estcpu;
}

void
sched_lwp_exit(struct lwp *l)
{

}

void
sched_lwp_collect(struct lwp *t)
{
	lwp_t *l;

	/* Absorb estcpu value of collected LWP. */
	l = curlwp;
	lwp_lock(l);
	l->l_estcpu += t->l_estcpu;
	lwp_unlock(l);
}

/*
 * sysctl setup.  XXX This should be split with kern_synch.c.
 */
SYSCTL_SETUP(sysctl_sched_setup, "sysctl kern.sched subtree setup")
{
	const struct sysctlnode *node = NULL;

	/* ... unchanged lines elided ... */
	    NULL, 0, NULL, 0,
	    CTL_KERN, CTL_CREATE, CTL_EOL);

	KASSERT(node != NULL);

	sysctl_createv(clog, 0, &node, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_STRING, "name", NULL,
	    NULL, 0, __UNCONST("4.4BSD"), 0,
	    CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
	    CTLFLAG_READWRITE,
	    CTLTYPE_INT, "timesoftints",
	    SYSCTL_DESCR("Track CPU time for soft interrupts"),
	    NULL, 0, &softint_timing, 0,
	    CTL_CREATE, CTL_EOL);
}

#if defined(DDB)