src/sys/kern/kern_clock.c - annotate

Return to kern_clock.c CVS log
Up to [cvs.NetBSD.org] / src / sys / kern
Annotation of src/sys/kern/kern_clock.c, Revision 1.130.12.2

1.130.12.2! jdolecek    1: /*     $NetBSD$        */
1.52      thorpej     2:
                      3: /*-
1.118     ad          4:  * Copyright (c) 2000, 2004, 2006, 2007, 2008 The NetBSD Foundation, Inc.
1.52      thorpej     5:  * All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to The NetBSD Foundation
                      8:  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
                      9:  * NASA Ames Research Center.
1.94      mycroft    10:  * This code is derived from software contributed to The NetBSD Foundation
                     11:  * by Charles M. Hannum.
1.52      thorpej    12:  *
                     13:  * Redistribution and use in source and binary forms, with or without
                     14:  * modification, are permitted provided that the following conditions
                     15:  * are met:
                     16:  * 1. Redistributions of source code must retain the above copyright
                     17:  *    notice, this list of conditions and the following disclaimer.
                     18:  * 2. Redistributions in binary form must reproduce the above copyright
                     19:  *    notice, this list of conditions and the following disclaimer in the
                     20:  *    documentation and/or other materials provided with the distribution.
                     21:  *
                     22:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     23:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     24:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     25:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     26:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     27:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     28:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     29:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     30:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     31:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     32:  * POSSIBILITY OF SUCH DAMAGE.
                     33:  */
1.19      cgd        34:
                     35: /*-
                     36:  * Copyright (c) 1982, 1986, 1991, 1993
                     37:  *     The Regents of the University of California.  All rights reserved.
                     38:  * (c) UNIX System Laboratories, Inc.
                     39:  * All or some portions of this file are derived from material licensed
                     40:  * to the University of California by American Telephone and Telegraph
                     41:  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
                     42:  * the permission of UNIX System Laboratories, Inc.
                     43:  *
                     44:  * Redistribution and use in source and binary forms, with or without
                     45:  * modification, are permitted provided that the following conditions
                     46:  * are met:
                     47:  * 1. Redistributions of source code must retain the above copyright
                     48:  *    notice, this list of conditions and the following disclaimer.
                     49:  * 2. Redistributions in binary form must reproduce the above copyright
                     50:  *    notice, this list of conditions and the following disclaimer in the
                     51:  *    documentation and/or other materials provided with the distribution.
1.87      agc        52:  * 3. Neither the name of the University nor the names of its contributors
1.19      cgd        53:  *    may be used to endorse or promote products derived from this software
                     54:  *    without specific prior written permission.
                     55:  *
                     56:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     57:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     58:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     59:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     60:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     61:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     62:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     63:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     64:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     65:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     66:  * SUCH DAMAGE.
                     67:  *
                     68:  *     @(#)kern_clock.c        8.5 (Berkeley) 1/21/94
                     69:  */
1.78      lukem      70:
                     71: #include <sys/cdefs.h>
1.130.12.2! jdolecek   72: __KERNEL_RCSID(0, "$NetBSD$");
1.44      jonathan   73:
1.130.12.2! jdolecek   74: #ifdef _KERNEL_OPT
1.130.12.1  tls        75: #include "opt_dtrace.h"
1.80      briggs     76: #include "opt_perfctrs.h"
1.130.12.2! jdolecek   77: #endif
1.19      cgd        78:
                     79: #include <sys/param.h>
                     80: #include <sys/systm.h>
                     81: #include <sys/callout.h>
                     82: #include <sys/kernel.h>
                     83: #include <sys/proc.h>
                     84: #include <sys/resourcevar.h>
1.25      christos   85: #include <sys/signalvar.h>
1.26      christos   86: #include <sys/sysctl.h>
1.27      jonathan   87: #include <sys/timex.h>
1.45      ross       88: #include <sys/sched.h>
1.82      thorpej    89: #include <sys/time.h>
1.99      kardel     90: #include <sys/timetc.h>
1.109     ad         91: #include <sys/cpu.h>
1.118     ad         92: #include <sys/atomic.h>
                     93:
1.19      cgd        94: #ifdef GPROF
                     95: #include <sys/gmon.h>
                     96: #endif
                     97:
1.130.12.1  tls        98: #ifdef KDTRACE_HOOKS
                     99: #include <sys/dtrace_bsd.h>
                    100: #include <sys/cpu.h>
                    101:
                    102: cyclic_clock_func_t    cyclic_clock_func[MAXCPUS];
                    103: #endif
                    104:
/* sysctl helper for kern.clockrate; defined at the end of this file. */
static int sysctl_kern_clockrate(SYSCTLFN_PROTO);

/*
 * Clock handling routines.
 *
 * Two independent timers are assumed.  The main clock fires hz times per
 * second and keeps track of real time.  The second timer drives kernel
 * and user profiling and resource-use estimation; its mean frequency is
 * stathz.  When that second timer is programmable it is randomized so
 * the two clocks do not alias: without randomization an adversary could
 * always give up the CPU just before its quantum expires and so never
 * accumulate CPU ticks.
 *
 * If there is no second timer, stathz is zero and profiling/statistics
 * are driven from the main clock instead.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * While profiling, the statistics clock may (or may not) be run at a
 * higher rate, profhz, which must be an integral multiple of stathz.
 * When the statistics clock is running fast, statistics are only taken
 * every profhz/stathz ticks; profiling counts every tick.
 */

int	stathz;			/* statistics clock rate; 0 => none */
int	profhz;			/* profiling clock rate */
int	profsrc;		/* profiling source, cf. PROFSRC_CLOCK */
int	schedhz;		/* 0 => schedclock() run from hardclock() */
int	profprocs;		/* processes with PST_PROFIL set */
int	hardclock_ticks;	/* hardclock() calls on the primary CPU */
int	psratio;		/* ratio: prof / stat */

static int hardscheddiv;	/* hard => sched divider (schedhz == 0) */
static int psdiv;		/* prof => stat divider */
1.19      cgd       140:
1.99      kardel    141: static u_int get_intr_timecount(struct timecounter *);
                    142:
                    143: static struct timecounter intr_timecounter = {
                    144:        get_intr_timecount,     /* get_timecount */
                    145:        0,                      /* no poll_pps */
                    146:        ~0u,                    /* counter_mask */
                    147:        0,                      /* frequency */
                    148:        "clockinterrupt",       /* name */
1.102     christos  149:        0,                      /* quality - minimum implementation level for a clock */
                    150:        NULL,                   /* prev */
                    151:        NULL,                   /* next */
1.99      kardel    152: };
                    153:
                    154: static u_int
1.104     yamt      155: get_intr_timecount(struct timecounter *tc)
1.99      kardel    156: {
1.104     yamt      157:
1.100     drochner  158:        return (u_int)hardclock_ticks;
1.99      kardel    159: }
1.73      thorpej   160:
1.66      thorpej   161: /*
1.19      cgd       162:  * Initialize clock frequencies and start both clocks running.
                    163:  */
                    164: void
1.63      thorpej   165: initclocks(void)
1.19      cgd       166: {
1.130.12.2! jdolecek  167:        static struct sysctllog *clog;
1.55      augustss  168:        int i;
1.19      cgd       169:
                    170:        /*
                    171:         * Set divisors to 1 (normal case) and let the machine-specific
                    172:         * code do its bit.
                    173:         */
1.70      sommerfe  174:        psdiv = 1;
1.99      kardel    175:        /*
                    176:         * provide minimum default time counter
                    177:         * will only run at interrupt resolution
                    178:         */
                    179:        intr_timecounter.tc_frequency = hz;
                    180:        tc_init(&intr_timecounter);
1.19      cgd       181:        cpu_initclocks();
                    182:
                    183:        /*
1.108     yamt      184:         * Compute profhz and stathz, fix profhz if needed.
1.19      cgd       185:         */
                    186:        i = stathz ? stathz : hz;
                    187:        if (profhz == 0)
                    188:                profhz = i;
                    189:        psratio = profhz / i;
1.91      yamt      190:        if (schedhz == 0) {
                    191:                /* 16Hz is best */
1.114     ad        192:                hardscheddiv = hz / 16;
                    193:                if (hardscheddiv <= 0)
                    194:                        panic("hardscheddiv");
1.91      yamt      195:        }
1.31      mycroft   196:
1.130.12.2! jdolecek  197:        sysctl_createv(&clog, 0, NULL, NULL,
        !           198:                       CTLFLAG_PERMANENT,
        !           199:                       CTLTYPE_STRUCT, "clockrate",
        !           200:                       SYSCTL_DESCR("Kernel clock rates"),
        !           201:                       sysctl_kern_clockrate, 0, NULL,
        !           202:                       sizeof(struct clockinfo),
        !           203:                       CTL_KERN, KERN_CLOCKRATE, CTL_EOL);
        !           204:        sysctl_createv(&clog, 0, NULL, NULL,
        !           205:                       CTLFLAG_PERMANENT,
        !           206:                       CTLTYPE_INT, "hardclock_ticks",
        !           207:                       SYSCTL_DESCR("Number of hardclock ticks"),
        !           208:                       NULL, 0, &hardclock_ticks, sizeof(hardclock_ticks),
        !           209:                       CTL_KERN, KERN_HARDCLOCK_TICKS, CTL_EOL);
1.19      cgd       210: }
                    211:
                    212: /*
                    213:  * The real-time timer, interrupting hz times per second.
                    214:  */
                    215: void
1.63      thorpej   216: hardclock(struct clockframe *frame)
1.19      cgd       217: {
1.82      thorpej   218:        struct lwp *l;
1.120     ad        219:        struct cpu_info *ci;
1.19      cgd       220:
1.120     ad        221:        ci = curcpu();
1.114     ad        222:        l = ci->ci_data.cpu_onproc;
1.120     ad        223:
                    224:        timer_tick(l, CLKF_USERMODE(frame));
1.19      cgd       225:
                    226:        /*
                    227:         * If no separate statistics clock is available, run it from here.
                    228:         */
                    229:        if (stathz == 0)
                    230:                statclock(frame);
1.114     ad        231:        /*
                    232:         * If no separate schedclock is provided, call it here
                    233:         * at about 16 Hz.
                    234:         */
                    235:        if (schedhz == 0) {
                    236:                if ((int)(--ci->ci_schedstate.spc_schedticks) <= 0) {
                    237:                        schedclock(l);
                    238:                        ci->ci_schedstate.spc_schedticks = hardscheddiv;
                    239:                }
                    240:        }
1.108     yamt      241:        if ((--ci->ci_schedstate.spc_ticks) <= 0)
                    242:                sched_tick(ci);
1.93      perry     243:
1.123     ad        244:        if (CPU_IS_PRIMARY(ci)) {
1.121     ad        245:                hardclock_ticks++;
                    246:                tc_ticktock();
                    247:        }
1.19      cgd       248:
                    249:        /*
1.126     pooka     250:         * Update real-time timeout queue.
1.106     ad        251:         */
1.109     ad        252:        callout_hardclock();
1.130.12.1  tls       253:
                    254: #ifdef KDTRACE_HOOKS
                    255:        cyclic_clock_func_t func = cyclic_clock_func[cpu_index(ci)];
                    256:        if (func) {
                    257:                (*func)((struct clockframe *)frame);
                    258:        }
                    259: #endif
1.19      cgd       260: }
                    261:
                    262: /*
                    263:  * Start profiling on a process.
                    264:  *
                    265:  * Kernel profiling passes proc0 which never exits and hence
                    266:  * keeps the profile clock running constantly.
                    267:  */
                    268: void
1.63      thorpej   269: startprofclock(struct proc *p)
1.19      cgd       270: {
                    271:
1.109     ad        272:        KASSERT(mutex_owned(&p->p_stmutex));
1.105     ad        273:
                    274:        if ((p->p_stflag & PST_PROFIL) == 0) {
                    275:                p->p_stflag |= PST_PROFIL;
1.80      briggs    276:                /*
                    277:                 * This is only necessary if using the clock as the
                    278:                 * profiling source.
                    279:                 */
1.70      sommerfe  280:                if (++profprocs == 1 && stathz != 0)
                    281:                        psdiv = psratio;
1.19      cgd       282:        }
                    283: }
                    284:
                    285: /*
                    286:  * Stop profiling on a process.
                    287:  */
                    288: void
1.63      thorpej   289: stopprofclock(struct proc *p)
1.19      cgd       290: {
                    291:
1.109     ad        292:        KASSERT(mutex_owned(&p->p_stmutex));
1.105     ad        293:
                    294:        if (p->p_stflag & PST_PROFIL) {
                    295:                p->p_stflag &= ~PST_PROFIL;
1.80      briggs    296:                /*
                    297:                 * This is only necessary if using the clock as the
                    298:                 * profiling source.
                    299:                 */
1.70      sommerfe  300:                if (--profprocs == 0 && stathz != 0)
                    301:                        psdiv = 1;
1.19      cgd       302:        }
                    303: }
                    304:
#if defined(PERFCTRS)
/*
 * Independent profiling "tick" in case we're using a separate
 * clock or profiling event source.  Currently, that's just
 * performance counters--hence the wrapper.
 *
 * A tick taken in user mode is charged to the interrupted PC of the
 * current process, if that process is being profiled.  A tick taken
 * in kernel mode feeds the kernel gprof histogram (GPROF) and, where
 * LWP_PC is available, is charged to the LWP's user PC.
 */
void
proftick(struct clockframe *frame)
{
#ifdef GPROF
	struct gmonparam *g;
	intptr_t i;
#endif
	struct lwp *l;
	struct proc *p;

	l = curcpu()->ci_data.cpu_onproc;
	p = (l ? l->l_proc : NULL);
	if (CLKF_USERMODE(frame)) {
		/*
		 * p is computed as possibly NULL above, so do not reach
		 * through it for the mutex unless it is valid.  The
		 * kernel-mode path below applies the same check.
		 */
		if (p != NULL) {
			mutex_spin_enter(&p->p_stmutex);
			if (p->p_stflag & PST_PROFIL)
				addupc_intr(l, CLKF_PC(frame));
			mutex_spin_exit(&p->p_stmutex);
		}
	} else {
#ifdef GPROF
		/* Bump the kernel histogram bucket covering this PC. */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
#ifdef LWP_PC
		/*
		 * NOTE(review): p_stflag is read here without p_stmutex,
		 * unlike the user-mode path above -- confirm that this is
		 * intentional before changing it.
		 */
		if (p != NULL && (p->p_stflag & PST_PROFIL) != 0)
			addupc_intr(l, LWP_PC(l));
#endif
	}
}
#endif
                    346:
1.108     yamt      347: void
                    348: schedclock(struct lwp *l)
                    349: {
                    350:        if ((l->l_flag & LW_IDLE) != 0)
                    351:                return;
                    352:
                    353:        sched_schedclock(l);
                    354: }
                    355:
1.19      cgd       356: /*
                    357:  * Statistics clock.  Grab profile sample, and if divider reaches 0,
                    358:  * do process and kernel statistics.
                    359:  */
                    360: void
1.63      thorpej   361: statclock(struct clockframe *frame)
1.19      cgd       362: {
                    363: #ifdef GPROF
1.55      augustss  364:        struct gmonparam *g;
1.68      eeh       365:        intptr_t i;
1.19      cgd       366: #endif
1.60      thorpej   367:        struct cpu_info *ci = curcpu();
                    368:        struct schedstate_percpu *spc = &ci->ci_schedstate;
1.55      augustss  369:        struct proc *p;
1.98      christos  370:        struct lwp *l;
1.19      cgd       371:
1.70      sommerfe  372:        /*
                    373:         * Notice changes in divisor frequency, and adjust clock
                    374:         * frequency accordingly.
                    375:         */
                    376:        if (spc->spc_psdiv != psdiv) {
                    377:                spc->spc_psdiv = psdiv;
                    378:                spc->spc_pscnt = psdiv;
                    379:                if (psdiv == 1) {
                    380:                        setstatclockrate(stathz);
                    381:                } else {
1.93      perry     382:                        setstatclockrate(profhz);
1.70      sommerfe  383:                }
                    384:        }
1.114     ad        385:        l = ci->ci_data.cpu_onproc;
1.108     yamt      386:        if ((l->l_flag & LW_IDLE) != 0) {
                    387:                /*
                    388:                 * don't account idle lwps as swapper.
                    389:                 */
                    390:                p = NULL;
                    391:        } else {
                    392:                p = l->l_proc;
1.105     ad        393:                mutex_spin_enter(&p->p_stmutex);
1.108     yamt      394:        }
                    395:
1.19      cgd       396:        if (CLKF_USERMODE(frame)) {
1.130.12.2! jdolecek  397:                KASSERT(p != NULL);
1.105     ad        398:                if ((p->p_stflag & PST_PROFIL) && profsrc == PROFSRC_CLOCK)
                    399:                        addupc_intr(l, CLKF_PC(frame));
                    400:                if (--spc->spc_pscnt > 0) {
                    401:                        mutex_spin_exit(&p->p_stmutex);
1.19      cgd       402:                        return;
1.105     ad        403:                }
                    404:
1.19      cgd       405:                /*
                    406:                 * Came from user mode; CPU was in user state.
                    407:                 * If this process is being profiled record the tick.
                    408:                 */
                    409:                p->p_uticks++;
                    410:                if (p->p_nice > NZERO)
1.60      thorpej   411:                        spc->spc_cp_time[CP_NICE]++;
1.19      cgd       412:                else
1.60      thorpej   413:                        spc->spc_cp_time[CP_USER]++;
1.19      cgd       414:        } else {
                    415: #ifdef GPROF
                    416:                /*
                    417:                 * Kernel statistics are just like addupc_intr, only easier.
                    418:                 */
                    419:                g = &_gmonparam;
1.80      briggs    420:                if (profsrc == PROFSRC_CLOCK && g->state == GMON_PROF_ON) {
1.19      cgd       421:                        i = CLKF_PC(frame) - g->lowpc;
                    422:                        if (i < g->textsize) {
                    423:                                i /= HISTFRACTION * sizeof(*g->kcount);
                    424:                                g->kcount[i]++;
                    425:                        }
                    426:                }
                    427: #endif
1.82      thorpej   428: #ifdef LWP_PC
1.108     yamt      429:                if (p != NULL && profsrc == PROFSRC_CLOCK &&
                    430:                    (p->p_stflag & PST_PROFIL)) {
1.105     ad        431:                        addupc_intr(l, LWP_PC(l));
1.108     yamt      432:                }
1.72      mycroft   433: #endif
1.105     ad        434:                if (--spc->spc_pscnt > 0) {
                    435:                        if (p != NULL)
                    436:                                mutex_spin_exit(&p->p_stmutex);
1.19      cgd       437:                        return;
1.105     ad        438:                }
1.19      cgd       439:                /*
                    440:                 * Came from kernel mode, so we were:
                    441:                 * - handling an interrupt,
                    442:                 * - doing syscall or trap work on behalf of the current
                    443:                 *   user process, or
                    444:                 * - spinning in the idle loop.
                    445:                 * Whichever it is, charge the time as appropriate.
                    446:                 * Note that we charge interrupts to the current process,
                    447:                 * regardless of whether they are ``for'' that process,
                    448:                 * so that we know how much of its real time was spent
                    449:                 * in ``non-process'' (i.e., interrupt) work.
                    450:                 */
1.114     ad        451:                if (CLKF_INTR(frame) || (curlwp->l_pflag & LP_INTR) != 0) {
1.108     yamt      452:                        if (p != NULL) {
1.19      cgd       453:                                p->p_iticks++;
1.108     yamt      454:                        }
1.60      thorpej   455:                        spc->spc_cp_time[CP_INTR]++;
1.19      cgd       456:                } else if (p != NULL) {
                    457:                        p->p_sticks++;
1.60      thorpej   458:                        spc->spc_cp_time[CP_SYS]++;
1.108     yamt      459:                } else {
1.60      thorpej   460:                        spc->spc_cp_time[CP_IDLE]++;
1.108     yamt      461:                }
1.19      cgd       462:        }
1.70      sommerfe  463:        spc->spc_pscnt = psdiv;
1.19      cgd       464:
1.97      elad      465:        if (p != NULL) {
1.125     rmind     466:                atomic_inc_uint(&l->l_cpticks);
1.105     ad        467:                mutex_spin_exit(&p->p_stmutex);
1.108     yamt      468:        }
1.19      cgd       469: }
1.130.12.2! jdolecek  470:
        !           471: /*
        !           472:  * sysctl helper routine for kern.clockrate. Assembles a struct on
        !           473:  * the fly to be returned to the caller.
        !           474:  */
        !           475: static int
        !           476: sysctl_kern_clockrate(SYSCTLFN_ARGS)
        !           477: {
        !           478:        struct clockinfo clkinfo;
        !           479:        struct sysctlnode node;
        !           480:
        !           481:        clkinfo.tick = tick;
        !           482:        clkinfo.tickadj = tickadj;
        !           483:        clkinfo.hz = hz;
        !           484:        clkinfo.profhz = profhz;
        !           485:        clkinfo.stathz = stathz ? stathz : hz;
        !           486:
        !           487:        node = *rnode;
        !           488:        node.sysctl_data = &clkinfo;
        !           489:        return (sysctl_lookup(SYSCTLFN_CALL(&node)));
        !           490: }
CVSweb <webmaster@jp.NetBSD.org>