Annotation of src/sys/kern/kern_lwp.c, Revision 1.248
1.248 ! riastrad 1: /* $NetBSD: kern_lwp.c,v 1.247 2022/03/10 12:21:25 riastradh Exp $ */
1.2 thorpej 2:
3: /*-
1.220 ad 4: * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2019, 2020
5: * The NetBSD Foundation, Inc.
1.2 thorpej 6: * All rights reserved.
7: *
8: * This code is derived from software contributed to The NetBSD Foundation
1.52 ad 9: * by Nathan J. Williams, and Andrew Doran.
1.2 thorpej 10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30: * POSSIBILITY OF SUCH DAMAGE.
31: */
1.9 lukem 32:
1.52 ad 33: /*
34: * Overview
35: *
1.66 ad 36: * Lightweight processes (LWPs) are the basic unit or thread of
1.52 ad 37: * execution within the kernel. The core state of an LWP is described
1.66 ad 38: * by "struct lwp", also known as lwp_t.
1.52 ad 39: *
              40: *	Each LWP is contained within a process (described by "struct proc").
41: * Every process contains at least one LWP, but may contain more. The
42: * process describes attributes shared among all of its LWPs such as a
43: * private address space, global execution state (stopped, active,
44: * zombie, ...), signal disposition and so on. On a multiprocessor
1.66 ad    45: *	machine, multiple LWPs may be executing concurrently in the kernel.
1.52 ad 46: *
47: * Execution states
48: *
49: * At any given time, an LWP has overall state that is described by
50: * lwp::l_stat. The states are broken into two sets below. The first
51: * set is guaranteed to represent the absolute, current state of the
52: * LWP:
1.101 rmind 53: *
54: * LSONPROC
55: *
56: * On processor: the LWP is executing on a CPU, either in the
57: * kernel or in user space.
58: *
59: * LSRUN
60: *
61: * Runnable: the LWP is parked on a run queue, and may soon be
62: * chosen to run by an idle processor, or by a processor that
              63: *		has been asked to preempt a currently running but lower
1.134 rmind 64: * priority LWP.
1.101 rmind 65: *
66: * LSIDL
67: *
1.238 ad 68: * Idle: the LWP has been created but has not yet executed, or
69: * it has ceased executing a unit of work and is waiting to be
70: * started again. This state exists so that the LWP can occupy
71: * a slot in the process & PID table, but without having to
72: * worry about being touched; lookups of the LWP by ID will
73: * fail while in this state. The LWP will become visible for
74: * lookup once its state transitions further. Some special
75: * kernel threads also (ab)use this state to indicate that they
76: * are idle (soft interrupts and idle LWPs).
1.101 rmind 77: *
78: * LSSUSPENDED:
79: *
80: * Suspended: the LWP has had its execution suspended by
1.52 ad 81: * another LWP in the same process using the _lwp_suspend()
82: * system call. User-level LWPs also enter the suspended
83: * state when the system is shutting down.
84: *
              85: *	The second set represents a "statement of intent" on behalf of the
              86: *	LWP.  The LWP may in fact be executing on a processor, or may be
1.66 ad 87: * sleeping or idle. It is expected to take the necessary action to
1.101 rmind 88: * stop executing or become "running" again within a short timeframe.
1.227 ad 89: * The LP_RUNNING flag in lwp::l_pflag indicates that an LWP is running.
1.101 rmind 90: * Importantly, it indicates that its state is tied to a CPU.
91: *
92: * LSZOMB:
93: *
94: * Dead or dying: the LWP has released most of its resources
1.129 ad 95: * and is about to switch away into oblivion, or has already
1.66 ad 96: * switched away. When it switches away, its few remaining
97: * resources can be collected.
1.101 rmind 98: *
99: * LSSLEEP:
100: *
101: * Sleeping: the LWP has entered itself onto a sleep queue, and
102: * has switched away or will switch away shortly to allow other
1.66 ad 103: * LWPs to run on the CPU.
1.101 rmind 104: *
105: * LSSTOP:
106: *
107: * Stopped: the LWP has been stopped as a result of a job
108: * control signal, or as a result of the ptrace() interface.
109: *
110: * Stopped LWPs may run briefly within the kernel to handle
111: * signals that they receive, but will not return to user space
112: * until their process' state is changed away from stopped.
113: *
114: * Single LWPs within a process can not be set stopped
115: * selectively: all actions that can stop or continue LWPs
116: * occur at the process level.
117: *
1.52 ad 118: * State transitions
119: *
1.66 ad 120: * Note that the LSSTOP state may only be set when returning to
              121: *	user space in userret(), or when sleeping interruptibly.  The
122: * LSSUSPENDED state may only be set in userret(). Before setting
123: * those states, we try to ensure that the LWPs will release all
124: * locks that they hold, and at a minimum try to ensure that the
125: * LWP can be set runnable again by a signal.
1.52 ad 126: *
127: * LWPs may transition states in the following ways:
128: *
129: * RUN -------> ONPROC ONPROC -----> RUN
1.129 ad 130: * > SLEEP
131: * > STOPPED
1.52 ad 132: * > SUSPENDED
133: * > ZOMB
1.129 ad 134: * > IDL (special cases)
1.52 ad 135: *
136: * STOPPED ---> RUN SUSPENDED --> RUN
1.129 ad 137: * > SLEEP
1.52 ad 138: *
139: * SLEEP -----> ONPROC IDL --------> RUN
1.101 rmind 140: * > RUN > SUSPENDED
141: * > STOPPED > STOPPED
1.129 ad 142: * > ONPROC (special cases)
1.52 ad 143: *
1.129 ad 144: * Some state transitions are only possible with kernel threads (eg
145: * ONPROC -> IDL) and happen under tightly controlled circumstances
146: * free of unwanted side effects.
1.66 ad 147: *
1.114 rmind 148: * Migration
149: *
150: * Migration of threads from one CPU to another could be performed
151: * internally by the scheduler via sched_takecpu() or sched_catchlwp()
152: * functions. The universal lwp_migrate() function should be used for
              153: *	any other cases.  Subsystems in the kernel must be aware that the
              154: *	CPU of an LWP may change while it is not locked.
155: *
1.52 ad 156: * Locking
157: *
158: * The majority of fields in 'struct lwp' are covered by a single,
1.66 ad 159: * general spin lock pointed to by lwp::l_mutex. The locks covering
1.52 ad 160: * each field are documented in sys/lwp.h.
161: *
1.66 ad 162: * State transitions must be made with the LWP's general lock held,
1.152 rmind 163: * and may cause the LWP's lock pointer to change. Manipulation of
1.66 ad 164: * the general lock is not performed directly, but through calls to
1.152 rmind 165: * lwp_lock(), lwp_unlock() and others. It should be noted that the
166: * adaptive locks are not allowed to be released while the LWP's lock
167: * is being held (unlike for other spin-locks).
1.52 ad 168: *
169: * States and their associated locks:
170: *
1.212 ad   171: *	LSIDL, LSONPROC, LSZOMB, LSSUSPENDED:
1.52 ad 172: *
1.212 ad 173: * Always covered by spc_lwplock, which protects LWPs not
174: * associated with any other sync object. This is a per-CPU
175: * lock and matches lwp::l_cpu.
1.52 ad 176: *
1.212 ad 177: * LSRUN:
1.52 ad 178: *
1.64 yamt 179: * Always covered by spc_mutex, which protects the run queues.
1.129 ad 180: * This is a per-CPU lock and matches lwp::l_cpu.
1.52 ad 181: *
182: * LSSLEEP:
183: *
1.212 ad 184: * Covered by a lock associated with the sleep queue (sometimes
1.221 ad 185: * a turnstile sleep queue) that the LWP resides on. This can
186: * be spc_lwplock for SOBJ_SLEEPQ_NULL (an "untracked" sleep).
1.52 ad 187: *
1.212 ad 188: * LSSTOP:
1.101 rmind 189: *
1.52 ad 190: * If the LWP was previously sleeping (l_wchan != NULL), then
1.66 ad 191: * l_mutex references the sleep queue lock. If the LWP was
1.52 ad 192: * runnable or on the CPU when halted, or has been removed from
1.66 ad 193: * the sleep queue since halted, then the lock is spc_lwplock.
1.52 ad 194: *
195: * The lock order is as follows:
196: *
1.212 ad 197: * sleepq -> turnstile -> spc_lwplock -> spc_mutex
1.52 ad 198: *
1.243 skrll 199: * Each process has a scheduler state lock (proc::p_lock), and a
1.52 ad 200: * number of counters on LWPs and their states: p_nzlwps, p_nrlwps, and
201: * so on. When an LWP is to be entered into or removed from one of the
1.103 ad 202: * following states, p_lock must be held and the process wide counters
1.52 ad 203: * adjusted:
204: *
205: * LSIDL, LSZOMB, LSSTOP, LSSUSPENDED
206: *
1.129 ad 207: * (But not always for kernel threads. There are some special cases
1.212 ad 208: * as mentioned above: soft interrupts, and the idle loops.)
1.129 ad 209: *
1.52 ad 210: * Note that an LWP is considered running or likely to run soon if in
211: * one of the following states. This affects the value of p_nrlwps:
212: *
213: * LSRUN, LSONPROC, LSSLEEP
214: *
1.103 ad 215: * p_lock does not need to be held when transitioning among these
1.129 ad 216: * three states, hence p_lock is rarely taken for state transitions.
1.52 ad 217: */
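/*
 * Illustrative sketch (editorial addition, not part of the original
 * source): the canonical pattern for changing an LWP's state under the
 * locking rules above, modelled on lwp_continue().  The state tested
 * here is hypothetical.
 *
 *	mutex_enter(p->p_lock);		// stabilize process-wide state
 *	lwp_lock(l);			// takes whatever l->l_mutex points at
 *	if (l->l_stat == LSSUSPENDED)
 *		setrunnable(l);		// adjusts counters; releases LWP lock
 *	else
 *		lwp_unlock(l);
 *	mutex_exit(p->p_lock);
 */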
218:
1.9 lukem 219: #include <sys/cdefs.h>
1.248 ! riastrad 220: __KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.247 2022/03/10 12:21:25 riastradh Exp $");
1.8 martin 221:
1.84 yamt 222: #include "opt_ddb.h"
1.52 ad 223: #include "opt_lockdebug.h"
1.139 darran 224: #include "opt_dtrace.h"
1.2 thorpej 225:
1.47 hannken 226: #define _LWP_API_PRIVATE
227:
1.2 thorpej 228: #include <sys/param.h>
229: #include <sys/systm.h>
1.64 yamt 230: #include <sys/cpu.h>
1.2 thorpej 231: #include <sys/pool.h>
232: #include <sys/proc.h>
233: #include <sys/syscallargs.h>
1.57 dsl 234: #include <sys/syscall_stats.h>
1.37 ad 235: #include <sys/kauth.h>
1.52 ad 236: #include <sys/sleepq.h>
237: #include <sys/lockdebug.h>
238: #include <sys/kmem.h>
1.91 rmind 239: #include <sys/pset.h>
1.75 ad 240: #include <sys/intr.h>
1.78 ad 241: #include <sys/lwpctl.h>
1.81 ad 242: #include <sys/atomic.h>
1.131 ad 243: #include <sys/filedesc.h>
1.196 hannken 244: #include <sys/fstrans.h>
1.138 darran 245: #include <sys/dtrace_bsd.h>
1.141 darran 246: #include <sys/sdt.h>
1.203 kamil 247: #include <sys/ptrace.h>
1.157 rmind 248: #include <sys/xcall.h>
1.169 christos 249: #include <sys/uidinfo.h>
250: #include <sys/sysctl.h>
1.201 ozaki-r 251: #include <sys/psref.h>
1.208 maxv 252: #include <sys/msan.h>
1.232 maxv 253: #include <sys/kcov.h>
1.233 thorpej 254: #include <sys/cprng.h>
1.236 thorpej 255: #include <sys/futex.h>
1.138 darran 256:
1.2 thorpej 257: #include <uvm/uvm_extern.h>
1.80 skrll 258: #include <uvm/uvm_object.h>
1.2 thorpej 259:
1.152 rmind 260: static pool_cache_t lwp_cache __read_mostly;
261: struct lwplist alllwp __cacheline_aligned;
1.41 thorpej 262:
1.238 ad 263: static int lwp_ctor(void *, void *, int);
1.157 rmind 264: static void lwp_dtor(void *, void *);
265:
1.141 darran 266: /* DTrace proc provider probes */
1.180 christos 267: SDT_PROVIDER_DEFINE(proc);
268:
269: SDT_PROBE_DEFINE1(proc, kernel, , lwp__create, "struct lwp *");
270: SDT_PROBE_DEFINE1(proc, kernel, , lwp__start, "struct lwp *");
271: SDT_PROBE_DEFINE1(proc, kernel, , lwp__exit, "struct lwp *");
1.141 darran 272:
1.213 ad 273: struct turnstile turnstile0 __cacheline_aligned;
1.147 pooka 274: struct lwp lwp0 __aligned(MIN_LWP_ALIGNMENT) = {
275: #ifdef LWP0_CPU_INFO
276: .l_cpu = LWP0_CPU_INFO,
277: #endif
1.154 matt 278: #ifdef LWP0_MD_INITIALIZER
279: .l_md = LWP0_MD_INITIALIZER,
280: #endif
1.147 pooka 281: .l_proc = &proc0,
1.235 thorpej 282: .l_lid = 0, /* we own proc0's slot in the pid table */
1.147 pooka 283: .l_flag = LW_SYSTEM,
284: .l_stat = LSONPROC,
285: .l_ts = &turnstile0,
286: .l_syncobj = &sched_syncobj,
1.231 ad 287: .l_refcnt = 0,
1.147 pooka 288: .l_priority = PRI_USER + NPRI_USER - 1,
289: .l_inheritedprio = -1,
290: .l_class = SCHED_OTHER,
291: .l_psid = PS_NONE,
292: .l_pi_lenders = SLIST_HEAD_INITIALIZER(&lwp0.l_pi_lenders),
293: .l_name = __UNCONST("swapper"),
294: .l_fd = &filedesc0,
295: };
296:
1.169 christos 297: static int sysctl_kern_maxlwp(SYSCTLFN_PROTO);
298:
299: /*
300: * sysctl helper routine for kern.maxlwp. Ensures that the new
301: * values are not too low or too high.
302: */
303: static int
304: sysctl_kern_maxlwp(SYSCTLFN_ARGS)
305: {
306: int error, nmaxlwp;
307: struct sysctlnode node;
308:
309: nmaxlwp = maxlwp;
310: node = *rnode;
311: node.sysctl_data = &nmaxlwp;
312: error = sysctl_lookup(SYSCTLFN_CALL(&node));
313: if (error || newp == NULL)
314: return error;
315:
316: if (nmaxlwp < 0 || nmaxlwp >= 65536)
317: return EINVAL;
318: if (nmaxlwp > cpu_maxlwp())
319: return EINVAL;
320: maxlwp = nmaxlwp;
321:
322: return 0;
323: }
324:
325: static void
326: sysctl_kern_lwp_setup(void)
327: {
1.242 maxv 328: sysctl_createv(NULL, 0, NULL, NULL,
1.169 christos 329: CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
330: CTLTYPE_INT, "maxlwp",
331: SYSCTL_DESCR("Maximum number of simultaneous threads"),
332: sysctl_kern_maxlwp, 0, NULL, 0,
333: CTL_KERN, CTL_CREATE, CTL_EOL);
334: }
335:
1.41 thorpej 336: void
337: lwpinit(void)
338: {
339:
1.152 rmind 340: LIST_INIT(&alllwp);
1.144 pooka 341: lwpinit_specificdata();
1.246 thorpej 342: /*
343: * Provide a barrier to ensure that all mutex_oncpu() and rw_oncpu()
344: * calls will exit before memory of LWPs is returned to the pool, where
              345: 	 * the KVA of an LWP structure might be freed and re-used for other purposes.
346: * Kernel preemption is disabled around mutex_oncpu() and rw_oncpu()
347: * callers, therefore a regular passive serialization barrier will
348: * do the job.
349: */
350: lwp_cache = pool_cache_init(sizeof(lwp_t), MIN_LWP_ALIGNMENT, 0,
351: PR_PSERIALIZE, "lwppl", NULL, IPL_NONE, lwp_ctor, lwp_dtor, NULL);
1.169 christos 352:
353: maxlwp = cpu_maxlwp();
354: sysctl_kern_lwp_setup();
1.41 thorpej 355: }
356:
1.147 pooka 357: void
358: lwp0_init(void)
359: {
360: struct lwp *l = &lwp0;
361:
362: KASSERT((void *)uvm_lwp_getuarea(l) != NULL);
363:
364: LIST_INSERT_HEAD(&alllwp, l, l_list);
365:
366: callout_init(&l->l_timeout_ch, CALLOUT_MPSAFE);
367: callout_setfunc(&l->l_timeout_ch, sleepq_timeout, l);
368: cv_init(&l->l_sigcv, "sigwait");
1.171 rmind 369: cv_init(&l->l_waitcv, "vfork");
1.147 pooka 370:
371: kauth_cred_hold(proc0.p_cred);
372: l->l_cred = proc0.p_cred;
373:
1.164 yamt 374: kdtrace_thread_ctor(NULL, l);
1.147 pooka 375: lwp_initspecific(l);
376:
377: SYSCALL_TIME_LWP_INIT(l);
378: }
379:
1.238 ad 380: /*
381: * Initialize the non-zeroed portion of an lwp_t.
382: */
383: static int
384: lwp_ctor(void *arg, void *obj, int flags)
385: {
386: lwp_t *l = obj;
387:
388: l->l_stat = LSIDL;
389: l->l_cpu = curcpu();
390: l->l_mutex = l->l_cpu->ci_schedstate.spc_lwplock;
391: l->l_ts = pool_get(&turnstile_pool, flags);
392:
393: if (l->l_ts == NULL) {
394: return ENOMEM;
395: } else {
396: turnstile_ctor(l->l_ts);
397: return 0;
398: }
399: }
400:
1.157 rmind 401: static void
1.245 thorpej 402: lwp_dtor(void *arg, void *obj)
403: {
404: lwp_t *l = obj;
405:
406: /*
407: * The value of l->l_cpu must still be valid at this point.
408: */
1.157 rmind 409: KASSERT(l->l_cpu != NULL);
1.238 ad 410:
411: /*
412: * We can't return turnstile0 to the pool (it didn't come from it),
413: * so if it comes up just drop it quietly and move on.
414: */
415: if (l->l_ts != &turnstile0)
416: pool_put(&turnstile_pool, l->l_ts);
1.157 rmind 417: }
418:
1.52 ad 419: /*
1.238 ad 420: * Set an LWP suspended.
1.52 ad 421: *
1.103 ad 422: * Must be called with p_lock held, and the LWP locked. Will unlock the
1.52 ad 423: * LWP before return.
424: */
1.2 thorpej 425: int
1.52 ad 426: lwp_suspend(struct lwp *curl, struct lwp *t)
1.2 thorpej 427: {
1.52 ad 428: int error;
1.2 thorpej 429:
1.103 ad 430: KASSERT(mutex_owned(t->l_proc->p_lock));
1.63 ad 431: KASSERT(lwp_locked(t, NULL));
1.33 chs 432:
1.52 ad 433: KASSERT(curl != t || curl->l_stat == LSONPROC);
1.2 thorpej 434:
1.52 ad 435: /*
436: * If the current LWP has been told to exit, we must not suspend anyone
437: * else or deadlock could occur. We won't return to userspace.
1.2 thorpej 438: */
1.109 rmind 439: if ((curl->l_flag & (LW_WEXIT | LW_WCORE)) != 0) {
1.52 ad 440: lwp_unlock(t);
441: return (EDEADLK);
1.2 thorpej 442: }
443:
1.204 kamil 444: if ((t->l_flag & LW_DBGSUSPEND) != 0) {
445: lwp_unlock(t);
446: return 0;
447: }
448:
1.52 ad 449: error = 0;
1.2 thorpej 450:
1.52 ad 451: switch (t->l_stat) {
452: case LSRUN:
453: case LSONPROC:
1.56 pavel 454: t->l_flag |= LW_WSUSPEND;
1.52 ad 455: lwp_need_userret(t);
456: lwp_unlock(t);
457: break;
1.2 thorpej 458:
1.52 ad 459: case LSSLEEP:
1.56 pavel 460: t->l_flag |= LW_WSUSPEND;
1.2 thorpej 461:
462: /*
1.52 ad 463: * Kick the LWP and try to get it to the kernel boundary
464: * so that it will release any locks that it holds.
465: * setrunnable() will release the lock.
1.2 thorpej 466: */
1.56 pavel 467: if ((t->l_flag & LW_SINTR) != 0)
1.52 ad 468: setrunnable(t);
469: else
470: lwp_unlock(t);
471: break;
1.2 thorpej 472:
1.52 ad 473: case LSSUSPENDED:
474: lwp_unlock(t);
475: break;
1.17 manu 476:
1.52 ad 477: case LSSTOP:
1.56 pavel 478: t->l_flag |= LW_WSUSPEND;
1.52 ad 479: setrunnable(t);
480: break;
1.2 thorpej 481:
1.52 ad 482: case LSIDL:
483: case LSZOMB:
484: error = EINTR; /* It's what Solaris does..... */
485: lwp_unlock(t);
486: break;
1.2 thorpej 487: }
488:
1.69 rmind 489: return (error);
1.2 thorpej 490: }
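/*
 * Illustrative caller sketch for lwp_suspend() (editorial addition;
 * the _lwp_suspend() system call follows this pattern):
 *
 *	mutex_enter(t->l_proc->p_lock);
 *	lwp_lock(t);
 *	error = lwp_suspend(curlwp, t);	// unlocks t; p_lock still held
 *	mutex_exit(t->l_proc->p_lock);
 */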
491:
1.52 ad 492: /*
493: * Restart a suspended LWP.
494: *
1.103 ad 495: * Must be called with p_lock held, and the LWP locked. Will unlock the
1.52 ad 496: * LWP before return.
497: */
1.2 thorpej 498: void
499: lwp_continue(struct lwp *l)
500: {
501:
1.103 ad 502: KASSERT(mutex_owned(l->l_proc->p_lock));
1.63 ad 503: KASSERT(lwp_locked(l, NULL));
1.52 ad 504:
505: /* If rebooting or not suspended, then just bail out. */
1.56 pavel 506: if ((l->l_flag & LW_WREBOOT) != 0) {
1.52 ad 507: lwp_unlock(l);
1.2 thorpej 508: return;
1.10 fvdl 509: }
1.2 thorpej 510:
1.56 pavel 511: l->l_flag &= ~LW_WSUSPEND;
1.2 thorpej 512:
1.204 kamil 513: if (l->l_stat != LSSUSPENDED || (l->l_flag & LW_DBGSUSPEND) != 0) {
1.52 ad 514: lwp_unlock(l);
515: return;
1.2 thorpej 516: }
517:
1.52 ad 518: /* setrunnable() will release the lock. */
519: setrunnable(l);
1.2 thorpej 520: }
521:
1.52 ad 522: /*
1.142 christos 523: * Restart a stopped LWP.
524: *
525: * Must be called with p_lock held, and the LWP NOT locked. Will unlock the
526: * LWP before return.
527: */
528: void
529: lwp_unstop(struct lwp *l)
530: {
531: struct proc *p = l->l_proc;
1.167 rmind 532:
1.239 ad 533: KASSERT(mutex_owned(&proc_lock));
1.142 christos 534: KASSERT(mutex_owned(p->p_lock));
535:
536: lwp_lock(l);
537:
1.204 kamil 538: KASSERT((l->l_flag & LW_DBGSUSPEND) == 0);
539:
1.142 christos 540: /* If not stopped, then just bail out. */
541: if (l->l_stat != LSSTOP) {
542: lwp_unlock(l);
543: return;
544: }
545:
546: p->p_stat = SACTIVE;
547: p->p_sflag &= ~PS_STOPPING;
548:
549: if (!p->p_waited)
550: p->p_pptr->p_nstopchild--;
551:
552: if (l->l_wchan == NULL) {
553: /* setrunnable() will release the lock. */
554: setrunnable(l);
1.183 christos 555: } else if (p->p_xsig && (l->l_flag & LW_SINTR) != 0) {
1.163 christos 556: /* setrunnable() so we can receive the signal */
557: setrunnable(l);
1.142 christos 558: } else {
559: l->l_stat = LSSLEEP;
560: p->p_nrlwps++;
561: lwp_unlock(l);
562: }
563: }
564:
565: /*
1.52 ad 566: * Wait for an LWP within the current process to exit. If 'lid' is
567: * non-zero, we are waiting for a specific LWP.
568: *
1.103 ad 569: * Must be called with p->p_lock held.
1.52 ad 570: */
1.2 thorpej 571: int
1.173 rmind 572: lwp_wait(struct lwp *l, lwpid_t lid, lwpid_t *departed, bool exiting)
1.2 thorpej 573: {
1.173 rmind 574: const lwpid_t curlid = l->l_lid;
575: proc_t *p = l->l_proc;
1.223 ad 576: lwp_t *l2, *next;
1.173 rmind 577: int error;
1.2 thorpej 578:
1.103 ad 579: KASSERT(mutex_owned(p->p_lock));
1.52 ad 580:
581: p->p_nlwpwait++;
1.63 ad 582: l->l_waitingfor = lid;
1.52 ad 583:
584: for (;;) {
1.173 rmind 585: int nfound;
586:
1.52 ad 587: /*
588: * Avoid a race between exit1() and sigexit(): if the
589: * process is dumping core, then we need to bail out: call
590: * into lwp_userret() where we will be suspended until the
591: * deed is done.
592: */
593: if ((p->p_sflag & PS_WCORE) != 0) {
1.103 ad 594: mutex_exit(p->p_lock);
1.52 ad 595: lwp_userret(l);
1.173 rmind 596: KASSERT(false);
1.52 ad 597: }
598:
599: /*
600: * First off, drain any detached LWP that is waiting to be
601: * reaped.
602: */
603: while ((l2 = p->p_zomblwp) != NULL) {
604: p->p_zomblwp = NULL;
1.63 ad 605: lwp_free(l2, false, false);/* releases proc mutex */
1.103 ad 606: mutex_enter(p->p_lock);
1.52 ad 607: }
608:
609: /*
610: * Now look for an LWP to collect. If the whole process is
611: * exiting, count detached LWPs as eligible to be collected,
612: * but don't drain them here.
613: */
614: nfound = 0;
1.63 ad 615: error = 0;
1.223 ad 616:
617: /*
1.238 ad 618: * If given a specific LID, go via pid_table and make sure
1.223 ad 619: * it's not detached.
620: */
621: if (lid != 0) {
1.235 thorpej 622: l2 = proc_find_lwp(p, lid);
1.223 ad 623: if (l2 == NULL) {
624: error = ESRCH;
625: break;
626: }
627: KASSERT(l2->l_lid == lid);
628: if ((l2->l_prflag & LPR_DETACHED) != 0) {
629: error = EINVAL;
630: break;
631: }
632: } else {
633: l2 = LIST_FIRST(&p->p_lwps);
634: }
635: for (; l2 != NULL; l2 = next) {
636: next = (lid != 0 ? NULL : LIST_NEXT(l2, l_sibling));
637:
1.63 ad 638: /*
639: * If a specific wait and the target is waiting on
640: * us, then avoid deadlock. This also traps LWPs
641: * that try to wait on themselves.
642: *
643: * Note that this does not handle more complicated
644: * cycles, like: t1 -> t2 -> t3 -> t1. The process
645: * can still be killed so it is not a major problem.
646: */
647: if (l2->l_lid == lid && l2->l_waitingfor == curlid) {
648: error = EDEADLK;
649: break;
650: }
651: if (l2 == l)
1.52 ad 652: continue;
653: if ((l2->l_prflag & LPR_DETACHED) != 0) {
1.63 ad 654: nfound += exiting;
655: continue;
656: }
657: if (lid != 0) {
658: /*
659: * Mark this LWP as the first waiter, if there
660: * is no other.
661: */
662: if (l2->l_waiter == 0)
663: l2->l_waiter = curlid;
664: } else if (l2->l_waiter != 0) {
665: /*
666: * It already has a waiter - so don't
667: * collect it. If the waiter doesn't
668: * grab it we'll get another chance
669: * later.
670: */
671: nfound++;
1.52 ad 672: continue;
673: }
674: nfound++;
1.2 thorpej 675:
1.52 ad 676: /* No need to lock the LWP in order to see LSZOMB. */
677: if (l2->l_stat != LSZOMB)
678: continue;
1.2 thorpej 679:
1.63 ad 680: /*
681: * We're no longer waiting. Reset the "first waiter"
682: * pointer on the target, in case it was us.
683: */
684: l->l_waitingfor = 0;
685: l2->l_waiter = 0;
686: p->p_nlwpwait--;
1.2 thorpej 687: if (departed)
688: *departed = l2->l_lid;
1.75 ad 689: sched_lwp_collect(l2);
1.63 ad 690:
691: /* lwp_free() releases the proc lock. */
692: lwp_free(l2, false, false);
1.103 ad 693: mutex_enter(p->p_lock);
1.52 ad 694: return 0;
695: }
1.2 thorpej 696:
1.63 ad 697: if (error != 0)
698: break;
1.52 ad 699: if (nfound == 0) {
700: error = ESRCH;
701: break;
702: }
1.63 ad 703:
704: /*
1.173 rmind   705: 		 * Note: since the lock will be dropped, we need to restart on
706: * wakeup to run all LWPs again, e.g. there may be new LWPs.
1.63 ad 707: */
708: if (exiting) {
1.52 ad 709: KASSERT(p->p_nlwps > 1);
1.222 ad 710: error = cv_timedwait(&p->p_lwpcv, p->p_lock, 1);
1.173 rmind 711: break;
1.52 ad 712: }
1.63 ad 713:
714: /*
1.234 ad 715: * Break out if all LWPs are in _lwp_wait(). There are
716: * other ways to hang the process with _lwp_wait(), but the
              717: 		 * sleep is interruptible, so there is little point checking for them.
1.63 ad 718: */
1.234 ad 719: if (p->p_nlwpwait == p->p_nlwps) {
1.52 ad 720: error = EDEADLK;
721: break;
1.2 thorpej 722: }
1.63 ad 723:
724: /*
725: * Sit around and wait for something to happen. We'll be
726: * awoken if any of the conditions examined change: if an
727: * LWP exits, is collected, or is detached.
728: */
1.103 ad 729: if ((error = cv_wait_sig(&p->p_lwpcv, p->p_lock)) != 0)
1.52 ad 730: break;
1.2 thorpej 731: }
732:
1.63 ad 733: /*
734: * We didn't find any LWPs to collect, we may have received a
735: * signal, or some other condition has caused us to bail out.
736: *
737: * If waiting on a specific LWP, clear the waiters marker: some
738: * other LWP may want it. Then, kick all the remaining waiters
739: * so that they can re-check for zombies and for deadlock.
740: */
741: if (lid != 0) {
1.235 thorpej 742: l2 = proc_find_lwp(p, lid);
1.223 ad 743: KASSERT(l2 == NULL || l2->l_lid == lid);
744:
745: if (l2 != NULL && l2->l_waiter == curlid)
746: l2->l_waiter = 0;
1.63 ad 747: }
1.52 ad 748: p->p_nlwpwait--;
1.63 ad 749: l->l_waitingfor = 0;
750: cv_broadcast(&p->p_lwpcv);
751:
1.52 ad 752: return error;
1.2 thorpej 753: }
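/*
 * Illustrative caller sketch for lwp_wait() (editorial addition;
 * cf. the _lwp_wait() system call):
 *
 *	mutex_enter(p->p_lock);		// must be held on entry...
 *	error = lwp_wait(curlwp, lid, &departed, false);
 *	mutex_exit(p->p_lock);		// ...and is still held on return
 */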
754:
1.223 ad 755: /*
1.52 ad 756: * Create a new LWP within process 'p2', using LWP 'l1' as a template.
757: * The new LWP is created in state LSIDL and must be set running,
758: * suspended, or stopped by the caller.
759: */
1.2 thorpej 760: int
1.134 rmind 761: lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, int flags,
1.188 christos 762: void *stack, size_t stacksize, void (*func)(void *), void *arg,
763: lwp_t **rnewlwpp, int sclass, const sigset_t *sigmask,
764: const stack_t *sigstk)
1.2 thorpej 765: {
1.215 ad 766: struct lwp *l2;
1.2 thorpej 767:
1.107 ad 768: KASSERT(l1 == curlwp || l1->l_proc == &proc0);
769:
1.52 ad 770: /*
1.215 ad 771: * Enforce limits, excluding the first lwp and kthreads. We must
772: * use the process credentials here when adjusting the limit, as
773: * they are what's tied to the accounting entity. However for
774: * authorizing the action, we'll use the LWP's credentials.
1.169 christos 775: */
1.215 ad 776: mutex_enter(p2->p_lock);
1.169 christos 777: if (p2->p_nlwps != 0 && p2 != &proc0) {
1.215 ad 778: uid_t uid = kauth_cred_getuid(p2->p_cred);
1.169 christos 779: int count = chglwpcnt(uid, 1);
780: if (__predict_false(count >
781: p2->p_rlimit[RLIMIT_NTHR].rlim_cur)) {
782: if (kauth_authorize_process(l1->l_cred,
783: KAUTH_PROCESS_RLIMIT, p2,
784: KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_BYPASS),
785: &p2->p_rlimit[RLIMIT_NTHR], KAUTH_ARG(RLIMIT_NTHR))
786: != 0) {
1.170 christos 787: (void)chglwpcnt(uid, -1);
1.215 ad 788: mutex_exit(p2->p_lock);
1.170 christos 789: return EAGAIN;
1.169 christos 790: }
791: }
792: }
793:
794: /*
1.52 ad 795: * First off, reap any detached LWP waiting to be collected.
796: * We can re-use its LWP structure and turnstile.
797: */
1.215 ad 798: if ((l2 = p2->p_zomblwp) != NULL) {
799: p2->p_zomblwp = NULL;
800: lwp_free(l2, true, false);
801: /* p2 now unlocked by lwp_free() */
1.238 ad 802: KASSERT(l2->l_ts != NULL);
1.75 ad 803: KASSERT(l2->l_inheritedprio == -1);
1.60 yamt 804: KASSERT(SLIST_EMPTY(&l2->l_pi_lenders));
1.238 ad 805: memset(&l2->l_startzero, 0, sizeof(*l2) -
806: offsetof(lwp_t, l_startzero));
1.215 ad 807: } else {
808: mutex_exit(p2->p_lock);
809: l2 = pool_cache_get(lwp_cache, PR_WAITOK);
1.238 ad 810: memset(&l2->l_startzero, 0, sizeof(*l2) -
811: offsetof(lwp_t, l_startzero));
1.215 ad 812: SLIST_INIT(&l2->l_pi_lenders);
1.52 ad 813: }
1.2 thorpej 814:
1.238 ad 815: /*
816: * Because of lockless lookup via pid_table, the LWP can be locked
817: * and inspected briefly even after it's freed, so a few fields are
818: * kept stable.
819: */
820: KASSERT(l2->l_stat == LSIDL);
821: KASSERT(l2->l_cpu != NULL);
822: KASSERT(l2->l_ts != NULL);
823: KASSERT(l2->l_mutex == l2->l_cpu->ci_schedstate.spc_lwplock);
824:
1.2 thorpej 825: l2->l_proc = p2;
1.231 ad 826: l2->l_refcnt = 0;
1.75 ad 827: l2->l_class = sclass;
1.116 ad 828:
829: /*
1.235 thorpej 830: * Allocate a process ID for this LWP. We need to do this now
              831: 	 * while we can still unwind if it fails.  Because we're marked
1.238 ad 832: * as LSIDL, no lookups by the ID will succeed.
1.235 thorpej 833: *
834: * N.B. this will always succeed for the first LWP in a process,
835: * because proc_alloc_lwpid() will usurp the slot. Also note
836: * that l2->l_proc MUST be valid so that lookups of the proc
837: * will succeed, even if the LWP itself is not visible.
838: */
839: if (__predict_false(proc_alloc_lwpid(p2, l2) == -1)) {
840: pool_cache_put(lwp_cache, l2);
841: return EAGAIN;
842: }
843:
844: /*
1.116 ad 845: * If vfork(), we want the LWP to run fast and on the same CPU
846: * as its parent, so that it can reuse the VM context and cache
847: * footprint on the local CPU.
848: */
849: l2->l_kpriority = ((flags & LWP_VFORK) ? true : false);
1.82 ad 850: l2->l_kpribase = PRI_KERNEL;
1.52 ad 851: l2->l_priority = l1->l_priority;
1.75 ad 852: l2->l_inheritedprio = -1;
1.185 christos 853: l2->l_protectprio = -1;
854: l2->l_auxprio = -1;
1.222 ad 855: l2->l_flag = 0;
1.88 ad 856: l2->l_pflag = LP_MPSAFE;
1.131 ad 857: TAILQ_INIT(&l2->l_ld_locks);
1.197 ozaki-r 858: l2->l_psrefs = 0;
1.208 maxv 859: kmsan_lwp_alloc(l2);
1.131 ad 860:
861: /*
1.156 pooka 862: * For vfork, borrow parent's lwpctl context if it exists.
863: * This also causes us to return via lwp_userret.
864: */
865: if (flags & LWP_VFORK && l1->l_lwpctl) {
866: l2->l_lwpctl = l1->l_lwpctl;
867: l2->l_flag |= LW_LWPCTL;
868: }
869:
870: /*
1.131 ad 871: * If not the first LWP in the process, grab a reference to the
872: * descriptor table.
873: */
1.97 ad 874: l2->l_fd = p2->p_fd;
1.131 ad 875: if (p2->p_nlwps != 0) {
876: KASSERT(l1->l_proc == p2);
1.136 rmind 877: fd_hold(l2);
1.131 ad 878: } else {
879: KASSERT(l1->l_proc != p2);
880: }
1.41 thorpej 881:
1.56 pavel 882: if (p2->p_flag & PK_SYSTEM) {
1.134 rmind 883: /* Mark it as a system LWP. */
1.56 pavel 884: l2->l_flag |= LW_SYSTEM;
1.52 ad 885: }
1.2 thorpej 886:
1.138 darran 887: kdtrace_thread_ctor(NULL, l2);
1.73 rmind 888: lwp_initspecific(l2);
1.75 ad 889: sched_lwp_fork(l1, l2);
1.37 ad 890: lwp_update_creds(l2);
1.70 ad 891: callout_init(&l2->l_timeout_ch, CALLOUT_MPSAFE);
892: callout_setfunc(&l2->l_timeout_ch, sleepq_timeout, l2);
1.52 ad 893: cv_init(&l2->l_sigcv, "sigwait");
1.171 rmind 894: cv_init(&l2->l_waitcv, "vfork");
1.52 ad 895: l2->l_syncobj = &sched_syncobj;
1.201 ozaki-r 896: PSREF_DEBUG_INIT_LWP(l2);
1.2 thorpej 897:
898: if (rnewlwpp != NULL)
899: *rnewlwpp = l2;
900:
1.158 matt 901: /*
902: * PCU state needs to be saved before calling uvm_lwp_fork() so that
903: * the MD cpu_lwp_fork() can copy the saved state to the new LWP.
904: */
905: pcu_save_all(l1);
1.225 dogcow 906: #if PCU_UNIT_COUNT > 0
1.224 riastrad 907: l2->l_pcu_valid = l1->l_pcu_valid;
1.225 dogcow 908: #endif
1.158 matt 909:
1.137 rmind 910: uvm_lwp_setuarea(l2, uaddr);
1.190 skrll 911: uvm_lwp_fork(l1, l2, stack, stacksize, func, (arg != NULL) ? arg : l2);
1.2 thorpej 912:
1.235 thorpej 913: mutex_enter(p2->p_lock);
1.52 ad 914: if ((flags & LWP_DETACHED) != 0) {
915: l2->l_prflag = LPR_DETACHED;
916: p2->p_ndlwps++;
917: } else
918: l2->l_prflag = 0;
919:
1.223 ad 920: if (l1->l_proc == p2) {
921: /*
922: * These flags are set while p_lock is held. Copy with
923: * p_lock held too, so the LWP doesn't sneak into the
924: * process without them being set.
925: */
1.222 ad 926: l2->l_flag |= (l1->l_flag & (LW_WEXIT | LW_WREBOOT | LW_WCORE));
1.223 ad 927: } else {
928: /* fork(): pending core/exit doesn't apply to child. */
1.222 ad 929: l2->l_flag |= (l1->l_flag & LW_WREBOOT);
1.223 ad 930: }
1.222 ad 931:
1.188 christos 932: l2->l_sigstk = *sigstk;
933: l2->l_sigmask = *sigmask;
1.176 christos 934: TAILQ_INIT(&l2->l_sigpend.sp_info);
1.52 ad 935: sigemptyset(&l2->l_sigpend.sp_set);
1.174 dsl 936: LIST_INSERT_HEAD(&p2->p_lwps, l2, l_sibling);
1.2 thorpej 937: p2->p_nlwps++;
1.149 yamt 938: p2->p_nrlwps++;
1.2 thorpej 939:
1.162 rmind 940: KASSERT(l2->l_affinity == NULL);
941:
1.210 ad 942: /* Inherit the affinity mask. */
943: if (l1->l_affinity) {
944: /*
945: * Note that we hold the state lock while inheriting
946: * the affinity to avoid race with sched_setaffinity().
947: */
948: lwp_lock(l1);
1.162 rmind 949: if (l1->l_affinity) {
1.210 ad 950: kcpuset_use(l1->l_affinity);
951: l2->l_affinity = l1->l_affinity;
1.117 christos 952: }
1.210 ad 953: lwp_unlock(l1);
1.91 rmind 954: }
1.223 ad 955:
956: /* This marks the end of the "must be atomic" section. */
1.128 rmind 957: mutex_exit(p2->p_lock);
958:
1.180 christos 959: SDT_PROBE(proc, kernel, , lwp__create, l2, 0, 0, 0, 0);
1.141 darran 960:
1.239 ad 961: mutex_enter(&proc_lock);
1.128 rmind 962: LIST_INSERT_HEAD(&alllwp, l2, l_list);
1.210 ad 963: /* Inherit a processor-set */
964: l2->l_psid = l1->l_psid;
1.239 ad 965: mutex_exit(&proc_lock);
1.91 rmind 966:
1.57 dsl 967: SYSCALL_TIME_LWP_INIT(l2);
968:
1.16 manu 969: if (p2->p_emul->e_lwp_fork)
970: (*p2->p_emul->e_lwp_fork)(l1, l2);
971:
1.2 thorpej 972: return (0);
973: }
974:
975: /*
1.212 ad 976: * Set a new LWP running. If the process is stopping, then the LWP is
977: * created stopped.
978: */
979: void
980: lwp_start(lwp_t *l, int flags)
981: {
982: proc_t *p = l->l_proc;
983:
984: mutex_enter(p->p_lock);
985: lwp_lock(l);
986: KASSERT(l->l_stat == LSIDL);
987: if ((flags & LWP_SUSPENDED) != 0) {
988: /* It'll suspend itself in lwp_userret(). */
989: l->l_flag |= LW_WSUSPEND;
990: }
991: if (p->p_stat == SSTOP || (p->p_sflag & PS_STOPPING) != 0) {
992: KASSERT(l->l_wchan == NULL);
993: l->l_stat = LSSTOP;
994: p->p_nrlwps--;
995: lwp_unlock(l);
996: } else {
997: setrunnable(l);
998: /* LWP now unlocked */
999: }
1000: mutex_exit(p->p_lock);
1001: }
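/*
 * Illustrative sketch (editorial addition): a typical pairing of
 * lwp_create() and lwp_start().  The uarea, function and argument
 * shown are placeholders; kthread_create() and fork1() use variations
 * of this pattern.
 *
 *	error = lwp_create(curlwp, p, uaddr, 0, NULL, 0, func, arg,
 *	    &l2, SCHED_OTHER, &curlwp->l_sigmask, &curlwp->l_sigstk);
 *	if (error == 0)
 *		lwp_start(l2, 0);	// moves l2 out of LSIDL
 */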
1002:
1003: /*
1.64 yamt 1004: * Called by MD code when a new LWP begins execution. Must be called
1005: * with the previous LWP locked (so at splsched), or if there is no
1006: * previous LWP, at splsched.
1007: */
1008: void
1.178 matt 1009: lwp_startup(struct lwp *prev, struct lwp *new_lwp)
1.64 yamt 1010: {
1.227 ad 1011: kmutex_t *lock;
1.218 ad 1012:
1.178 matt 1013: KASSERTMSG(new_lwp == curlwp, "l %p curlwp %p prevlwp %p", new_lwp, curlwp, prev);
1.218 ad 1014: KASSERT(kpreempt_disabled());
1015: KASSERT(prev != NULL);
1.227 ad 1016: KASSERT((prev->l_pflag & LP_RUNNING) != 0);
1.218 ad 1017: KASSERT(curcpu()->ci_mtx_count == -2);
1018:
1.227 ad 1019: /*
1.247 riastrad 1020: * Immediately mark the previous LWP as no longer running and
              1021: 	 * unlock (to keep lock wait times as short as possible).  If a
1022: * zombie, don't touch after clearing LP_RUNNING as it could be
1023: * reaped by another CPU. Use atomic_store_release to ensure
1024: * this -- matches atomic_load_acquire in lwp_free.
1.227 ad 1025: */
1026: lock = prev->l_mutex;
1027: if (__predict_false(prev->l_stat == LSZOMB)) {
1.247 riastrad 1028: atomic_store_release(&prev->l_pflag,
1029: prev->l_pflag & ~LP_RUNNING);
1030: } else {
1031: prev->l_pflag &= ~LP_RUNNING;
1.227 ad 1032: }
1033: mutex_spin_exit(lock);
1.64 yamt 1034:
1.218 ad 1035: /* Correct spin mutex count after mi_switch(). */
1036: curcpu()->ci_mtx_count = 0;
1.141 darran 1037:
1.218 ad 1038: /* Install new VM context. */
1039: if (__predict_true(new_lwp->l_proc->p_vmspace)) {
1040: pmap_activate(new_lwp);
1.64 yamt 1041: }
1.218 ad 1042:
1043: /* We remain at IPL_SCHED from mi_switch() - reset it. */
1.181 skrll 1044: spl0();
1.161 christos 1045:
1.64 yamt 1046: LOCKDEBUG_BARRIER(NULL, 0);
1.218 ad 1047: SDT_PROBE(proc, kernel, , lwp__start, new_lwp, 0, 0, 0, 0);
1048:
1049: /* For kthreads, acquire kernel lock if not MPSAFE. */
1050: if (__predict_false((new_lwp->l_pflag & LP_MPSAFE) == 0)) {
1.178 matt 1051: KERNEL_LOCK(1, new_lwp);
1.65 ad 1052: }
1.64 yamt 1053: }
1054:
1055: /*
1.65 ad 1056: * Exit an LWP.
1.241 ad 1057: *
1058: * *** WARNING *** This can be called with (l != curlwp) in error paths.
1.2 thorpej 1059: */
1060: void
1061: lwp_exit(struct lwp *l)
1062: {
1063: struct proc *p = l->l_proc;
1.52 ad 1064: struct lwp *l2;
1.65 ad 1065: bool current;
1066:
1067: current = (l == curlwp);
1.2 thorpej 1068:
1.114 rmind 1069: KASSERT(current || (l->l_stat == LSIDL && l->l_target_cpu == NULL));
1.131 ad 1070: KASSERT(p == curproc);
1.2 thorpej 1071:
1.180 christos 1072: SDT_PROBE(proc, kernel, , lwp__exit, l, 0, 0, 0, 0);
1.141 darran 1073:
1.220 ad 1074: /* Verify that we hold no locks; for DIAGNOSTIC check kernel_lock. */
1.218 ad 1075: LOCKDEBUG_BARRIER(NULL, 0);
1.220 ad 1076: KASSERTMSG(curcpu()->ci_biglock_count == 0, "kernel_lock leaked");
1.16 manu 1077:
1.2 thorpej 1078: /*
1.52 ad 1079: * If we are the last live LWP in a process, we need to exit the
1080: * entire process. We do so with an exit status of zero, because
1081: * it's a "controlled" exit, and because that's what Solaris does.
1082: *
1083: * We are not quite a zombie yet, but for accounting purposes we
1084: * must increment the count of zombies here.
1.45 thorpej 1085: *
1086: * Note: the last LWP's specificdata will be deleted here.
1.2 thorpej 1087: */
1.103 ad 1088: mutex_enter(p->p_lock);
1.52 ad 1089: if (p->p_nlwps - p->p_nzlwps == 1) {
1.65 ad 1090: KASSERT(current == true);
1.172 matt 1091: KASSERT(p != &proc0);
1.184 christos 1092: exit1(l, 0, 0);
1.19 jdolecek 1093: /* NOTREACHED */
1.2 thorpej 1094: }
1.52 ad 1095: p->p_nzlwps++;
1.233 thorpej 1096:
1097: /*
1098: * Perform any required thread cleanup. Do this early so
1.235 thorpej 1099: * anyone wanting to look us up with lwp_getref_lwpid() will
1100: * fail to find us before we become a zombie.
1.233 thorpej 1101: *
1102: * N.B. this will unlock p->p_lock on our behalf.
1103: */
1104: lwp_thread_cleanup(l);
1.52 ad 1105:
1106: if (p->p_emul->e_lwp_exit)
1107: (*p->p_emul->e_lwp_exit)(l);
1.2 thorpej 1108:
1.131 ad 1109: /* Drop filedesc reference. */
1110: fd_free();
1111:
1.196 hannken 1112: /* Release fstrans private data. */
1113: fstrans_lwp_dtor(l);
1114:
1.45 thorpej 1115: /* Delete the specificdata while it's still safe to sleep. */
1.145 pooka 1116: lwp_finispecific(l);
1.45 thorpej 1117:
1.52 ad 1118: /*
1119: * Release our cached credentials.
1120: */
1.37 ad 1121: kauth_cred_free(l->l_cred);
1.70 ad 1122: callout_destroy(&l->l_timeout_ch);
1.65 ad 1123:
1124: /*
1.198 kamil 1125: * If traced, report LWP exit event to the debugger.
1126: *
1.52 ad 1127: * Remove the LWP from the global list.
1.151 chs 1128: * Free its LID from the PID namespace if needed.
1.52 ad 1129: */
1.239 ad 1130: mutex_enter(&proc_lock);
1.198 kamil 1131:
1.199 kamil 1132: if ((p->p_slflag & (PSL_TRACED|PSL_TRACELWP_EXIT)) ==
1.198 kamil 1133: (PSL_TRACED|PSL_TRACELWP_EXIT)) {
1134: mutex_enter(p->p_lock);
1.202 kamil 1135: if (ISSET(p->p_sflag, PS_WEXIT)) {
1136: mutex_exit(p->p_lock);
1137: /*
1138: * We are exiting, bail out without informing parent
1139: * about a terminating LWP as it would deadlock.
1140: */
1141: } else {
1.203 kamil 1142: eventswitch(TRAP_LWP, PTRACE_LWP_EXIT, l->l_lid);
1.239 ad 1143: mutex_enter(&proc_lock);
1.202 kamil 1144: }
1.198 kamil 1145: }
1146:
1.52 ad 1147: LIST_REMOVE(l, l_list);
1.239 ad 1148: mutex_exit(&proc_lock);
1.19 jdolecek 1149:
1.52 ad 1150: /*
1151: * Get rid of all references to the LWP that others (e.g. procfs)
1152: * may have, and mark the LWP as a zombie. If the LWP is detached,
1153: * mark it waiting for collection in the proc structure. Note that
              1154: 	 * before we can do that, we need to free any other dead, detached
1155: * LWP waiting to meet its maker.
1.231 ad 1156: *
              1157: 	 * All conditions need to be observed under the same hold of
1158: * p_lock, because if the lock is dropped any of them can change.
1.52 ad 1159: */
1.103 ad 1160: mutex_enter(p->p_lock);
1.231 ad 1161: for (;;) {
1.233 thorpej 1162: if (lwp_drainrefs(l))
1.231 ad 1163: continue;
1164: if ((l->l_prflag & LPR_DETACHED) != 0) {
1165: if ((l2 = p->p_zomblwp) != NULL) {
1166: p->p_zomblwp = NULL;
1167: lwp_free(l2, false, false);
1168: /* proc now unlocked */
1169: mutex_enter(p->p_lock);
1170: continue;
1171: }
1172: p->p_zomblwp = l;
1.52 ad 1173: }
1.231 ad 1174: break;
1.52 ad 1175: }
1.31 yamt 1176:
1.52 ad 1177: /*
1178: * If we find a pending signal for the process and we have been
1.151 chs 1179: * asked to check for signals, then we lose: arrange to have
1.52 ad 1180: * all other LWPs in the process check for signals.
1181: */
1.56 pavel 1182: if ((l->l_flag & LW_PENDSIG) != 0 &&
1.52 ad 1183: firstsig(&p->p_sigpend.sp_set) != 0) {
1184: LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
1185: lwp_lock(l2);
1.209 ad 1186: signotify(l2);
1.52 ad 1187: lwp_unlock(l2);
1188: }
1.31 yamt 1189: }
1190:
1.158 matt 1191: /*
1192: * Release any PCU resources before becoming a zombie.
1193: */
1194: pcu_discard_all(l);
1195:
1.52 ad 1196: lwp_lock(l);
1197: l->l_stat = LSZOMB;
1.162 rmind 1198: if (l->l_name != NULL) {
1.90 ad 1199: strcpy(l->l_name, "(zombie)");
1.128 rmind 1200: }
1.52 ad 1201: lwp_unlock(l);
1.2 thorpej 1202: p->p_nrlwps--;
1.52 ad 1203: cv_broadcast(&p->p_lwpcv);
1.78 ad 1204: if (l->l_lwpctl != NULL)
1205: l->l_lwpctl->lc_curcpu = LWPCTL_CPU_EXITED;
1.103 ad 1206: mutex_exit(p->p_lock);
1.52 ad 1207:
1208: /*
1209: * We can no longer block. At this point, lwp_free() may already
1210: * be gunning for us. On a multi-CPU system, we may be off p_lwps.
1211: *
1212: * Free MD LWP resources.
1213: */
1214: cpu_lwp_free(l, 0);
1.2 thorpej 1215:
1.65 ad 1216: if (current) {
1.218 ad 1217: /* Switch away into oblivion. */
1218: lwp_lock(l);
1219: spc_lock(l->l_cpu);
1220: mi_switch(l);
1221: panic("lwp_exit");
1.65 ad 1222: }
1.2 thorpej 1223: }
1224:
1.52 ad 1225: /*
1226: * Free a dead LWP's remaining resources.
1227: *
1228: * XXXLWP limits.
1229: */
1230: void
1.63 ad 1231: lwp_free(struct lwp *l, bool recycle, bool last)
1.52 ad 1232: {
1233: struct proc *p = l->l_proc;
1.100 ad 1234: struct rusage *ru;
1.52 ad 1235: ksiginfoq_t kq;
1236:
1.92 yamt 1237: KASSERT(l != curlwp);
1.160 yamt 1238: KASSERT(last || mutex_owned(p->p_lock));
1.92 yamt 1239:
1.177 christos 1240: /*
1241: * We use the process credentials instead of the lwp credentials here
              1242: 	 * because the lwp credentials may be cached (just after a setuid call)
              1243: 	 * and we don't want to pay for syncing, since the lwp is going away
1244: * anyway
1245: */
1.169 christos 1246: if (p != &proc0 && p->p_nlwps != 1)
1.177 christos 1247: (void)chglwpcnt(kauth_cred_getuid(p->p_cred), -1);
1.218 ad 1248:
1.52 ad 1249: /*
1.238 ad 1250: * In the unlikely event that the LWP is still on the CPU,
              1251: 	 * spin until it has switched away.
1.247 riastrad 1252: *
1253: * atomic_load_acquire matches atomic_store_release in
1254: * lwp_startup and mi_switch.
1.238 ad 1255: */
1.247 riastrad 1256: while (__predict_false((atomic_load_acquire(&l->l_pflag) & LP_RUNNING)
1257: != 0)) {
1.238 ad 1258: SPINLOCK_BACKOFF_HOOK;
1259: }
1260:
1261: /*
1262: * Now that the LWP's known off the CPU, reset its state back to
1263: * LSIDL, which defeats anything that might have gotten a hold on
1264: * the LWP via pid_table before the ID was freed. It's important
1265: * to do this with both the LWP locked and p_lock held.
1266: *
1267: * Also reset the CPU and lock pointer back to curcpu(), since the
              1268: 	 * LWP will in all likelihood be cached with the current CPU in
1269: * lwp_cache when we free it and later allocated from there again
1270: * (avoid incidental lock contention).
1271: */
1272: lwp_lock(l);
1273: l->l_stat = LSIDL;
1274: l->l_cpu = curcpu();
1275: lwp_unlock_to(l, l->l_cpu->ci_schedstate.spc_lwplock);
1276:
1277: /*
1.223 ad 1278: * If this was not the last LWP in the process, then adjust counters
1279: * and unlock. This is done differently for the last LWP in exit1().
1.52 ad 1280: */
1281: if (!last) {
1282: /*
1283: * Add the LWP's run time to the process' base value.
              1284: 		 * This needs to coincide with coming off p_lwps.
1285: */
1.86 yamt 1286: bintime_add(&p->p_rtime, &l->l_rtime);
1.64 yamt 1287: p->p_pctcpu += l->l_pctcpu;
1.100 ad 1288: ru = &p->p_stats->p_ru;
1289: ruadd(ru, &l->l_ru);
1290: ru->ru_nvcsw += (l->l_ncsw - l->l_nivcsw);
1291: ru->ru_nivcsw += l->l_nivcsw;
1.52 ad 1292: LIST_REMOVE(l, l_sibling);
1293: p->p_nlwps--;
1294: p->p_nzlwps--;
1295: if ((l->l_prflag & LPR_DETACHED) != 0)
1296: p->p_ndlwps--;
1.63 ad 1297:
1298: /*
1299: * Have any LWPs sleeping in lwp_wait() recheck for
1300: * deadlock.
1301: */
1302: cv_broadcast(&p->p_lwpcv);
1.103 ad 1303: mutex_exit(p->p_lock);
1.52 ad 1304:
1.238 ad 1305: /* Free the LWP ID. */
1.239 ad 1306: mutex_enter(&proc_lock);
1.238 ad 1307: proc_free_lwpid(p, l->l_lid);
1.239 ad 1308: mutex_exit(&proc_lock);
1.63 ad 1309: }
1.52 ad 1310:
1311: /*
1312: * Destroy the LWP's remaining signal information.
1313: */
1314: ksiginfo_queue_init(&kq);
1315: sigclear(&l->l_sigpend, NULL, &kq);
1316: ksiginfo_queue_drain(&kq);
1317: cv_destroy(&l->l_sigcv);
1.171 rmind 1318: cv_destroy(&l->l_waitcv);
1.2 thorpej 1319:
1.19 jdolecek 1320: /*
1.162 rmind 1321: * Free lwpctl structure and affinity.
1322: */
1323: if (l->l_lwpctl) {
1324: lwp_ctl_free(l);
1325: }
1326: if (l->l_affinity) {
1327: kcpuset_unuse(l->l_affinity, NULL);
1328: l->l_affinity = NULL;
1329: }
1330:
1331: /*
1.238 ad 1332: * Free remaining data structures and the LWP itself unless the
1333: * caller wants to recycle.
1.19 jdolecek 1334: */
1.90 ad 1335: if (l->l_name != NULL)
1336: kmem_free(l->l_name, MAXCOMLEN);
1.135 rmind 1337:
1.208 maxv 1338: kmsan_lwp_free(l);
1.232 maxv 1339: kcov_lwp_free(l);
1.52 ad 1340: cpu_lwp_free2(l);
1.19 jdolecek 1341: uvm_lwp_exit(l);
1.134 rmind 1342:
1.60 yamt 1343: KASSERT(SLIST_EMPTY(&l->l_pi_lenders));
1.75 ad 1344: KASSERT(l->l_inheritedprio == -1);
1.155 matt 1345: KASSERT(l->l_blcnt == 0);
1.138 darran 1346: kdtrace_thread_dtor(NULL, l);
1.52 ad 1347: if (!recycle)
1.87 ad 1348: pool_cache_put(lwp_cache, l);
1.2 thorpej 1349: }
1350:
1351: /*
1.91 rmind   1352: * Migrate the LWP to another CPU.  Unlocks the LWP.
1353: */
1354: void
1.114 rmind 1355: lwp_migrate(lwp_t *l, struct cpu_info *tci)
1.91 rmind 1356: {
1.114 rmind 1357: struct schedstate_percpu *tspc;
1.121 rmind 1358: int lstat = l->l_stat;
1359:
1.91 rmind 1360: KASSERT(lwp_locked(l, NULL));
1.114 rmind 1361: KASSERT(tci != NULL);
1362:
1.121 rmind 1363: /* If LWP is still on the CPU, it must be handled like LSONPROC */
1.227 ad 1364: if ((l->l_pflag & LP_RUNNING) != 0) {
1.121 rmind 1365: lstat = LSONPROC;
1366: }
1367:
1.114 rmind 1368: /*
              1369: 	 * The destination CPU could have changed while the previous
              1370: 	 * migration was not yet finished.
1371: */
1.121 rmind 1372: if (l->l_target_cpu != NULL) {
1.114 rmind 1373: l->l_target_cpu = tci;
1374: lwp_unlock(l);
1375: return;
1376: }
1.91 rmind 1377:
1.114 rmind 1378: /* Nothing to do if trying to migrate to the same CPU */
1379: if (l->l_cpu == tci) {
1.91 rmind 1380: lwp_unlock(l);
1381: return;
1382: }
1383:
1.114 rmind 1384: KASSERT(l->l_target_cpu == NULL);
1385: tspc = &tci->ci_schedstate;
1.121 rmind 1386: switch (lstat) {
1.91 rmind 1387: case LSRUN:
1.134 rmind 1388: l->l_target_cpu = tci;
1389: break;
1.91 rmind 1390: case LSSLEEP:
1.114 rmind 1391: l->l_cpu = tci;
1.91 rmind 1392: break;
1.212 ad 1393: case LSIDL:
1.91 rmind 1394: case LSSTOP:
1395: case LSSUSPENDED:
1.114 rmind 1396: l->l_cpu = tci;
1397: if (l->l_wchan == NULL) {
1398: lwp_unlock_to(l, tspc->spc_lwplock);
1399: return;
1.91 rmind 1400: }
1.114 rmind 1401: break;
1.91 rmind 1402: case LSONPROC:
1.114 rmind 1403: l->l_target_cpu = tci;
1404: spc_lock(l->l_cpu);
1.212 ad 1405: sched_resched_cpu(l->l_cpu, PRI_USER_RT, true);
1406: /* spc now unlocked */
1.91 rmind 1407: break;
1408: }
1409: lwp_unlock(l);
1410: }
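/*
 * Illustrative caller sketch for lwp_migrate() (editorial addition):
 *
 *	lwp_lock(l);			// must enter with the LWP locked
 *	lwp_migrate(l, tci);		// always unlocks the LWP
 */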
1411:
1.237 thorpej 1412: #define lwp_find_exclude(l) \
1413: ((l)->l_stat == LSIDL || (l)->l_stat == LSZOMB)
1414:
1.91 rmind 1415: /*
1.94 rmind   1416: * Find the LWP in the process.  Arguments may be zero, in which case
1417: * the calling process and first LWP in the list will be used.
1.103 ad 1418: * On success - returns proc locked.
1.237 thorpej 1419: *
1420: * => pid == 0 -> look in curproc.
1421: * => pid == -1 -> match any proc.
1422: * => otherwise look up the proc.
1423: *
1424: * => lid == 0 -> first LWP in the proc
1425: * => otherwise specific LWP
1.91 rmind 1426: */
1427: struct lwp *
1428: lwp_find2(pid_t pid, lwpid_t lid)
1429: {
1430: proc_t *p;
1431: lwp_t *l;
1432:
1.237 thorpej 1433: /* First LWP of specified proc. */
1434: if (lid == 0) {
1435: switch (pid) {
1436: case -1:
1437: /* No lookup keys. */
1438: return NULL;
1439: case 0:
1440: p = curproc;
1441: mutex_enter(p->p_lock);
1442: break;
1443: default:
1.239 ad 1444: mutex_enter(&proc_lock);
1.237 thorpej 1445: p = proc_find(pid);
1446: if (__predict_false(p == NULL)) {
1.239 ad 1447: mutex_exit(&proc_lock);
1.237 thorpej 1448: return NULL;
1449: }
1450: mutex_enter(p->p_lock);
1.239 ad 1451: mutex_exit(&proc_lock);
1.237 thorpej 1452: break;
1453: }
1454: LIST_FOREACH(l, &p->p_lwps, l_sibling) {
1455: if (__predict_true(!lwp_find_exclude(l)))
1456: break;
1.150 rmind 1457: }
1.237 thorpej 1458: goto out;
1459: }
1460:
1461: l = proc_find_lwp_acquire_proc(lid, &p);
1462: if (l == NULL)
1463: return NULL;
1464: KASSERT(p != NULL);
1465: KASSERT(mutex_owned(p->p_lock));
1466:
1467: if (__predict_false(lwp_find_exclude(l))) {
1468: l = NULL;
1469: goto out;
1.150 rmind 1470: }
1.237 thorpej 1471:
1472: /* Apply proc filter, if applicable. */
1473: switch (pid) {
1474: case -1:
1475: /* Match anything. */
1476: break;
1477: case 0:
1478: if (p != curproc)
1479: l = NULL;
1480: break;
1481: default:
1482: if (p->p_pid != pid)
1483: l = NULL;
1484: break;
1.94 rmind 1485: }
1.237 thorpej 1486:
1487: out:
1488: if (__predict_false(l == NULL)) {
1.103 ad 1489: mutex_exit(p->p_lock);
1490: }
1.91 rmind 1491: return l;
1492: }
1493:
1494: /*
1.168 yamt 1495: * Look up a live LWP within the specified process.
1.52 ad 1496: *
1.223 ad 1497: * Must be called with p->p_lock held (as it looks at the radix tree,
1498: * and also wants to exclude idle and zombie LWPs).
1.52 ad 1499: */
1500: struct lwp *
1.151 chs 1501: lwp_find(struct proc *p, lwpid_t id)
1.52 ad 1502: {
1503: struct lwp *l;
1504:
1.103 ad 1505: KASSERT(mutex_owned(p->p_lock));
1.52 ad 1506:
1.235 thorpej 1507: l = proc_find_lwp(p, id);
1.223 ad 1508: KASSERT(l == NULL || l->l_lid == id);
1.52 ad 1509:
1510: /*
1511: * No need to lock - all of these conditions will
1512: * be visible with the process level mutex held.
1513: */
1.237 thorpej 1514: if (__predict_false(l != NULL && lwp_find_exclude(l)))
1.52 ad 1515: l = NULL;
1516:
1517: return l;
1518: }
1519:
1520: /*
1.37 ad 1521: * Update an LWP's cached credentials to mirror the process' master copy.
1522: *
1523: * This happens early in the syscall path, on user trap, and on LWP
1524: * creation. A long-running LWP can also voluntarily choose to update
1.179 snj 1525: * its credentials by calling this routine. This may be called from
1.37 ad 1526: * LWP_CACHE_CREDS(), which checks l->l_cred != p->p_cred beforehand.
1527: */
1528: void
1529: lwp_update_creds(struct lwp *l)
1530: {
1531: kauth_cred_t oc;
1532: struct proc *p;
1533:
1534: p = l->l_proc;
1535: oc = l->l_cred;
1536:
1.103 ad 1537: mutex_enter(p->p_lock);
1.37 ad 1538: kauth_cred_hold(p->p_cred);
1539: l->l_cred = p->p_cred;
1.98 ad 1540: l->l_prflag &= ~LPR_CRMOD;
1.103 ad 1541: mutex_exit(p->p_lock);
1.88 ad 1542: if (oc != NULL)
1.37 ad 1543: kauth_cred_free(oc);
1.52 ad 1544: }
1545:
1546: /*
1547: * Verify that an LWP is locked, and optionally verify that the lock matches
1548: * one we specify.
1549: */
1550: int
1551: lwp_locked(struct lwp *l, kmutex_t *mtx)
1552: {
1553: kmutex_t *cur = l->l_mutex;
1554:
1555: return mutex_owned(cur) && (mtx == cur || mtx == NULL);
1556: }
1557:
1558: /*
1559: * Lend a new mutex to an LWP. The old mutex must be held.
1560: */
1.211 ad 1561: kmutex_t *
1.178 matt 1562: lwp_setlock(struct lwp *l, kmutex_t *mtx)
1.52 ad 1563: {
1.211 ad 1564: kmutex_t *oldmtx = l->l_mutex;
1.52 ad 1565:
1.211 ad 1566: KASSERT(mutex_owned(oldmtx));
1.52 ad 1567:
1.248 ! riastrad 1568: atomic_store_release(&l->l_mutex, mtx);
1.211 ad 1569: return oldmtx;
1.52 ad 1570: }
1571:
1572: /*
1573: * Lend a new mutex to an LWP, and release the old mutex. The old mutex
1574: * must be held.
1575: */
1576: void
1.178 matt 1577: lwp_unlock_to(struct lwp *l, kmutex_t *mtx)
1.52 ad 1578: {
1579: kmutex_t *old;
1580:
1.152 rmind 1581: KASSERT(lwp_locked(l, NULL));
1.52 ad 1582:
1583: old = l->l_mutex;
1.248 ! riastrad 1584: atomic_store_release(&l->l_mutex, mtx);
1.52 ad 1585: mutex_spin_exit(old);
1586: }
1587:
1.60 yamt 1588: int
1589: lwp_trylock(struct lwp *l)
1590: {
1591: kmutex_t *old;
1592:
1593: for (;;) {
1.248 ! riastrad 1594: if (!mutex_tryenter(old = atomic_load_consume(&l->l_mutex)))
1.60 yamt 1595: return 0;
1.248 ! riastrad 1596: if (__predict_true(atomic_load_relaxed(&l->l_mutex) == old))
1.60 yamt 1597: return 1;
1598: mutex_spin_exit(old);
1599: }
1600: }
1601:
1.134 rmind 1602: void
1.211 ad 1603: lwp_unsleep(lwp_t *l, bool unlock)
1.96 ad 1604: {
1605:
1606: KASSERT(mutex_owned(l->l_mutex));
1.211 ad 1607: (*l->l_syncobj->sobj_unsleep)(l, unlock);
1.96 ad 1608: }
1609:
1.52 ad 1610: /*
1.56 pavel 1611: * Handle exceptions for mi_userret(). Called if a member of LW_USERRET is
1.52 ad 1612: * set.
1613: */
1614: void
1615: lwp_userret(struct lwp *l)
1616: {
1617: struct proc *p;
1618: int sig;
1619:
1.114 rmind 1620: KASSERT(l == curlwp);
1621: KASSERT(l->l_stat == LSONPROC);
1.52 ad 1622: p = l->l_proc;
1623:
1624: /*
1.167 rmind   1625: 	 * It is safe to do this read unlocked on an MP system.
1.52 ad 1626: */
1.167 rmind 1627: while ((l->l_flag & LW_USERRET) != 0) {
1.52 ad 1628: /*
1629: * Process pending signals first, unless the process
1.61 ad 1630: * is dumping core or exiting, where we will instead
1.101 rmind 1631: * enter the LW_WSUSPEND case below.
1.52 ad 1632: */
1.61 ad 1633: if ((l->l_flag & (LW_PENDSIG | LW_WCORE | LW_WEXIT)) ==
1634: LW_PENDSIG) {
1.103 ad 1635: mutex_enter(p->p_lock);
1.52 ad 1636: while ((sig = issignal(l)) != 0)
1637: postsig(sig);
1.103 ad 1638: mutex_exit(p->p_lock);
1.52 ad 1639: }
1640:
1641: /*
1642: * Core-dump or suspend pending.
1643: *
1.159 matt 1644: * In case of core dump, suspend ourselves, so that the kernel
1645: * stack and therefore the userland registers saved in the
1646: * trapframe are around for coredump() to write them out.
1647: * We also need to save any PCU resources that we have so that
              1648: 		 * they are accessible for coredump().  We issue a wakeup on
1649: * p->p_lwpcv so that sigexit() will write the core file out
1650: * once all other LWPs are suspended.
1.52 ad 1651: */
1.56 pavel 1652: if ((l->l_flag & LW_WSUSPEND) != 0) {
1.159 matt 1653: pcu_save_all(l);
1.103 ad 1654: mutex_enter(p->p_lock);
1.52 ad 1655: p->p_nrlwps--;
1656: cv_broadcast(&p->p_lwpcv);
1657: lwp_lock(l);
1658: l->l_stat = LSSUSPENDED;
1.104 ad 1659: lwp_unlock(l);
1.103 ad 1660: mutex_exit(p->p_lock);
1.104 ad 1661: lwp_lock(l);
1.217 ad 1662: spc_lock(l->l_cpu);
1.64 yamt 1663: mi_switch(l);
1.52 ad 1664: }
1665:
1666: /* Process is exiting. */
1.56 pavel 1667: if ((l->l_flag & LW_WEXIT) != 0) {
1.52 ad 1668: lwp_exit(l);
1669: KASSERT(0);
1670: /* NOTREACHED */
1671: }
1.156 pooka 1672:
1673: /* update lwpctl processor (for vfork child_return) */
1674: if (l->l_flag & LW_LWPCTL) {
1675: lwp_lock(l);
1676: KASSERT(kpreempt_disabled());
1677: l->l_lwpctl->lc_curcpu = (int)cpu_index(l->l_cpu);
1678: l->l_lwpctl->lc_pctr++;
1679: l->l_flag &= ~LW_LWPCTL;
1680: lwp_unlock(l);
1681: }
1.52 ad 1682: }
1683: }
1684:
1685: /*
1686: * Force an LWP to enter the kernel, to take a trip through lwp_userret().
1687: */
1688: void
1689: lwp_need_userret(struct lwp *l)
1690: {
1.209 ad 1691:
1692: KASSERT(!cpu_intr_p());
1.63 ad 1693: KASSERT(lwp_locked(l, NULL));
1.52 ad 1694:
1695: /*
1.209 ad 1696: * If the LWP is in any state other than LSONPROC, we know that it
1697: * is executing in-kernel and will hit userret() on the way out.
1698: *
1699: * If the LWP is curlwp, then we know we'll be back out to userspace
1700: * soon (can't be called from a hardware interrupt here).
1701: *
1702: * Otherwise, we can't be sure what the LWP is doing, so first make
1703: * sure the update to l_flag will be globally visible, and then
1704: * force the LWP to take a trip through trap() where it will do
1705: * userret().
1706: */
1707: if (l->l_stat == LSONPROC && l != curlwp) {
1708: membar_producer();
1709: cpu_signotify(l);
1710: }
1.52 ad 1711: }
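
/*
 * Illustrative sketch (not code from this file): a caller typically
 * sets one of the LW_USERRET flags with the LWP locked and then
 * forces the trip, e.g. when asking an LWP to exit:
 *
 *	lwp_lock(l);
 *	l->l_flag |= LW_WEXIT;
 *	lwp_need_userret(l);
 *	lwp_unlock(l);
 */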
1712:
1713: /*
1714: * Add one reference to an LWP. This will prevent the LWP from
1715: * exiting, thus keeping the LWP structure and PCB around to inspect.
1716: */
1717: void
1718: lwp_addref(struct lwp *l)
1719: {
1.103 ad 1720: KASSERT(mutex_owned(l->l_proc->p_lock));
1.237 thorpej 1721: KASSERT(l->l_stat != LSZOMB);
1722: l->l_refcnt++;
1.52 ad 1723: }
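
/*
 * Illustrative sketch (not code from this file): a typical caller
 * holds a reference across an inspection that may block:
 *
 *	mutex_enter(p->p_lock);
 *	lwp_addref(l);
 *	mutex_exit(p->p_lock);
 *	...inspect the LWP, possibly sleeping...
 *	lwp_delref(l);
 */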
1724:
1725: /*
1726: * Remove one reference to an LWP. If this is the last reference,
1727: * then we must finalize the LWP's death.
1728: */
1729: void
1730: lwp_delref(struct lwp *l)
1731: {
1732: struct proc *p = l->l_proc;
1733:
1.103 ad 1734: mutex_enter(p->p_lock);
1.142 christos 1735: lwp_delref2(l);
1736: mutex_exit(p->p_lock);
1737: }
1738:
1739: /*
1740: * Remove one reference to an LWP. If this is the last reference,
1741: * then we must finalize the LWP's death. The proc mutex is held
1742: * on entry.
1743: */
1744: void
1745: lwp_delref2(struct lwp *l)
1746: {
1747: struct proc *p = l->l_proc;
1748:
1749: KASSERT(mutex_owned(p->p_lock));
1.72 ad 1750: KASSERT(l->l_stat != LSZOMB);
1.237 thorpej 1751: KASSERT(l->l_refcnt > 0);
1.231 ad 1752:
1.237 thorpej 1753: if (--l->l_refcnt == 0)
1.76 ad 1754: cv_broadcast(&p->p_lwpcv);
1.52 ad 1755: }
1756:
1757: /*
1.233 thorpej 1758: * Drain all references to the current LWP. Returns true if
1759: * we blocked.
1.52 ad 1760: */
1.233 thorpej 1761: bool
1.52 ad 1762: lwp_drainrefs(struct lwp *l)
1763: {
1764: struct proc *p = l->l_proc;
1.233 thorpej 1765: bool rv = false;
1.52 ad 1766:
1.103 ad 1767: KASSERT(mutex_owned(p->p_lock));
1.52 ad 1768:
1.233 thorpej 1769: l->l_prflag |= LPR_DRAINING;
1770:
1.237 thorpej 1771: while (l->l_refcnt > 0) {
1.233 thorpej 1772: rv = true;
1.103 ad 1773: cv_wait(&p->p_lwpcv, p->p_lock);
1.233 thorpej 1774: }
1775: return rv;
1.37 ad 1776: }
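
/*
 * How the reference pieces fit together: lwp_addref() pins the LWP,
 * lwp_delref()/lwp_delref2() broadcast p->p_lwpcv when the count
 * drops to zero, and lwp_drainrefs() sets LPR_DRAINING and waits on
 * that condvar until no references remain.
 */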
1.41 thorpej 1777:
1778: /*
1.127 ad 1779: * Return true if the specified LWP is 'alive'. Only p->p_lock need
1780: * be held.
1781: */
1782: bool
1783: lwp_alive(lwp_t *l)
1784: {
1785:
1786: KASSERT(mutex_owned(l->l_proc->p_lock));
1787:
1788: switch (l->l_stat) {
1789: case LSSLEEP:
1790: case LSRUN:
1791: case LSONPROC:
1792: case LSSTOP:
1793: case LSSUSPENDED:
1794: return true;
1795: default:
1796: return false;
1797: }
1798: }
1799:
1800: /*
1801: * Return first live LWP in the process.
1802: */
1803: lwp_t *
1804: lwp_find_first(proc_t *p)
1805: {
1806: lwp_t *l;
1807:
1808: KASSERT(mutex_owned(p->p_lock));
1809:
1810: LIST_FOREACH(l, &p->p_lwps, l_sibling) {
1811: if (lwp_alive(l)) {
1812: return l;
1813: }
1814: }
1815:
1816: return NULL;
1817: }
1818:
1819: /*
1.78 ad 1820: * Allocate a new lwpctl structure for a user LWP.
1821: */
1822: int
1823: lwp_ctl_alloc(vaddr_t *uaddr)
1824: {
1825: lcproc_t *lp;
1826: u_int bit, i, offset;
1827: struct uvm_object *uao;
1828: int error;
1829: lcpage_t *lcp;
1830: proc_t *p;
1831: lwp_t *l;
1832:
1833: l = curlwp;
1834: p = l->l_proc;
1835:
1.156 pooka 1836: /* Don't allow a vforked process to create lwpctl structures. */
1837: if (p->p_lflag & PL_PPWAIT)
1838: return EBUSY;
1839:
1.81 ad 1840: if (l->l_lcpage != NULL) {
1841: lcp = l->l_lcpage;
1842: *uaddr = lcp->lcp_uaddr + (vaddr_t)l->l_lwpctl - lcp->lcp_kaddr;
1.143 njoly 1843: return 0;
1.81 ad 1844: }
1.78 ad 1845:
1846: /* First time around, allocate header structure for the process. */
1847: if ((lp = p->p_lwpctl) == NULL) {
1848: lp = kmem_alloc(sizeof(*lp), KM_SLEEP);
1849: mutex_init(&lp->lp_lock, MUTEX_DEFAULT, IPL_NONE);
1850: lp->lp_uao = NULL;
1851: TAILQ_INIT(&lp->lp_pages);
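		/*
		 * Publish the header under p->p_lock.  If another LWP
		 * installed one first, back out our allocation and use
		 * the winner's.
		 */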
1.103 ad 1852: mutex_enter(p->p_lock);
1.78 ad 1853: if (p->p_lwpctl == NULL) {
1854: p->p_lwpctl = lp;
1.103 ad 1855: mutex_exit(p->p_lock);
1.78 ad 1856: } else {
1.103 ad 1857: mutex_exit(p->p_lock);
1.78 ad 1858: mutex_destroy(&lp->lp_lock);
1859: kmem_free(lp, sizeof(*lp));
1860: lp = p->p_lwpctl;
1861: }
1862: }
1863:
1864: /*
1865: * Set up an anonymous memory region to hold the shared pages.
1866: * Map them into the process' address space. The user vmspace
1867: * gets the first reference on the UAO.
1868: */
1869: mutex_enter(&lp->lp_lock);
1870: if (lp->lp_uao == NULL) {
1871: lp->lp_uao = uao_create(LWPCTL_UAREA_SZ, 0);
1872: lp->lp_cur = 0;
1873: lp->lp_max = LWPCTL_UAREA_SZ;
1874: lp->lp_uva = p->p_emul->e_vm_default_addr(p,
1.182 martin 1875: (vaddr_t)p->p_vmspace->vm_daddr, LWPCTL_UAREA_SZ,
1876: p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);
1.78 ad 1877: error = uvm_map(&p->p_vmspace->vm_map, &lp->lp_uva,
1878: LWPCTL_UAREA_SZ, lp->lp_uao, 0, 0, UVM_MAPFLAG(UVM_PROT_RW,
1879: UVM_PROT_RW, UVM_INH_NONE, UVM_ADV_NORMAL, 0));
1880: if (error != 0) {
1881: uao_detach(lp->lp_uao);
1882: lp->lp_uao = NULL;
1883: mutex_exit(&lp->lp_lock);
1884: return error;
1885: }
1886: }
1887:
1888: /* Get a free block and allocate for this LWP. */
1889: TAILQ_FOREACH(lcp, &lp->lp_pages, lcp_chain) {
1890: if (lcp->lcp_nfree != 0)
1891: break;
1892: }
1893: if (lcp == NULL) {
1894: /* Nothing available - try to set up a free page. */
1895: if (lp->lp_cur == lp->lp_max) {
1896: mutex_exit(&lp->lp_lock);
1897: return ENOMEM;
1898: }
1899: lcp = kmem_alloc(LWPCTL_LCPAGE_SZ, KM_SLEEP);
1.189 chs 1900:
1.78 ad 1901: /*
1902: * Wire the next page down in kernel space. Since this
1903: * is a new mapping, we must add a reference.
1904: */
1905: uao = lp->lp_uao;
1906: (*uao->pgops->pgo_reference)(uao);
1.99 ad 1907: lcp->lcp_kaddr = vm_map_min(kernel_map);
1.78 ad 1908: error = uvm_map(kernel_map, &lcp->lcp_kaddr, PAGE_SIZE,
1909: uao, lp->lp_cur, PAGE_SIZE,
1910: UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
1911: UVM_INH_NONE, UVM_ADV_RANDOM, 0));
1912: if (error != 0) {
1913: mutex_exit(&lp->lp_lock);
1914: kmem_free(lcp, LWPCTL_LCPAGE_SZ);
1915: (*uao->pgops->pgo_detach)(uao);
1916: return error;
1917: }
1.89 yamt 1918: error = uvm_map_pageable(kernel_map, lcp->lcp_kaddr,
1919: lcp->lcp_kaddr + PAGE_SIZE, FALSE, 0);
1920: if (error != 0) {
1921: mutex_exit(&lp->lp_lock);
1922: uvm_unmap(kernel_map, lcp->lcp_kaddr,
1923: lcp->lcp_kaddr + PAGE_SIZE);
1924: kmem_free(lcp, LWPCTL_LCPAGE_SZ);
1925: return error;
1926: }
1.78 ad 1927: /* Prepare the page descriptor and link into the list. */
1928: lcp->lcp_uaddr = lp->lp_uva + lp->lp_cur;
1929: lp->lp_cur += PAGE_SIZE;
1930: lcp->lcp_nfree = LWPCTL_PER_PAGE;
1931: lcp->lcp_rotor = 0;
1932: memset(lcp->lcp_bitmap, 0xff, LWPCTL_BITMAP_SZ);
1933: TAILQ_INSERT_HEAD(&lp->lp_pages, lcp, lcp_chain);
1934: }
1935: for (i = lcp->lcp_rotor; lcp->lcp_bitmap[i] == 0;) {
1936: if (++i >= LWPCTL_BITMAP_ENTRIES)
1937: i = 0;
1938: }
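	/*
	 * Each bitmap word covers 32 slots and a set bit means "free".
	 * Claim the first free slot in word i: ffs() finds the bit,
	 * the XOR clears it, and the slot index is word * 32 + bit.
	 */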
1939: bit = ffs(lcp->lcp_bitmap[i]) - 1;
1.193 kamil 1940: lcp->lcp_bitmap[i] ^= (1U << bit);
1.78 ad 1941: lcp->lcp_rotor = i;
1942: lcp->lcp_nfree--;
1943: l->l_lcpage = lcp;
1944: offset = (i << 5) + bit;
1945: l->l_lwpctl = (lwpctl_t *)lcp->lcp_kaddr + offset;
1946: *uaddr = lcp->lcp_uaddr + offset * sizeof(lwpctl_t);
1947: mutex_exit(&lp->lp_lock);
1948:
1.107 ad 1949: KPREEMPT_DISABLE(l);
1.195 skrll 1950: l->l_lwpctl->lc_curcpu = (int)cpu_index(curcpu());
1.107 ad 1951: KPREEMPT_ENABLE(l);
1.78 ad 1952:
1953: return 0;
1954: }
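
/*
 * Userland view (for orientation; the system call itself lives
 * elsewhere): _lwp_ctl(2) ends up here, and the address returned in
 * *uaddr lets user code read its lwpctl_t fields, such as lc_curcpu
 * and lc_pctr, without entering the kernel.
 */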
1955:
1956: /*
1957: * Free an lwpctl structure back to the per-process list.
1958: */
1959: void
1960: lwp_ctl_free(lwp_t *l)
1961: {
1.156 pooka 1962: struct proc *p = l->l_proc;
1.78 ad 1963: lcproc_t *lp;
1964: lcpage_t *lcp;
1965: u_int map, offset;
1966:
1.156 pooka 1967: /* Don't free an lwpctl context we borrowed for vfork. */
1968: if (p->p_lflag & PL_PPWAIT) {
1969: l->l_lwpctl = NULL;
1970: return;
1971: }
1972:
1973: lp = p->p_lwpctl;
1.78 ad 1974: KASSERT(lp != NULL);
1975:
1976: lcp = l->l_lcpage;
1977: offset = (u_int)((lwpctl_t *)l->l_lwpctl - (lwpctl_t *)lcp->lcp_kaddr);
1978: KASSERT(offset < LWPCTL_PER_PAGE);
1979:
1980: mutex_enter(&lp->lp_lock);
1981: lcp->lcp_nfree++;
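	/* Mark the slot free again: word = offset / 32, bit = offset % 32. */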
1982: map = offset >> 5;
1.194 kamil 1983: lcp->lcp_bitmap[map] |= (1U << (offset & 31));
1.78 ad 1984: if (lcp->lcp_bitmap[lcp->lcp_rotor] == 0)
1985: lcp->lcp_rotor = map;
1986: if (TAILQ_FIRST(&lp->lp_pages)->lcp_nfree == 0) {
1987: TAILQ_REMOVE(&lp->lp_pages, lcp, lcp_chain);
1988: TAILQ_INSERT_HEAD(&lp->lp_pages, lcp, lcp_chain);
1989: }
1990: mutex_exit(&lp->lp_lock);
1991: }
1992:
1993: /*
1994: * Process is exiting; tear down lwpctl state. This can only be safely
1995: * called by the last LWP in the process.
1996: */
1997: void
1998: lwp_ctl_exit(void)
1999: {
2000: lcpage_t *lcp, *next;
2001: lcproc_t *lp;
2002: proc_t *p;
2003: lwp_t *l;
2004:
2005: l = curlwp;
2006: l->l_lwpctl = NULL;
1.95 ad 2007: l->l_lcpage = NULL;
1.78 ad 2008: p = l->l_proc;
2009: lp = p->p_lwpctl;
2010:
2011: KASSERT(lp != NULL);
2012: KASSERT(p->p_nlwps == 1);
2013:
2014: for (lcp = TAILQ_FIRST(&lp->lp_pages); lcp != NULL; lcp = next) {
2015: next = TAILQ_NEXT(lcp, lcp_chain);
2016: uvm_unmap(kernel_map, lcp->lcp_kaddr,
2017: lcp->lcp_kaddr + PAGE_SIZE);
2018: kmem_free(lcp, LWPCTL_LCPAGE_SZ);
2019: }
2020:
2021: if (lp->lp_uao != NULL) {
2022: uvm_unmap(&p->p_vmspace->vm_map, lp->lp_uva,
2023: lp->lp_uva + LWPCTL_UAREA_SZ);
2024: }
2025:
2026: mutex_destroy(&lp->lp_lock);
2027: kmem_free(lp, sizeof(*lp));
2028: p->p_lwpctl = NULL;
2029: }
1.84 yamt 2030:
1.130 ad 2031: /*
2032: * Return the current LWP's "preemption counter". Used to detect
2033: * preemption across operations that can tolerate preemption without
2034: * crashing, but which may generate incorrect results if preempted.
2035: */
2036: uint64_t
2037: lwp_pctr(void)
2038: {
2039:
2040: return curlwp->l_ncsw;
2041: }
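
/*
 * Illustrative sketch (not code from this file): sample the counter
 * around a preemption-tolerant operation and retry if it changed:
 *
 *	uint64_t pctr;
 *
 *	do {
 *		pctr = lwp_pctr();
 *		...operation whose result is invalid if preempted...
 *	} while (pctr != lwp_pctr());
 */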
2042:
1.151 chs 2043: /*
2044: * Set an LWP's private data pointer.
2045: */
2046: int
2047: lwp_setprivate(struct lwp *l, void *ptr)
2048: {
2049: int error = 0;
2050:
2051: l->l_private = ptr;
2052: #ifdef __HAVE_CPU_LWP_SETPRIVATE
2053: error = cpu_lwp_setprivate(l, ptr);
2054: #endif
2055: return error;
2056: }
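
/*
 * Note: l_private is the thread's user-level private pointer, as set
 * with _lwp_setprivate(2); ports that need to mirror it into the CPU
 * state (e.g. a TLS base register) define __HAVE_CPU_LWP_SETPRIVATE.
 */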
2057:
1.233 thorpej 2058: /*
2059: * Perform any thread-related cleanup on LWP exit.
2060: * N.B. l->l_proc->p_lock must be HELD on entry but will
2061: * be released before returning!
2062: */
2063: void
2064: lwp_thread_cleanup(struct lwp *l)
2065: {
2066:
2067: KASSERT(mutex_owned(l->l_proc->p_lock));
1.235 thorpej 2068: mutex_exit(l->l_proc->p_lock);
1.236 thorpej 2069:
2070: /*
2071: * If the LWP has robust futexes, release them all
2072: * now.
2073: */
2074: if (__predict_false(l->l_robust_head != 0)) {
1.244 thorpej 2075: futex_release_all_lwp(l);
1.236 thorpej 2076: }
1.233 thorpej 2077: }
2078:
1.84 yamt 2079: #if defined(DDB)
1.153 rmind 2080: #include <machine/pcb.h>
2081:
1.84 yamt 2082: void
2083: lwp_whatis(uintptr_t addr, void (*pr)(const char *, ...))
2084: {
2085: lwp_t *l;
2086:
2087: LIST_FOREACH(l, &alllwp, l_list) {
2088: uintptr_t stack = (uintptr_t)KSTACK_LOWEST_ADDR(l);
2089:
2090: if (addr < stack || stack + KSTACK_SIZE <= addr) {
2091: continue;
2092: }
2093: (*pr)("%p is %p+%zu, LWP %p's stack\n",
2094: (void *)addr, (void *)stack,
2095: (size_t)(addr - stack), l);
2096: }
2097: }
2098: #endif /* defined(DDB) */