Annotation of src/sys/kern/kern_exec.c, Revision 1.344
1.344 ! christos 1: /* $NetBSD: kern_exec.c,v 1.343 2012/02/21 03:44:54 christos Exp $ */
1.277 ad 2:
3: /*-
4: * Copyright (c) 2008 The NetBSD Foundation, Inc.
5: * All rights reserved.
6: *
7: * Redistribution and use in source and binary forms, with or without
8: * modification, are permitted provided that the following conditions
9: * are met:
10: * 1. Redistributions of source code must retain the above copyright
11: * notice, this list of conditions and the following disclaimer.
12: * 2. Redistributions in binary form must reproduce the above copyright
13: * notice, this list of conditions and the following disclaimer in the
14: * documentation and/or other materials provided with the distribution.
15: *
16: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26: * POSSIBILITY OF SUCH DAMAGE.
27: */
1.55 cgd 28:
29: /*-
1.77 cgd 30: * Copyright (C) 1993, 1994, 1996 Christopher G. Demetriou
1.55 cgd 31: * Copyright (C) 1992 Wolfgang Solfrank.
32: * Copyright (C) 1992 TooLs GmbH.
33: * All rights reserved.
34: *
35: * Redistribution and use in source and binary forms, with or without
36: * modification, are permitted provided that the following conditions
37: * are met:
38: * 1. Redistributions of source code must retain the above copyright
39: * notice, this list of conditions and the following disclaimer.
40: * 2. Redistributions in binary form must reproduce the above copyright
41: * notice, this list of conditions and the following disclaimer in the
42: * documentation and/or other materials provided with the distribution.
43: * 3. All advertising materials mentioning features or use of this software
44: * must display the following acknowledgement:
45: * This product includes software developed by TooLs GmbH.
46: * 4. The name of TooLs GmbH may not be used to endorse or promote products
47: * derived from this software without specific prior written permission.
48: *
49: * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
50: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
51: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
52: * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
53: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
54: * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
55: * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
56: * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
57: * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
58: * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
59: */
1.146 lukem 60:
61: #include <sys/cdefs.h>
1.344 ! christos 62: __KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.343 2012/02/21 03:44:54 christos Exp $");
1.89 mrg 63:
1.325 jmcneill 64: #include "opt_exec.h"
1.92 thorpej 65: #include "opt_ktrace.h"
1.285 apb 66: #include "opt_modular.h"
1.124 jdolecek 67: #include "opt_syscall_debug.h"
1.226 dogcow 68: #include "veriexec.h"
1.232 elad 69: #include "opt_pax.h"
1.55 cgd 70:
71: #include <sys/param.h>
72: #include <sys/systm.h>
73: #include <sys/filedesc.h>
74: #include <sys/kernel.h>
75: #include <sys/proc.h>
76: #include <sys/mount.h>
77: #include <sys/malloc.h>
1.265 yamt 78: #include <sys/kmem.h>
1.55 cgd 79: #include <sys/namei.h>
80: #include <sys/vnode.h>
81: #include <sys/file.h>
82: #include <sys/acct.h>
1.337 martin 83: #include <sys/atomic.h>
1.55 cgd 84: #include <sys/exec.h>
85: #include <sys/ktrace.h>
1.278 pooka 86: #include <sys/uidinfo.h>
1.55 cgd 87: #include <sys/wait.h>
88: #include <sys/mman.h>
1.155 gmcgarry 89: #include <sys/ras.h>
1.55 cgd 90: #include <sys/signalvar.h>
91: #include <sys/stat.h>
1.124 jdolecek 92: #include <sys/syscall.h>
1.218 elad 93: #include <sys/kauth.h>
1.253 ad 94: #include <sys/lwpctl.h>
1.260 christos 95: #include <sys/pax.h>
1.263 ad 96: #include <sys/cpu.h>
1.282 ad 97: #include <sys/module.h>
1.289 pooka 98: #include <sys/syscallvar.h>
1.56 cgd 99: #include <sys/syscallargs.h>
1.222 elad 100: #if NVERIEXEC > 0
1.197 blymn 101: #include <sys/verified_exec.h>
1.222 elad 102: #endif /* NVERIEXEC > 0 */
1.294 darran 103: #include <sys/sdt.h>
1.337 martin 104: #include <sys/spawn.h>
105: #include <sys/prot.h>
1.330 tls 106: #include <sys/cprng.h>
1.55 cgd 107:
1.88 mrg 108: #include <uvm/uvm_extern.h>
109:
1.55 cgd 110: #include <machine/reg.h>
111:
1.244 dsl 112: #include <compat/common/compat_util.h>
113:
1.171 chs 114: static int exec_sigcode_map(struct proc *, const struct emul *);
115:
1.143 christos 116: #ifdef DEBUG_EXEC
1.305 matt 117: #define DPRINTF(a) printf a
1.312 christos 118: #define COPYPRINTF(s, a, b) printf("%s, %d: copyout%s @%p %zu\n", __func__, \
119: __LINE__, (s), (a), (b))
1.143 christos 120: #else
121: #define DPRINTF(a)
1.312 christos 122: #define COPYPRINTF(s, a, b)
1.143 christos 123: #endif /* DEBUG_EXEC */
1.165 thorpej 124:
1.130 jdolecek 125: /*
1.294 darran 126: * DTrace SDT provider definitions
127: */
128: SDT_PROBE_DEFINE(proc,,,exec,
129: "char *", NULL,
130: NULL, NULL, NULL, NULL,
131: NULL, NULL, NULL, NULL);
132: SDT_PROBE_DEFINE(proc,,,exec_success,
133: "char *", NULL,
134: NULL, NULL, NULL, NULL,
135: NULL, NULL, NULL, NULL);
136: SDT_PROBE_DEFINE(proc,,,exec_failure,
137: "int", NULL,
138: NULL, NULL, NULL, NULL,
139: NULL, NULL, NULL, NULL);
140:
141: /*
1.130 jdolecek 142: * Exec function switch:
143: *
144: * Note that each makecmds function is responsible for loading the
145: * exec package with the necessary functions for any exec-type-specific
146: * handling.
147: *
148: * Functions for specific exec types should be defined in their own
149: * header file.
150: */
1.138 lukem 151: static const struct execsw **execsw = NULL;
152: static int nexecs;
153:
1.282 ad 154: u_int exec_maxhdrsz; /* must not be static - used by netbsd32 */
1.130 jdolecek 155:
156: /* list of dynamically loaded execsw entries */
1.282 ad 157: static LIST_HEAD(execlist_head, exec_entry) ex_head =
158: LIST_HEAD_INITIALIZER(ex_head);
1.130 jdolecek 159: struct exec_entry {
1.138 lukem 160: LIST_ENTRY(exec_entry) ex_list;
1.282 ad 161: SLIST_ENTRY(exec_entry) ex_slist;
162: const struct execsw *ex_sw;
1.130 jdolecek 163: };
164:
1.203 christos 165: #ifndef __HAVE_SYSCALL_INTERN
166: void syscall(void);
167: #endif
168:
1.173 christos 169: /* NetBSD emul struct */
1.282 ad 170: struct emul emul_netbsd = {
1.291 rmind 171: .e_name = "netbsd",
172: .e_path = NULL,
1.133 mycroft 173: #ifndef __HAVE_MINIMAL_EMUL
1.291 rmind 174: .e_flags = EMUL_HAS_SYS___syscall,
175: .e_errno = NULL,
176: .e_nosys = SYS_syscall,
177: .e_nsysent = SYS_NSYSENT,
1.133 mycroft 178: #endif
1.291 rmind 179: .e_sysent = sysent,
1.124 jdolecek 180: #ifdef SYSCALL_DEBUG
1.291 rmind 181: .e_syscallnames = syscallnames,
1.124 jdolecek 182: #else
1.291 rmind 183: .e_syscallnames = NULL,
1.124 jdolecek 184: #endif
1.291 rmind 185: .e_sendsig = sendsig,
186: .e_trapsignal = trapsignal,
187: .e_tracesig = NULL,
188: .e_sigcode = NULL,
189: .e_esigcode = NULL,
190: .e_sigobject = NULL,
191: .e_setregs = setregs,
192: .e_proc_exec = NULL,
193: .e_proc_fork = NULL,
194: .e_proc_exit = NULL,
195: .e_lwp_fork = NULL,
196: .e_lwp_exit = NULL,
1.133 mycroft 197: #ifdef __HAVE_SYSCALL_INTERN
1.291 rmind 198: .e_syscall_intern = syscall_intern,
1.133 mycroft 199: #else
1.291 rmind 200: .e_syscall = syscall,
1.133 mycroft 201: #endif
1.291 rmind 202: .e_sysctlovly = NULL,
203: .e_fault = NULL,
204: .e_vm_default_addr = uvm_default_mapaddr,
205: .e_usertrap = NULL,
206: .e_ucsize = sizeof(ucontext_t),
207: .e_startlwp = startlwp
1.124 jdolecek 208: };
209:
1.55 cgd 210: /*
1.130 jdolecek 211: * Exec lock. Used to control access to execsw[] structures.
212: * This must not be static so that netbsd32 can access it, too.
213: */
1.237 ad 214: krwlock_t exec_lock;
1.183 junyoung 215:
1.259 ad 216: static kmutex_t sigobject_lock;
217:
1.337 martin 218: /*
219: * Data used between a loadvm and execve part of an "exec" operation
220: */
221: struct execve_data {
222: struct exec_package ed_pack; /* exec package handed to check_exec() */
223: struct pathbuf *ed_pathbuf; /* result of pathbuf_copyin() on the user path */
224: struct vattr ed_attr; /* attribute storage; ed_pack.ep_vap points here */
225: struct ps_strings ed_arginfo; /* arginfo used when EXEC_32 is not set */
226: char *ed_argp; /* argument/environment buffer taken from exec_pool */
227: const char *ed_pathstring; /* pathbuf_stringcopy_get() of ed_pathbuf */
228: char *ed_resolvedpathbuf; /* PNBUF_GET() buffer used as ep_resolvedname */
229: size_t ed_ps_strings_sz; /* sizeof ps_strings, or ps_strings32 for EXEC_32 */
230: int ed_szsigcode; /* e_esigcode - e_sigcode of the selected emul */
231: long ed_argc; /* argument strings copied, fake args included */
232: long ed_envc; /* environment strings copied */
233: };
234:
235: /*
236: * data passed from parent lwp to child during a posix_spawn()
237: */
238: struct spawn_exec_data {
239: struct execve_data sed_exec;
240: size_t sed_actions_len;
241: struct posix_spawn_file_actions_entry
242: *sed_actions;
243: struct posix_spawnattr *sed_attrs;
244: struct proc *sed_parent;
245: kcondvar_t sed_cv_child_ready;
246: kmutex_t sed_mtx_child;
247: int sed_error;
248: };
249:
1.277 ad 250: static void *
251: exec_pool_alloc(struct pool *pp, int flags)
252: {
253:
254: return (void *)uvm_km_alloc(kernel_map, NCARGS, 0,
255: UVM_KMF_PAGEABLE | UVM_KMF_WAITVA);
256: }
257:
258: static void
259: exec_pool_free(struct pool *pp, void *addr)
260: {
261:
262: uvm_km_free(kernel_map, (vaddr_t)addr, NCARGS, UVM_KMF_PAGEABLE);
263: }
264:
265: static struct pool exec_pool;
266:
267: static struct pool_allocator exec_palloc = {
268: .pa_alloc = exec_pool_alloc,
269: .pa_free = exec_pool_free,
270: .pa_pagesz = NCARGS
271: };
272:
1.130 jdolecek 273: /*
1.55 cgd 274: * check exec:
275: * given an "executable" described in the exec package's namei info,
276: * see what we can do with it.
277: *
278: * ON ENTRY:
279: * exec package with appropriate namei info
1.212 christos 280: * lwp pointer of exec'ing lwp
1.55 cgd 281: * NO SELF-LOCKED VNODES
282: *
283: * ON EXIT:
284: * error: nothing held, etc. exec header still allocated.
1.77 cgd 285: * ok: filled exec package, executable's vnode (unlocked).
1.55 cgd 286: *
287: * EXEC SWITCH ENTRY:
288: * Locked vnode to check, exec package, proc.
289: *
290: * EXEC SWITCH EXIT:
1.77 cgd 291: * ok: return 0, filled exec package, executable's vnode (unlocked).
1.55 cgd 292: * error: destructive:
293: * everything deallocated except exec header.
1.76 cgd 294: * non-destructive:
1.77 cgd 295: * error code, executable's vnode (unlocked),
1.76 cgd 296: * exec header unmodified.
1.55 cgd 297: */
298: int
1.205 christos 299: /*ARGSUSED*/
1.301 dholland 300: check_exec(struct lwp *l, struct exec_package *epp, struct pathbuf *pb)
1.55 cgd 301: {
1.138 lukem 302: int error, i;
303: struct vnode *vp;
1.295 dholland 304: struct nameidata nd;
1.138 lukem 305: size_t resid;
1.55 cgd 306:
1.303 dholland 307: NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
1.295 dholland 308:
1.55 cgd 309: /* first get the vnode */
1.295 dholland 310: if ((error = namei(&nd)) != 0)
1.55 cgd 311: return error;
1.295 dholland 312: epp->ep_vp = vp = nd.ni_vp;
313: /* this cannot overflow as both are size PATH_MAX */
1.302 dholland 314: strcpy(epp->ep_resolvedname, nd.ni_pnbuf);
1.295 dholland 315:
1.296 dholland 316: #ifdef DIAGNOSTIC
317: /* paranoia (take this out once namei stuff stabilizes) */
1.302 dholland 318: memset(nd.ni_pnbuf, '~', PATH_MAX);
1.295 dholland 319: #endif
1.55 cgd 320:
1.84 mycroft 321: /* check access and type */
1.55 cgd 322: if (vp->v_type != VREG) {
1.81 kleink 323: error = EACCES;
1.55 cgd 324: goto bad1;
325: }
1.254 pooka 326: if ((error = VOP_ACCESS(vp, VEXEC, l->l_cred)) != 0)
1.84 mycroft 327: goto bad1;
1.55 cgd 328:
329: /* get attributes */
1.254 pooka 330: if ((error = VOP_GETATTR(vp, epp->ep_vap, l->l_cred)) != 0)
1.55 cgd 331: goto bad1;
332:
333: /* Check mount point */
334: if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
335: error = EACCES;
336: goto bad1;
337: }
1.141 thorpej 338: if (vp->v_mount->mnt_flag & MNT_NOSUID)
1.83 mycroft 339: epp->ep_vap->va_mode &= ~(S_ISUID | S_ISGID);
1.55 cgd 340:
341: /* try to open it */
1.254 pooka 342: if ((error = VOP_OPEN(vp, FREAD, l->l_cred)) != 0)
1.55 cgd 343: goto bad1;
344:
1.99 wrstuden 345: /* unlock vp, since we need it unlocked from here on out. */
1.298 hannken 346: VOP_UNLOCK(vp);
1.77 cgd 347:
1.222 elad 348: #if NVERIEXEC > 0
1.295 dholland 349: error = veriexec_verify(l, vp, epp->ep_resolvedname,
1.233 elad 350: epp->ep_flags & EXEC_INDIR ? VERIEXEC_INDIRECT : VERIEXEC_DIRECT,
1.236 elad 351: NULL);
352: if (error)
1.234 elad 353: goto bad2;
1.222 elad 354: #endif /* NVERIEXEC > 0 */
1.160 blymn 355:
1.232 elad 356: #ifdef PAX_SEGVGUARD
1.295 dholland 357: error = pax_segvguard(l, vp, epp->ep_resolvedname, false);
1.234 elad 358: if (error)
359: goto bad2;
1.232 elad 360: #endif /* PAX_SEGVGUARD */
361:
1.55 cgd 362: /* now we have the file, get the exec header */
1.74 christos 363: error = vn_rdwr(UIO_READ, vp, epp->ep_hdr, epp->ep_hdrlen, 0,
1.223 ad 364: UIO_SYSSPACE, 0, l->l_cred, &resid, NULL);
1.74 christos 365: if (error)
1.55 cgd 366: goto bad2;
367: epp->ep_hdrvalid = epp->ep_hdrlen - resid;
368:
369: /*
1.136 eeh 370: * Set up default address space limits. Can be overridden
371: * by individual exec packages.
1.183 junyoung 372: *
1.235 rillig 373: * XXX probably should be all done in the exec packages.
1.136 eeh 374: */
375: epp->ep_vm_minaddr = VM_MIN_ADDRESS;
376: epp->ep_vm_maxaddr = VM_MAXUSER_ADDRESS;
377: /*
1.55 cgd 378: * set up the vmcmds for creation of the process
379: * address space
380: */
381: error = ENOEXEC;
1.244 dsl 382: for (i = 0; i < nexecs; i++) {
1.68 cgd 383: int newerror;
384:
1.130 jdolecek 385: epp->ep_esch = execsw[i];
1.212 christos 386: newerror = (*execsw[i]->es_makecmds)(l, epp);
1.244 dsl 387:
388: if (!newerror) {
1.318 reinoud 389: /* Seems ok: check that entry point is not too high */
1.323 reinoud 390: if (epp->ep_entry > epp->ep_vm_maxaddr) {
1.322 reinoud 391: #ifdef DIAGNOSTIC
1.329 reinoud 392: printf("%s: rejecting %p due to "
1.331 christos 393: "too high entry address (> %p)\n",
394: __func__, (void *)epp->ep_entry,
395: (void *)epp->ep_vm_maxaddr);
1.322 reinoud 396: #endif
1.318 reinoud 397: error = ENOEXEC;
398: break;
399: }
400: /* Seems ok: check that entry point is not too low */
1.323 reinoud 401: if (epp->ep_entry < epp->ep_vm_minaddr) {
1.322 reinoud 402: #ifdef DIAGNOSTIC
1.329 reinoud 403: printf("%s: rejecting %p due to "
1.331 christos 404: "too low entry address (< %p)\n",
405: __func__, (void *)epp->ep_entry,
406: (void *)epp->ep_vm_minaddr);
1.322 reinoud 407: #endif
1.244 dsl 408: error = ENOEXEC;
409: break;
410: }
411:
412: /* check limits */
413: if ((epp->ep_tsize > MAXTSIZ) ||
414: (epp->ep_dsize > (u_quad_t)l->l_proc->p_rlimit
415: [RLIMIT_DATA].rlim_cur)) {
1.322 reinoud 416: #ifdef DIAGNOSTIC
1.323 reinoud 417: printf("%s: rejecting due to "
1.331 christos 418: "limits (t=%llu > %llu || d=%llu > %llu)\n",
419: __func__,
420: (unsigned long long)epp->ep_tsize,
421: (unsigned long long)MAXTSIZ,
422: (unsigned long long)epp->ep_dsize,
1.332 christos 423: (unsigned long long)
424: l->l_proc->p_rlimit[RLIMIT_DATA].rlim_cur);
1.322 reinoud 425: #endif
1.244 dsl 426: error = ENOMEM;
427: break;
428: }
429: return 0;
430: }
431:
432: if (epp->ep_emul_root != NULL) {
433: vrele(epp->ep_emul_root);
434: epp->ep_emul_root = NULL;
435: }
436: if (epp->ep_interp != NULL) {
437: vrele(epp->ep_interp);
438: epp->ep_interp = NULL;
439: }
440:
1.68 cgd 441: /* make sure the first "interesting" error code is saved. */
1.244 dsl 442: if (error == ENOEXEC)
1.68 cgd 443: error = newerror;
1.124 jdolecek 444:
1.244 dsl 445: if (epp->ep_flags & EXEC_DESTR)
446: /* Error from "#!" code, tidied up by recursive call */
1.55 cgd 447: return error;
448: }
449:
1.249 pooka 450: /* not found, error */
451:
1.55 cgd 452: /*
453: * free any vmspace-creation commands,
454: * and release their references
455: */
456: kill_vmcmds(&epp->ep_vmcmds);
457:
458: bad2:
459: /*
1.99 wrstuden 460: * close and release the vnode, restore the old one, free the
1.55 cgd 461: * pathname buf, and punt.
462: */
1.99 wrstuden 463: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.254 pooka 464: VOP_CLOSE(vp, FREAD, l->l_cred);
1.99 wrstuden 465: vput(vp);
1.55 cgd 466: return error;
467:
468: bad1:
469: /*
470: * free the namei pathname buffer, and put the vnode
471: * (which we don't yet have open).
472: */
1.77 cgd 473: vput(vp); /* was still locked */
1.55 cgd 474: return error;
475: }
476:
1.188 chs 477: #ifdef __MACHINE_STACK_GROWS_UP
478: #define STACK_PTHREADSPACE NBPG
479: #else
480: #define STACK_PTHREADSPACE 0
481: #endif
482:
1.204 cube 483: static int
484: execve_fetch_element(char * const *array, size_t index, char **value)
485: {
486: return copyin(array + index, value, sizeof(*value));
487: }
488:
1.55 cgd 489: /*
490: * exec system call
491: */
1.75 christos 492: int
1.258 dsl 493: sys_execve(struct lwp *l, const struct sys_execve_args *uap, register_t *retval)
1.71 thorpej 494: {
1.258 dsl 495: /* {
1.138 lukem 496: syscallarg(const char *) path;
497: syscallarg(char * const *) argp;
498: syscallarg(char * const *) envp;
1.258 dsl 499: } */
1.204 cube 500:
501: return execve1(l, SCARG(uap, path), SCARG(uap, argp),
502: SCARG(uap, envp), execve_fetch_element);
503: }
504:
1.317 manu 505: int
506: sys_fexecve(struct lwp *l, const struct sys_fexecve_args *uap,
507: register_t *retval)
508: {
509: /* {
510: syscallarg(int) fd;
511: syscallarg(char * const *) argp;
512: syscallarg(char * const *) envp;
513: } */
514:
515: return ENOSYS;
516: }
517:
1.282 ad 518: /*
519: * Load modules to try and execute an image that we do not understand.
520: * If no execsw entries are present, we load those likely to be needed
521: * in order to run native images only. Otherwise, we autoload all
522: * possible modules that could let us run the binary. XXX lame
523: */
/*
 * Try to autoload exec/compat modules.  With no execsw entries
 * registered only the "native" set is attempted, otherwise the full
 * "compat" set.  Load failures are ignored; after every successful
 * load the CPU is yielded.  Does nothing unless MODULAR is defined.
 */
static void
exec_autoload(void)
{
#ifdef MODULAR
	static const char * const native[] = {
		"exec_elf32",
		"exec_elf64",
		"exec_script",
		NULL
	};
	static const char * const compat[] = {
		"exec_elf32",
		"exec_elf64",
		"exec_script",
		"exec_aout",
		"exec_coff",
		"exec_ecoff",
		"compat_aoutm68k",
		"compat_freebsd",
		"compat_ibcs2",
		"compat_linux",
		"compat_linux32",
		"compat_netbsd32",
		"compat_sunos",
		"compat_sunos32",
		"compat_svr4",
		"compat_svr4_32",
		"compat_ultrix",
		NULL
	};
	char const * const *modp;

	if (nexecs == 0)
		modp = native;
	else
		modp = compat;

	/* Walk the NULL-terminated name list. */
	for (; *modp != NULL; modp++) {
		if (module_autoload(*modp, MODULE_CLASS_MISC) == 0)
			yield();
	}
#endif
}
566:
1.337 martin 567: static int
568: execve_loadvm(struct lwp *l, const char *path, char * const *args,
569: char * const *envs, execve_fetch_element_t fetch_element,
570: struct execve_data * restrict data)
1.204 cube 571: {
1.153 thorpej 572: int error;
1.164 thorpej 573: struct proc *p;
1.138 lukem 574: char *dp, *sp;
1.248 christos 575: size_t i, len;
1.265 yamt 576: struct exec_fakearg *tmpfap;
1.282 ad 577: u_int modgen;
1.337 martin 578:
579: KASSERT(data != NULL);
1.55 cgd 580:
1.237 ad 581: p = l->l_proc;
1.282 ad 582: modgen = 0;
1.164 thorpej 583:
1.294 darran 584: SDT_PROBE(proc,,,exec, path, 0, 0, 0, 0);
585:
1.149 christos 586: /*
1.269 christos 587: * Check if we have exceeded our number of processes limit.
588: * This is so that we handle the case where a root daemon
589: * forked, ran setuid to become the desired user and is trying
590: * to exec. The obvious place to do the reference counting check
591: * is setuid(), but we don't do the reference counting check there
592: * like other OS's do because then all the programs that use setuid()
593: * must be modified to check the return code of setuid() and exit().
594: * It is dangerous to make setuid() fail, because it fails open and
595: * the program will continue to run as root. If we make it succeed
596: * and return an error code, again we are not enforcing the limit.
597: * The best place to enforce the limit is here, when the process tries
598: * to execute a new image, because eventually the process will need
599: * to call exec in order to do something useful.
600: */
1.282 ad 601: retry:
1.287 christos 602: if ((p->p_flag & PK_SUGID) && kauth_authorize_generic(l->l_cred,
603: KAUTH_GENERIC_ISSUSER, NULL) != 0 && chgproccnt(kauth_cred_getuid(
604: l->l_cred), 0) > p->p_rlimit[RLIMIT_NPROC].rlim_cur)
1.269 christos 605: return EAGAIN;
606:
607: /*
1.237 ad 608: * Drain existing references and forbid new ones. The process
609: * should be left alone until we're done here. This is necessary
610: * to avoid race conditions - e.g. in ptrace() - that might allow
611: * a local user to illicitly obtain elevated privileges.
612: */
1.252 ad 613: rw_enter(&p->p_reflock, RW_WRITER);
1.149 christos 614:
1.55 cgd 615: /*
1.129 jdolecek 616: * Init the namei data to point the file user's program name.
617: * This is done here rather than in check_exec(), so that it's
618: * possible to override this settings if any of makecmd/probe
619: * functions call check_exec() recursively - for example,
620: * see exec_script_makecmds().
621: */
1.337 martin 622: error = pathbuf_copyin(path, &data->ed_pathbuf);
1.248 christos 623: if (error) {
1.312 christos 624: DPRINTF(("%s: pathbuf_copyin path @%p %d\n", __func__,
625: path, error));
1.200 elad 626: goto clrflg;
1.248 christos 627: }
1.337 martin 628: data->ed_pathstring = pathbuf_stringcopy_get(data->ed_pathbuf);
629:
630: data->ed_resolvedpathbuf = PNBUF_GET();
1.295 dholland 631: #ifdef DIAGNOSTIC
1.337 martin 632: strcpy(data->ed_resolvedpathbuf, "/wrong");
1.295 dholland 633: #endif
1.55 cgd 634:
635: /*
636: * initialize the fields of the exec package.
637: */
1.337 martin 638: data->ed_pack.ep_name = path;
639: data->ed_pack.ep_kname = data->ed_pathstring;
640: data->ed_pack.ep_resolvedname = data->ed_resolvedpathbuf;
641: data->ed_pack.ep_hdr = kmem_alloc(exec_maxhdrsz, KM_SLEEP);
642: data->ed_pack.ep_hdrlen = exec_maxhdrsz;
643: data->ed_pack.ep_hdrvalid = 0;
644: data->ed_pack.ep_emul_arg = NULL;
645: data->ed_pack.ep_emul_arg_free = NULL;
646: data->ed_pack.ep_vmcmds.evs_cnt = 0;
647: data->ed_pack.ep_vmcmds.evs_used = 0;
648: data->ed_pack.ep_vap = &data->ed_attr;
649: data->ed_pack.ep_flags = 0;
650: data->ed_pack.ep_emul_root = NULL;
651: data->ed_pack.ep_interp = NULL;
652: data->ed_pack.ep_esch = NULL;
653: data->ed_pack.ep_pax_flags = 0;
1.55 cgd 654:
1.237 ad 655: rw_enter(&exec_lock, RW_READER);
1.130 jdolecek 656:
1.55 cgd 657: /* see if we can run it. */
1.337 martin 658: if ((error = check_exec(l, &data->ed_pack, data->ed_pathbuf)) != 0) {
1.261 xtraeme 659: if (error != ENOENT) {
1.312 christos 660: DPRINTF(("%s: check exec failed %d\n",
661: __func__, error));
1.261 xtraeme 662: }
1.55 cgd 663: goto freehdr;
1.248 christos 664: }
1.55 cgd 665:
666: /* XXX -- THE FOLLOWING SECTION NEEDS MAJOR CLEANUP */
667:
668: /* allocate an argument buffer */
1.337 martin 669: data->ed_argp = pool_get(&exec_pool, PR_WAITOK);
670: KASSERT(data->ed_argp != NULL);
671: dp = data->ed_argp;
672: data->ed_argc = 0;
1.55 cgd 673:
674: /* copy the fake args list, if there's one, freeing it as we go */
1.337 martin 675: if (data->ed_pack.ep_flags & EXEC_HASARGL) {
676: tmpfap = data->ed_pack.ep_fa;
1.265 yamt 677: while (tmpfap->fa_arg != NULL) {
678: const char *cp;
1.55 cgd 679:
1.265 yamt 680: cp = tmpfap->fa_arg;
1.55 cgd 681: while (*cp)
682: *dp++ = *cp++;
1.276 ad 683: *dp++ = '\0';
1.290 dsl 684: ktrexecarg(tmpfap->fa_arg, cp - tmpfap->fa_arg);
1.55 cgd 685:
1.265 yamt 686: kmem_free(tmpfap->fa_arg, tmpfap->fa_len);
1.337 martin 687: tmpfap++; data->ed_argc++;
1.55 cgd 688: }
1.337 martin 689: kmem_free(data->ed_pack.ep_fa, data->ed_pack.ep_fa_len);
690: data->ed_pack.ep_flags &= ~EXEC_HASARGL;
1.55 cgd 691: }
692:
693: /* Now get argv & environment */
1.204 cube 694: if (args == NULL) {
1.312 christos 695: DPRINTF(("%s: null args\n", __func__));
1.55 cgd 696: error = EINVAL;
697: goto bad;
698: }
1.204 cube 699: /* 'i' will index the argp/envp element to be retrieved */
700: i = 0;
1.337 martin 701: if (data->ed_pack.ep_flags & EXEC_SKIPARG)
1.204 cube 702: i++;
1.55 cgd 703:
704: while (1) {
1.337 martin 705: len = data->ed_argp + ARG_MAX - dp;
1.248 christos 706: if ((error = (*fetch_element)(args, i, &sp)) != 0) {
1.312 christos 707: DPRINTF(("%s: fetch_element args %d\n",
1.313 jakllsch 708: __func__, error));
1.55 cgd 709: goto bad;
1.248 christos 710: }
1.55 cgd 711: if (!sp)
712: break;
1.74 christos 713: if ((error = copyinstr(sp, dp, len, &len)) != 0) {
1.312 christos 714: DPRINTF(("%s: copyinstr args %d\n", __func__, error));
1.55 cgd 715: if (error == ENAMETOOLONG)
716: error = E2BIG;
717: goto bad;
718: }
1.247 ad 719: ktrexecarg(dp, len - 1);
1.55 cgd 720: dp += len;
1.204 cube 721: i++;
1.337 martin 722: data->ed_argc++;
1.55 cgd 723: }
724:
1.337 martin 725: data->ed_envc = 0;
1.74 christos 726: /* environment need not be there */
1.204 cube 727: if (envs != NULL) {
728: i = 0;
1.55 cgd 729: while (1) {
1.337 martin 730: len = data->ed_argp + ARG_MAX - dp;
1.248 christos 731: if ((error = (*fetch_element)(envs, i, &sp)) != 0) {
1.312 christos 732: DPRINTF(("%s: fetch_element env %d\n",
733: __func__, error));
1.55 cgd 734: goto bad;
1.248 christos 735: }
1.55 cgd 736: if (!sp)
737: break;
1.74 christos 738: if ((error = copyinstr(sp, dp, len, &len)) != 0) {
1.312 christos 739: DPRINTF(("%s: copyinstr env %d\n",
740: __func__, error));
1.55 cgd 741: if (error == ENAMETOOLONG)
742: error = E2BIG;
743: goto bad;
744: }
1.337 martin 745:
1.247 ad 746: ktrexecenv(dp, len - 1);
1.55 cgd 747: dp += len;
1.204 cube 748: i++;
1.337 martin 749: data->ed_envc++;
1.55 cgd 750: }
751: }
1.61 mycroft 752:
753: dp = (char *) ALIGN(dp);
1.55 cgd 754:
1.337 martin 755: data->ed_szsigcode = data->ed_pack.ep_esch->es_emul->e_esigcode -
756: data->ed_pack.ep_esch->es_emul->e_sigcode;
1.65 fvdl 757:
1.267 dsl 758: #ifdef __MACHINE_STACK_GROWS_UP
759: /* See big comment lower down */
760: #define RTLD_GAP 32
761: #else
762: #define RTLD_GAP 0
763: #endif
764:
1.55 cgd 765: /* Now check if args & environ fit into new stack */
1.337 martin 766: if (data->ed_pack.ep_flags & EXEC_32) {
767: data->ed_ps_strings_sz = sizeof(struct ps_strings32);
768: len = ((data->ed_argc + data->ed_envc + 2 +
769: data->ed_pack.ep_esch->es_arglen) *
1.267 dsl 770: sizeof(int) + sizeof(int) + dp + RTLD_GAP +
1.337 martin 771: data->ed_szsigcode + data->ed_ps_strings_sz + STACK_PTHREADSPACE)
772: - data->ed_argp;
1.311 joerg 773: } else {
1.337 martin 774: data->ed_ps_strings_sz = sizeof(struct ps_strings);
775: len = ((data->ed_argc + data->ed_envc + 2 +
776: data->ed_pack.ep_esch->es_arglen) *
1.267 dsl 777: sizeof(char *) + sizeof(int) + dp + RTLD_GAP +
1.337 martin 778: data->ed_szsigcode + data->ed_ps_strings_sz + STACK_PTHREADSPACE)
779: - data->ed_argp;
1.311 joerg 780: }
1.67 christos 781:
1.262 elad 782: #ifdef PAX_ASLR
783: if (pax_aslr_active(l))
1.330 tls 784: len += (cprng_fast32() % PAGE_SIZE);
1.262 elad 785: #endif /* PAX_ASLR */
786:
1.334 christos 787: /* make the stack "safely" aligned */
1.335 christos 788: len = STACK_LEN_ALIGN(len, STACK_ALIGNBYTES);
1.55 cgd 789:
1.337 martin 790: if (len > data->ed_pack.ep_ssize) {
791: /* in effect, compare to initial limit */
1.312 christos 792: DPRINTF(("%s: stack limit exceeded %zu\n", __func__, len));
1.55 cgd 793: goto bad;
794: }
1.337 martin 795: /* adjust "active stack depth" for process VSZ */
796: data->ed_pack.ep_ssize = len;
797:
798: return 0;
799:
800: bad:
801: /* free the vmspace-creation commands, and release their references */
802: kill_vmcmds(&data->ed_pack.ep_vmcmds);
803: /* kill any opened file descriptor, if necessary */
804: if (data->ed_pack.ep_flags & EXEC_HASFD) {
805: data->ed_pack.ep_flags &= ~EXEC_HASFD;
806: fd_close(data->ed_pack.ep_fd);
807: }
808: /* close and put the exec'd file */
809: vn_lock(data->ed_pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
810: VOP_CLOSE(data->ed_pack.ep_vp, FREAD, l->l_cred);
811: vput(data->ed_pack.ep_vp);
812: pool_put(&exec_pool, data->ed_argp);
813:
814: freehdr:
815: kmem_free(data->ed_pack.ep_hdr, data->ed_pack.ep_hdrlen);
816: if (data->ed_pack.ep_emul_root != NULL)
817: vrele(data->ed_pack.ep_emul_root);
818: if (data->ed_pack.ep_interp != NULL)
819: vrele(data->ed_pack.ep_interp);
820:
821: rw_exit(&exec_lock);
822:
823: pathbuf_stringcopy_put(data->ed_pathbuf, data->ed_pathstring);
824: pathbuf_destroy(data->ed_pathbuf);
825: PNBUF_PUT(data->ed_resolvedpathbuf);
826:
827: clrflg:
828: rw_exit(&p->p_reflock);
829:
830: if (modgen != module_gen && error == ENOEXEC) {
831: modgen = module_gen;
832: exec_autoload();
833: goto retry;
834: }
835:
836: SDT_PROBE(proc,,,exec_failure, error, 0, 0, 0, 0);
837: return error;
838: }
839:
840: static int
841: execve_runproc(struct lwp *l, struct execve_data * restrict data)
842: {
843: int error = 0;
844: struct proc *p;
845: size_t i;
846: char *stack, *dp;
847: const char *commandname;
848: struct ps_strings32 arginfo32;
849: struct exec_vmcmd *base_vcp;
850: void *aip;
851: struct vmspace *vm;
852: ksiginfo_t ksi;
853: ksiginfoq_t kq;
854: bool proc_is_new;
855:
856: KASSERT(rw_lock_held(&exec_lock));
857: KASSERT(data != NULL);
858: if (data == NULL)
859: return (EINVAL);
860:
861: p = l->l_proc;
862: proc_is_new = p->p_vmspace == NULL;
863:
864: base_vcp = NULL;
865:
866: if (data->ed_pack.ep_flags & EXEC_32)
867: aip = &arginfo32;
868: else
869: aip = &data->ed_arginfo;
1.55 cgd 870:
1.237 ad 871: /* Get rid of other LWPs. */
1.340 rmind 872: if (p->p_nlwps > 1) {
1.272 ad 873: mutex_enter(p->p_lock);
1.237 ad 874: exit_lwps(l);
1.272 ad 875: mutex_exit(p->p_lock);
1.237 ad 876: }
1.164 thorpej 877: KDASSERT(p->p_nlwps == 1);
878:
1.253 ad 879: /* Destroy any lwpctl info. */
880: if (p->p_lwpctl != NULL)
881: lwp_ctl_exit();
882:
1.164 thorpej 883: /* Remove POSIX timers */
884: timers_free(p, TIMERS_POSIX);
885:
1.86 thorpej 886: /*
887: * Do whatever is necessary to prepare the address space
888: * for remapping. Note that this might replace the current
889: * vmspace with another!
890: */
1.337 martin 891: uvmspace_exec(l, data->ed_pack.ep_vm_minaddr, data->ed_pack.ep_vm_maxaddr);
1.55 cgd 892:
1.186 chs 893: /* record proc's vnode, for use by procfs and others */
894: if (p->p_textvp)
895: vrele(p->p_textvp);
1.337 martin 896: vref(data->ed_pack.ep_vp);
897: p->p_textvp = data->ed_pack.ep_vp;
1.186 chs 898:
1.55 cgd 899: /* Now map address space */
1.86 thorpej 900: vm = p->p_vmspace;
1.337 martin 901: vm->vm_taddr = (void *)data->ed_pack.ep_taddr;
902: vm->vm_tsize = btoc(data->ed_pack.ep_tsize);
903: vm->vm_daddr = (void*)data->ed_pack.ep_daddr;
904: vm->vm_dsize = btoc(data->ed_pack.ep_dsize);
905: vm->vm_ssize = btoc(data->ed_pack.ep_ssize);
1.288 mrg 906: vm->vm_issize = 0;
1.337 martin 907: vm->vm_maxsaddr = (void *)data->ed_pack.ep_maxsaddr;
908: vm->vm_minsaddr = (void *)data->ed_pack.ep_minsaddr;
1.55 cgd 909:
1.260 christos 910: #ifdef PAX_ASLR
911: pax_aslr_init(l, vm);
912: #endif /* PAX_ASLR */
913:
1.55 cgd 914: /* create the new process's VM space by running the vmcmds */
915: #ifdef DIAGNOSTIC
1.337 martin 916: if (data->ed_pack.ep_vmcmds.evs_used == 0)
1.312 christos 917: panic("%s: no vmcmds", __func__);
1.55 cgd 918: #endif
1.326 reinoud 919:
920: #ifdef DEBUG_EXEC
921: {
922: size_t j;
1.337 martin 923: struct exec_vmcmd *vp = &data->ed_pack.ep_vmcmds.evs_cmds[0];
924: DPRINTF(("vmcmds %u\n", data->ed_pack.ep_vmcmds.evs_used));
925: for (j = 0; j < data->ed_pack.ep_vmcmds.evs_used; j++) {
1.328 reinoud 926: DPRINTF(("vmcmd[%zu] = vmcmd_map_%s %#"
1.326 reinoud 927: PRIxVADDR"/%#"PRIxVSIZE" fd@%#"
928: PRIxVSIZE" prot=0%o flags=%d\n", j,
929: vp[j].ev_proc == vmcmd_map_pagedvn ?
930: "pagedvn" :
931: vp[j].ev_proc == vmcmd_map_readvn ?
932: "readvn" :
933: vp[j].ev_proc == vmcmd_map_zero ?
934: "zero" : "*unknown*",
935: vp[j].ev_addr, vp[j].ev_len,
936: vp[j].ev_offset, vp[j].ev_prot,
1.327 reinoud 937: vp[j].ev_flags));
1.326 reinoud 938: }
939: }
940: #endif /* DEBUG_EXEC */
941:
1.337 martin 942: for (i = 0; i < data->ed_pack.ep_vmcmds.evs_used && !error; i++) {
1.55 cgd 943: struct exec_vmcmd *vcp;
944:
1.337 martin 945: vcp = &data->ed_pack.ep_vmcmds.evs_cmds[i];
1.114 matt 946: if (vcp->ev_flags & VMCMD_RELATIVE) {
947: #ifdef DIAGNOSTIC
948: if (base_vcp == NULL)
1.312 christos 949: panic("%s: relative vmcmd with no base",
950: __func__);
1.114 matt 951: if (vcp->ev_flags & VMCMD_BASE)
1.312 christos 952: panic("%s: illegal base & relative vmcmd",
953: __func__);
1.114 matt 954: #endif
955: vcp->ev_addr += base_vcp->ev_addr;
956: }
1.212 christos 957: error = (*vcp->ev_proc)(l, vcp);
1.143 christos 958: #ifdef DEBUG_EXEC
1.111 matt 959: if (error) {
1.248 christos 960: size_t j;
1.337 martin 961: struct exec_vmcmd *vp =
962: &data->ed_pack.ep_vmcmds.evs_cmds[0];
1.327 reinoud 963: DPRINTF(("vmcmds %zu/%u, error %d\n", i,
1.337 martin 964: data->ed_pack.ep_vmcmds.evs_used, error));
965: for (j = 0; j < data->ed_pack.ep_vmcmds.evs_used; j++) {
1.327 reinoud 966: DPRINTF(("vmcmd[%zu] = vmcmd_map_%s %#"
1.310 christos 967: PRIxVADDR"/%#"PRIxVSIZE" fd@%#"
968: PRIxVSIZE" prot=0%o flags=%d\n", j,
969: vp[j].ev_proc == vmcmd_map_pagedvn ?
970: "pagedvn" :
971: vp[j].ev_proc == vmcmd_map_readvn ?
972: "readvn" :
973: vp[j].ev_proc == vmcmd_map_zero ?
974: "zero" : "*unknown*",
975: vp[j].ev_addr, vp[j].ev_len,
1.143 christos 976: vp[j].ev_offset, vp[j].ev_prot,
1.327 reinoud 977: vp[j].ev_flags));
1.326 reinoud 978: if (j == i)
1.327 reinoud 979: DPRINTF((" ^--- failed\n"));
1.326 reinoud 980: }
1.111 matt 981: }
1.143 christos 982: #endif /* DEBUG_EXEC */
1.114 matt 983: if (vcp->ev_flags & VMCMD_BASE)
984: base_vcp = vcp;
1.55 cgd 985: }
986:
987: /* free the vmspace-creation commands, and release their references */
1.337 martin 988: kill_vmcmds(&data->ed_pack.ep_vmcmds);
1.55 cgd 989:
1.337 martin 990: vn_lock(data->ed_pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
991: VOP_CLOSE(data->ed_pack.ep_vp, FREAD, l->l_cred);
992: vput(data->ed_pack.ep_vp);
1.186 chs 993:
1.55 cgd 994: /* if an error happened, deallocate and punt */
1.111 matt 995: if (error) {
1.312 christos 996: DPRINTF(("%s: vmcmd %zu failed: %d\n", __func__, i - 1, error));
1.55 cgd 997: goto exec_abort;
1.111 matt 998: }
1.55 cgd 999:
1000: /* remember information about the process */
1.337 martin 1001: data->ed_arginfo.ps_nargvstr = data->ed_argc;
1002: data->ed_arginfo.ps_nenvstr = data->ed_envc;
1.55 cgd 1003:
1.255 christos 1004: /* set command name & other accounting info */
1.337 martin 1005: commandname = strrchr(data->ed_pack.ep_resolvedname, '/');
1.295 dholland 1006: if (commandname != NULL) {
1007: commandname++;
1008: } else {
1.337 martin 1009: commandname = data->ed_pack.ep_resolvedname;
1.295 dholland 1010: }
1011: i = min(strlen(commandname), MAXCOMLEN);
1012: (void)memcpy(p->p_comm, commandname, i);
1.255 christos 1013: p->p_comm[i] = '\0';
1014:
1015: dp = PNBUF_GET();
1016: /*
1017: * If the path starts with /, we don't need to do any work.
1018: * This handles the majority of the cases.
1019: * In the future perhaps we could canonicalize it?
1020: */
1.337 martin 1021: if (data->ed_pathstring[0] == '/')
1022: (void)strlcpy(data->ed_pack.ep_path = dp, data->ed_pathstring,
1023: MAXPATHLEN);
1.333 dholland 1024: #ifdef notyet
1.255 christos 1025: /*
1026: * Although this works most of the time [since the entry was just
1027: * entered in the cache] we don't use it because it theoretically
1028: * can fail and it is not the cleanest interface, because there
1029: * could be races. When the namei cache is re-written, this can
1030: * be changed to use the appropriate function.
1031: */
1032: else if (!(error = vnode_to_path(dp, MAXPATHLEN, p->p_textvp, l, p)))
1.337 martin 1033: data->ed_pack.ep_path = dp;
1.255 christos 1034: #endif
1035: else {
1.333 dholland 1036: #ifdef notyet
1.255 christos 1037: printf("Cannot get path for pid %d [%s] (error %d)",
1038: (int)p->p_pid, p->p_comm, error);
1039: #endif
1.337 martin 1040: data->ed_pack.ep_path = NULL;
1.255 christos 1041: PNBUF_PUT(dp);
1042: }
1043:
1.163 chs 1044: stack = (char *)STACK_ALLOC(STACK_GROW(vm->vm_minsaddr,
1.337 martin 1045: STACK_PTHREADSPACE + data->ed_ps_strings_sz + data->ed_szsigcode),
1046: data->ed_pack.ep_ssize - (data->ed_ps_strings_sz + data->ed_szsigcode));
1.267 dsl 1047:
1.163 chs 1048: #ifdef __MACHINE_STACK_GROWS_UP
1049: /*
1050: * The copyargs call always copies into lower addresses
1051: * first, moving towards higher addresses, starting with
1.183 junyoung 1052: * the stack pointer that we give. When the stack grows
1053: * down, this puts argc/argv/envp very shallow on the
1.267 dsl 1054: * stack, right at the first user stack pointer.
1055: * When the stack grows up, the situation is reversed.
1.163 chs 1056: *
1057: * Normally, this is no big deal. But the ld_elf.so _rtld()
1.183 junyoung 1058: * function expects to be called with a single pointer to
1059: * a region that has a few words it can stash values into,
1.163 chs 1060: * followed by argc/argv/envp. When the stack grows down,
1061: * it's easy to decrement the stack pointer a little bit to
1062: * allocate the space for these few words and pass the new
1063: * stack pointer to _rtld. When the stack grows up, however,
1.171 chs 1064: * a few words before argc is part of the signal trampoline, XXX
1.163 chs 1065: * so we have a problem.
1066: *
1.183 junyoung 1067: * Instead of changing how _rtld works, we take the easy way
1.267 dsl 1068: * out and steal 32 bytes before we call copyargs.
1.337 martin 1069: * This extra space was allowed for when 'pack.ep_ssize' was calculated.
1.163 chs 1070: */
1.267 dsl 1071: stack += RTLD_GAP;
1.163 chs 1072: #endif /* __MACHINE_STACK_GROWS_UP */
1.337 martin 1073:
1074: /* Now copy argc, args & environ to new stack */
1075: error = (*data->ed_pack.ep_esch->es_copyargs)(l, &data->ed_pack,
1076: &data->ed_arginfo, &stack, data->ed_argp);
1.163 chs 1077:
1.337 martin 1078: if (data->ed_pack.ep_path) {
1079: PNBUF_PUT(data->ed_pack.ep_path);
1080: data->ed_pack.ep_path = NULL;
1.255 christos 1081: }
1.144 christos 1082: if (error) {
1.312 christos 1083: DPRINTF(("%s: copyargs failed %d\n", __func__, error));
1.55 cgd 1084: goto exec_abort;
1.111 matt 1085: }
1.144 christos 1086: /* Move the stack back to original point */
1.337 martin 1087: stack = (char *)STACK_GROW(vm->vm_minsaddr, data->ed_pack.ep_ssize);
1.55 cgd 1088:
1.121 eeh 1089: /* fill process ps_strings info */
1.311 joerg 1090: p->p_psstrp = (vaddr_t)STACK_ALLOC(STACK_GROW(vm->vm_minsaddr,
1.337 martin 1091: STACK_PTHREADSPACE), data->ed_ps_strings_sz);
1.311 joerg 1092:
1.337 martin 1093: if (data->ed_pack.ep_flags & EXEC_32) {
1094: arginfo32.ps_argvstr = (vaddr_t)data->ed_arginfo.ps_argvstr;
1095: arginfo32.ps_nargvstr = data->ed_arginfo.ps_nargvstr;
1096: arginfo32.ps_envstr = (vaddr_t)data->ed_arginfo.ps_envstr;
1097: arginfo32.ps_nenvstr = data->ed_arginfo.ps_nenvstr;
1.311 joerg 1098: }
1.121 eeh 1099:
1.55 cgd 1100: /* copy out the process's ps_strings structure */
1.337 martin 1101: if ((error = copyout(aip, (void *)p->p_psstrp, data->ed_ps_strings_sz))
1102: != 0) {
1.312 christos 1103: DPRINTF(("%s: ps_strings copyout %p->%p size %zu failed\n",
1.337 martin 1104: __func__, aip, (void *)p->p_psstrp, data->ed_ps_strings_sz));
1.55 cgd 1105: goto exec_abort;
1.111 matt 1106: }
1.109 simonb 1107:
1.307 pooka 1108: cwdexec(p);
1.270 ad 1109: fd_closeexec(); /* handle close on exec */
1.315 alnsn 1110:
1111: if (__predict_false(ktrace_on))
1112: fd_ktrexecfd();
1113:
1.55 cgd 1114: execsigs(p); /* reset catched signals */
1.183 junyoung 1115:
1.164 thorpej 1116: l->l_ctxlink = NULL; /* reset ucontext link */
1.55 cgd 1117:
1.255 christos 1118:
1.55 cgd 1119: p->p_acflag &= ~AFORK;
1.272 ad 1120: mutex_enter(p->p_lock);
1.238 pavel 1121: p->p_flag |= PK_EXEC;
1.272 ad 1122: mutex_exit(p->p_lock);
1.237 ad 1123:
1124: /*
1125: * Stop profiling.
1126: */
1127: if ((p->p_stflag & PST_PROFIL) != 0) {
1128: mutex_spin_enter(&p->p_stmutex);
1129: stopprofclock(p);
1130: mutex_spin_exit(&p->p_stmutex);
1131: }
1132:
1133: /*
1.275 ad 1134: * It's OK to test PL_PPWAIT unlocked here, as other LWPs have
1.237 ad 1135: * exited and exec()/exit() are the only places it will be cleared.
1136: */
1.275 ad 1137: if ((p->p_lflag & PL_PPWAIT) != 0) {
1.271 ad 1138: mutex_enter(proc_lock);
1.308 pooka 1139: l->l_lwpctl = NULL; /* was on loan from blocked parent */
1.275 ad 1140: p->p_lflag &= ~PL_PPWAIT;
1.237 ad 1141: cv_broadcast(&p->p_pptr->p_waitcv);
1.271 ad 1142: mutex_exit(proc_lock);
1.55 cgd 1143: }
1144:
1145: /*
1.237 ad 1146: * Deal with set[ug]id. MNT_NOSUID has already been used to disable
1147: * s[ug]id. It's OK to check for PSL_TRACED here as we have blocked
1148: * out additional references on the process for the moment.
1.55 cgd 1149: */
1.237 ad 1150: if ((p->p_slflag & PSL_TRACED) == 0 &&
1.141 thorpej 1151:
1.337 martin 1152: (((data->ed_attr.va_mode & S_ISUID) != 0 &&
1153: kauth_cred_geteuid(l->l_cred) != data->ed_attr.va_uid) ||
1.141 thorpej 1154:
1.337 martin 1155: ((data->ed_attr.va_mode & S_ISGID) != 0 &&
1156: kauth_cred_getegid(l->l_cred) != data->ed_attr.va_gid))) {
1.141 thorpej 1157: /*
1158: * Mark the process as SUGID before we do
1159: * anything that might block.
1160: */
1.237 ad 1161: proc_crmod_enter();
1.240 thorpej 1162: proc_crmod_leave(NULL, NULL, true);
1.152 christos 1163:
1164: /* Make sure file descriptors 0..2 are in use. */
1.270 ad 1165: if ((error = fd_checkstd()) != 0) {
1.312 christos 1166: DPRINTF(("%s: fdcheckstd failed %d\n",
1167: __func__, error));
1.152 christos 1168: goto exec_abort;
1.209 christos 1169: }
1.141 thorpej 1170:
1.220 ad 1171: /*
1172: * Copy the credential so other references don't see our
1173: * changes.
1174: */
1.221 ad 1175: l->l_cred = kauth_cred_copy(l->l_cred);
1.55 cgd 1176: #ifdef KTRACE
1177: /*
1.268 elad 1178: * If the persistent trace flag isn't set, turn off.
1.55 cgd 1179: */
1.237 ad 1180: if (p->p_tracep) {
1.247 ad 1181: mutex_enter(&ktrace_lock);
1.268 elad 1182: if (!(p->p_traceflag & KTRFAC_PERSISTENT))
1.237 ad 1183: ktrderef(p);
1.247 ad 1184: mutex_exit(&ktrace_lock);
1.237 ad 1185: }
1.55 cgd 1186: #endif
1.337 martin 1187: if (data->ed_attr.va_mode & S_ISUID)
1188: kauth_cred_seteuid(l->l_cred, data->ed_attr.va_uid);
1189: if (data->ed_attr.va_mode & S_ISGID)
1190: kauth_cred_setegid(l->l_cred, data->ed_attr.va_gid);
1.210 christos 1191: } else {
1.221 ad 1192: if (kauth_cred_geteuid(l->l_cred) ==
1193: kauth_cred_getuid(l->l_cred) &&
1194: kauth_cred_getegid(l->l_cred) ==
1195: kauth_cred_getgid(l->l_cred))
1.238 pavel 1196: p->p_flag &= ~PK_SUGID;
1.210 christos 1197: }
1.220 ad 1198:
1199: /*
1200: * Copy the credential so other references don't see our changes.
1201: * Test to see if this is necessary first, since in the common case
1202: * we won't need a private reference.
1203: */
1.221 ad 1204: if (kauth_cred_geteuid(l->l_cred) != kauth_cred_getsvuid(l->l_cred) ||
1205: kauth_cred_getegid(l->l_cred) != kauth_cred_getsvgid(l->l_cred)) {
1206: l->l_cred = kauth_cred_copy(l->l_cred);
1207: kauth_cred_setsvuid(l->l_cred, kauth_cred_geteuid(l->l_cred));
1208: kauth_cred_setsvgid(l->l_cred, kauth_cred_getegid(l->l_cred));
1.220 ad 1209: }
1.155 gmcgarry 1210:
1.221 ad 1211: /* Update the master credentials. */
1.227 ad 1212: if (l->l_cred != p->p_cred) {
1213: kauth_cred_t ocred;
1214:
1215: kauth_cred_hold(l->l_cred);
1.272 ad 1216: mutex_enter(p->p_lock);
1.227 ad 1217: ocred = p->p_cred;
1218: p->p_cred = l->l_cred;
1.272 ad 1219: mutex_exit(p->p_lock);
1.227 ad 1220: kauth_cred_free(ocred);
1221: }
1.221 ad 1222:
1.155 gmcgarry 1223: #if defined(__HAVE_RAS)
1224: /*
1225: * Remove all RASs from the address space.
1226: */
1.251 ad 1227: ras_purgeall();
1.155 gmcgarry 1228: #endif
1.107 fvdl 1229:
1230: doexechooks(p);
1.55 cgd 1231:
1232: /* setup new registers and do misc. setup. */
1.337 martin 1233: (*data->ed_pack.ep_esch->es_emul->e_setregs)(l, &data->ed_pack,
1234: (vaddr_t)stack);
1235: if (data->ed_pack.ep_esch->es_setregs)
1236: (*data->ed_pack.ep_esch->es_setregs)(l, &data->ed_pack,
1237: (vaddr_t)stack);
1.55 cgd 1238:
1.309 joerg 1239: /* Provide a consistent LWP private setting */
1240: (void)lwp_setprivate(l, NULL);
1241:
1.316 matt 1242: /* Discard all PCU state; need to start fresh */
1243: pcu_discard_all(l);
1244:
1.171 chs 1245: /* map the process's signal trampoline code */
1.337 martin 1246: if ((error = exec_sigcode_map(p, data->ed_pack.ep_esch->es_emul)) != 0) {
1.312 christos 1247: DPRINTF(("%s: map sigcode failed %d\n", __func__, error));
1.171 chs 1248: goto exec_abort;
1.209 christos 1249: }
1.171 chs 1250:
1.337 martin 1251: pool_put(&exec_pool, data->ed_argp);
1.276 ad 1252:
1253: /* notify others that we exec'd */
1254: KNOTE(&p->p_klist, NOTE_EXEC);
1255:
1.337 martin 1256: kmem_free(data->ed_pack.ep_hdr, data->ed_pack.ep_hdrlen);
1.122 jdolecek 1257:
1.339 martin 1258: SDT_PROBE(proc,,,exec_success, data->ed_pack.ep_name, 0, 0, 0, 0);
1.294 darran 1259:
1.244 dsl 1260: /* The emulation root will usually have been found when we looked
1261: * for the elf interpreter (or similar), if not look now. */
1.337 martin 1262: if (data->ed_pack.ep_esch->es_emul->e_path != NULL &&
1263: data->ed_pack.ep_emul_root == NULL)
1264: emul_find_root(l, &data->ed_pack);
1.244 dsl 1265:
1266: /* Any old emulation root got removed by fdcloseexec */
1.259 ad 1267: rw_enter(&p->p_cwdi->cwdi_lock, RW_WRITER);
1.337 martin 1268: p->p_cwdi->cwdi_edir = data->ed_pack.ep_emul_root;
1.259 ad 1269: rw_exit(&p->p_cwdi->cwdi_lock);
1.337 martin 1270: data->ed_pack.ep_emul_root = NULL;
1271: if (data->ed_pack.ep_interp != NULL)
1272: vrele(data->ed_pack.ep_interp);
1.244 dsl 1273:
1.122 jdolecek 1274: /*
1.194 peter 1275: * Call emulation specific exec hook. This can setup per-process
1.122 jdolecek 1276: * p->p_emuldata or do any other per-process stuff an emulation needs.
1277: *
1278: * If we are executing process of different emulation than the
1279: * original forked process, call e_proc_exit() of the old emulation
1280: * first, then e_proc_exec() of new emulation. If the emulation is
1281: * same, the exec hook code should deallocate any old emulation
1282: * resources held previously by this process.
1283: */
1.124 jdolecek 1284: if (p->p_emul && p->p_emul->e_proc_exit
1.337 martin 1285: && p->p_emul != data->ed_pack.ep_esch->es_emul)
1.122 jdolecek 1286: (*p->p_emul->e_proc_exit)(p);
1287:
1.123 jdolecek 1288: /*
1.299 chs 1289: * This is now LWP 1.
1290: */
1291: mutex_enter(p->p_lock);
1292: p->p_nlwpid = 1;
1293: l->l_lid = 1;
1294: mutex_exit(p->p_lock);
1295:
1296: /*
1.123 jdolecek 1297: * Call exec hook. Emulation code may NOT store reference to anything
1298: * from &pack.
1299: */
1.337 martin 1300: if (data->ed_pack.ep_esch->es_emul->e_proc_exec)
1301: (*data->ed_pack.ep_esch->es_emul->e_proc_exec)(p, &data->ed_pack);
1.122 jdolecek 1302:
1303: /* update p_emul, the old value is no longer needed */
1.337 martin 1304: p->p_emul = data->ed_pack.ep_esch->es_emul;
1.148 thorpej 1305:
1306: /* ...and the same for p_execsw */
1.337 martin 1307: p->p_execsw = data->ed_pack.ep_esch;
1.148 thorpej 1308:
1.133 mycroft 1309: #ifdef __HAVE_SYSCALL_INTERN
1310: (*p->p_emul->e_syscall_intern)(p);
1311: #endif
1.247 ad 1312: ktremul();
1.85 mycroft 1313:
1.252 ad 1314: /* Allow new references from the debugger/procfs. */
1.341 martin 1315: rw_exit(&p->p_reflock);
1.237 ad 1316: rw_exit(&exec_lock);
1.162 manu 1317:
1.271 ad 1318: mutex_enter(proc_lock);
1.237 ad 1319:
1320: if ((p->p_slflag & (PSL_TRACED|PSL_SYSCALL)) == PSL_TRACED) {
1321: KSI_INIT_EMPTY(&ksi);
1322: ksi.ksi_signo = SIGTRAP;
1323: ksi.ksi_lid = l->l_lid;
1324: kpsignal(p, &ksi, NULL);
1325: }
1.162 manu 1326:
1.237 ad 1327: if (p->p_sflag & PS_STOPEXEC) {
1328: KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
1.175 dsl 1329: p->p_pptr->p_nstopchild++;
1.237 ad 1330: p->p_pptr->p_waited = 0;
1.272 ad 1331: mutex_enter(p->p_lock);
1.237 ad 1332: ksiginfo_queue_init(&kq);
1333: sigclearall(p, &contsigmask, &kq);
1334: lwp_lock(l);
1335: l->l_stat = LSSTOP;
1.162 manu 1336: p->p_stat = SSTOP;
1.164 thorpej 1337: p->p_nrlwps--;
1.304 rmind 1338: lwp_unlock(l);
1.272 ad 1339: mutex_exit(p->p_lock);
1.271 ad 1340: mutex_exit(proc_lock);
1.304 rmind 1341: lwp_lock(l);
1.245 yamt 1342: mi_switch(l);
1.237 ad 1343: ksiginfo_queue_drain(&kq);
1344: KERNEL_LOCK(l->l_biglocks, l);
1345: } else {
1.271 ad 1346: mutex_exit(proc_lock);
1.162 manu 1347: }
1348:
1.337 martin 1349: pathbuf_stringcopy_put(data->ed_pathbuf, data->ed_pathstring);
1350: pathbuf_destroy(data->ed_pathbuf);
1351: PNBUF_PUT(data->ed_resolvedpathbuf);
1.327 reinoud 1352: DPRINTF(("%s finished\n", __func__));
1.85 mycroft 1353: return (EJUSTRETURN);
1.55 cgd 1354:
1.138 lukem 1355: exec_abort:
1.294 darran 1356: SDT_PROBE(proc,,,exec_failure, error, 0, 0, 0, 0);
1.297 rmind 1357: rw_exit(&p->p_reflock);
1358: rw_exit(&exec_lock);
1359:
1.337 martin 1360: pathbuf_stringcopy_put(data->ed_pathbuf, data->ed_pathstring);
1361: pathbuf_destroy(data->ed_pathbuf);
1362: PNBUF_PUT(data->ed_resolvedpathbuf);
1.130 jdolecek 1363:
1.55 cgd 1364: /*
1365: * the old process doesn't exist anymore. exit gracefully.
1366: * get rid of the (new) address space we have created, if any, get rid
1367: * of our namei data and vnode, and exit noting failure
1368: */
1.88 mrg 1369: uvm_deallocate(&vm->vm_map, VM_MIN_ADDRESS,
1370: VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS);
1.337 martin 1371: exec_free_emul_arg(&data->ed_pack);
1372: pool_put(&exec_pool, data->ed_argp);
1373: kmem_free(data->ed_pack.ep_hdr, data->ed_pack.ep_hdrlen);
1374: if (data->ed_pack.ep_emul_root != NULL)
1375: vrele(data->ed_pack.ep_emul_root);
1376: if (data->ed_pack.ep_interp != NULL)
1377: vrele(data->ed_pack.ep_interp);
1.237 ad 1378:
1.252 ad 1379: /* Acquire the sched-state mutex (exit1() will release it). */
1.337 martin 1380: if (!proc_is_new) {
1381: mutex_enter(p->p_lock);
1382: exit1(l, W_EXITCODE(error, SIGABRT));
1383: }
1.55 cgd 1384:
1385: /* NOTREACHED */
1386: return 0;
1.67 christos 1387: }
1388:
1.144 christos 1389: int
1.337 martin 1390: execve1(struct lwp *l, const char *path, char * const *args,
1391: char * const *envs, execve_fetch_element_t fetch_element)
1392: {
1393: struct execve_data data;
1394: int error;
1395:
1396: error = execve_loadvm(l, path, args, envs, fetch_element, &data);
1397: if (error)
1398: return error;
1399: error = execve_runproc(l, &data);
1400: return error;
1401: }
1402:
1403: int
1.231 yamt 1404: copyargs(struct lwp *l, struct exec_package *pack, struct ps_strings *arginfo,
1405: char **stackp, void *argp)
1.67 christos 1406: {
1.138 lukem 1407: char **cpp, *dp, *sp;
1408: size_t len;
1409: void *nullp;
1410: long argc, envc;
1.144 christos 1411: int error;
1.138 lukem 1412:
1.144 christos 1413: cpp = (char **)*stackp;
1.138 lukem 1414: nullp = NULL;
1415: argc = arginfo->ps_nargvstr;
1416: envc = arginfo->ps_nenvstr;
1.305 matt 1417: if ((error = copyout(&argc, cpp++, sizeof(argc))) != 0) {
1.312 christos 1418: COPYPRINTF("", cpp - 1, sizeof(argc));
1.144 christos 1419: return error;
1.305 matt 1420: }
1.67 christos 1421:
1.244 dsl 1422: dp = (char *) (cpp + argc + envc + 2 + pack->ep_esch->es_arglen);
1.67 christos 1423: sp = argp;
1424:
1425: /* XXX don't copy them out, remap them! */
1.69 mycroft 1426: arginfo->ps_argvstr = cpp; /* remember location of argv for later */
1.67 christos 1427:
1.305 matt 1428: for (; --argc >= 0; sp += len, dp += len) {
1429: if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0) {
1.312 christos 1430: COPYPRINTF("", cpp - 1, sizeof(dp));
1.305 matt 1431: return error;
1432: }
1433: if ((error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0) {
1.313 jakllsch 1434: COPYPRINTF("str", dp, (size_t)ARG_MAX);
1.144 christos 1435: return error;
1.305 matt 1436: }
1437: }
1.67 christos 1438:
1.305 matt 1439: if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0) {
1.312 christos 1440: COPYPRINTF("", cpp - 1, sizeof(nullp));
1.144 christos 1441: return error;
1.305 matt 1442: }
1.67 christos 1443:
1.69 mycroft 1444: arginfo->ps_envstr = cpp; /* remember location of envp for later */
1.67 christos 1445:
1.305 matt 1446: for (; --envc >= 0; sp += len, dp += len) {
1447: if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0) {
1.312 christos 1448: COPYPRINTF("", cpp - 1, sizeof(dp));
1.144 christos 1449: return error;
1.305 matt 1450: }
1451: if ((error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0) {
1.313 jakllsch 1452: COPYPRINTF("str", dp, (size_t)ARG_MAX);
1.305 matt 1453: return error;
1454: }
1.337 martin 1455:
1.305 matt 1456: }
1.67 christos 1457:
1.305 matt 1458: if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0) {
1.312 christos 1459: COPYPRINTF("", cpp - 1, sizeof(nullp));
1.144 christos 1460: return error;
1.305 matt 1461: }
1.67 christos 1462:
1.144 christos 1463: *stackp = (char *)cpp;
1464: return 0;
1.55 cgd 1465: }
1.130 jdolecek 1466:
1467:
1468: /*
1.282 ad 1469: * Add execsw[] entries.
1.130 jdolecek 1470: */
1471: int
1.282 ad 1472: exec_add(struct execsw *esp, int count)
1.130 jdolecek 1473: {
1.282 ad 1474: struct exec_entry *it;
1475: int i;
1.130 jdolecek 1476:
1.283 ad 1477: if (count == 0) {
1478: return 0;
1479: }
1.130 jdolecek 1480:
1.282 ad 1481: /* Check for duplicates. */
1.237 ad 1482: rw_enter(&exec_lock, RW_WRITER);
1.282 ad 1483: for (i = 0; i < count; i++) {
1484: LIST_FOREACH(it, &ex_head, ex_list) {
1485: /* assume unique (makecmds, probe_func, emulation) */
1486: if (it->ex_sw->es_makecmds == esp[i].es_makecmds &&
1487: it->ex_sw->u.elf_probe_func ==
1488: esp[i].u.elf_probe_func &&
1489: it->ex_sw->es_emul == esp[i].es_emul) {
1490: rw_exit(&exec_lock);
1491: return EEXIST;
1.130 jdolecek 1492: }
1493: }
1494: }
1495:
1.282 ad 1496: /* Allocate new entries. */
1497: for (i = 0; i < count; i++) {
1498: it = kmem_alloc(sizeof(*it), KM_SLEEP);
1499: it->ex_sw = &esp[i];
1500: LIST_INSERT_HEAD(&ex_head, it, ex_list);
1.130 jdolecek 1501: }
1502:
1503: /* update execsw[] */
1504: exec_init(0);
1.237 ad 1505: rw_exit(&exec_lock);
1.282 ad 1506: return 0;
1.130 jdolecek 1507: }
1508:
1509: /*
1510: * Remove execsw[] entry.
1511: */
1512: int
1.282 ad 1513: exec_remove(struct execsw *esp, int count)
1.130 jdolecek 1514: {
1.282 ad 1515: struct exec_entry *it, *next;
1516: int i;
1517: const struct proclist_desc *pd;
1518: proc_t *p;
1519:
1.283 ad 1520: if (count == 0) {
1521: return 0;
1522: }
1.130 jdolecek 1523:
1.282 ad 1524: /* Abort if any are busy. */
1.237 ad 1525: rw_enter(&exec_lock, RW_WRITER);
1.282 ad 1526: for (i = 0; i < count; i++) {
1527: mutex_enter(proc_lock);
1528: for (pd = proclists; pd->pd_list != NULL; pd++) {
1529: PROCLIST_FOREACH(p, pd->pd_list) {
1530: if (p->p_execsw == &esp[i]) {
1531: mutex_exit(proc_lock);
1532: rw_exit(&exec_lock);
1533: return EBUSY;
1534: }
1535: }
1536: }
1537: mutex_exit(proc_lock);
1538: }
1.130 jdolecek 1539:
1.282 ad 1540: /* None are busy, so remove them all. */
1541: for (i = 0; i < count; i++) {
1542: for (it = LIST_FIRST(&ex_head); it != NULL; it = next) {
1543: next = LIST_NEXT(it, ex_list);
1544: if (it->ex_sw == &esp[i]) {
1545: LIST_REMOVE(it, ex_list);
1546: kmem_free(it, sizeof(*it));
1547: break;
1548: }
1549: }
1.130 jdolecek 1550: }
1551:
1552: /* update execsw[] */
1553: exec_init(0);
1.237 ad 1554: rw_exit(&exec_lock);
1.282 ad 1555: return 0;
1.130 jdolecek 1556: }
1557:
1558: /*
1559: * Initialize exec structures. If init_boot is true, also does necessary
1560: * one-time initialization (it's called from main() that way).
1.147 jdolecek 1561: * Once system is multiuser, this should be called with exec_lock held,
1.130 jdolecek 1562: * i.e. via exec_{add|remove}().
1563: */
1564: int
1.138 lukem 1565: exec_init(int init_boot)
1.130 jdolecek 1566: {
1.282 ad 1567: const struct execsw **sw;
1568: struct exec_entry *ex;
1569: SLIST_HEAD(,exec_entry) first;
1570: SLIST_HEAD(,exec_entry) any;
1571: SLIST_HEAD(,exec_entry) last;
1572: int i, sz;
1.130 jdolecek 1573:
1574: if (init_boot) {
1575: /* do one-time initializations */
1.237 ad 1576: rw_init(&exec_lock);
1.259 ad 1577: mutex_init(&sigobject_lock, MUTEX_DEFAULT, IPL_NONE);
1.277 ad 1578: pool_init(&exec_pool, NCARGS, 0, 0, PR_NOALIGN|PR_NOTOUCH,
1579: "execargs", &exec_palloc, IPL_NONE);
1580: pool_sethardlimit(&exec_pool, maxexec, "should not happen", 0);
1.282 ad 1581: } else {
1582: KASSERT(rw_write_held(&exec_lock));
1583: }
1.130 jdolecek 1584:
1.282 ad 1585: /* Sort each entry onto the appropriate queue. */
1586: SLIST_INIT(&first);
1587: SLIST_INIT(&any);
1588: SLIST_INIT(&last);
1589: sz = 0;
1590: LIST_FOREACH(ex, &ex_head, ex_list) {
1591: switch(ex->ex_sw->es_prio) {
1592: case EXECSW_PRIO_FIRST:
1593: SLIST_INSERT_HEAD(&first, ex, ex_slist);
1594: break;
1595: case EXECSW_PRIO_ANY:
1596: SLIST_INSERT_HEAD(&any, ex, ex_slist);
1597: break;
1598: case EXECSW_PRIO_LAST:
1599: SLIST_INSERT_HEAD(&last, ex, ex_slist);
1600: break;
1601: default:
1.312 christos 1602: panic("%s", __func__);
1.282 ad 1603: break;
1.130 jdolecek 1604: }
1.282 ad 1605: sz++;
1.130 jdolecek 1606: }
1607:
1608: /*
1.282 ad 1609: * Create new execsw[]. Ensure we do not try a zero-sized
1610: * allocation.
1.130 jdolecek 1611: */
1.282 ad 1612: sw = kmem_alloc(sz * sizeof(struct execsw *) + 1, KM_SLEEP);
1613: i = 0;
1614: SLIST_FOREACH(ex, &first, ex_slist) {
1615: sw[i++] = ex->ex_sw;
1616: }
1617: SLIST_FOREACH(ex, &any, ex_slist) {
1618: sw[i++] = ex->ex_sw;
1619: }
1620: SLIST_FOREACH(ex, &last, ex_slist) {
1621: sw[i++] = ex->ex_sw;
1.130 jdolecek 1622: }
1.183 junyoung 1623:
1.282 ad 1624: /* Replace old execsw[] and free used memory. */
1625: if (execsw != NULL) {
1626: kmem_free(__UNCONST(execsw),
1627: nexecs * sizeof(struct execsw *) + 1);
1.130 jdolecek 1628: }
1.282 ad 1629: execsw = sw;
1630: nexecs = sz;
1.130 jdolecek 1631:
1.282 ad 1632: /* Figure out the maximum size of an exec header. */
1633: exec_maxhdrsz = sizeof(int);
1.130 jdolecek 1634: for (i = 0; i < nexecs; i++) {
1635: if (execsw[i]->es_hdrsz > exec_maxhdrsz)
1636: exec_maxhdrsz = execsw[i]->es_hdrsz;
1637: }
1638:
1639: return 0;
1640: }
1.171 chs 1641:
1642: static int
1643: exec_sigcode_map(struct proc *p, const struct emul *e)
1644: {
1645: vaddr_t va;
1646: vsize_t sz;
1647: int error;
1648: struct uvm_object *uobj;
1649:
1.184 drochner 1650: sz = (vaddr_t)e->e_esigcode - (vaddr_t)e->e_sigcode;
1651:
1652: if (e->e_sigobject == NULL || sz == 0) {
1.171 chs 1653: return 0;
1654: }
1655:
1656: /*
1657: * If we don't have a sigobject for this emulation, create one.
1658: *
1659: * sigobject is an anonymous memory object (just like SYSV shared
1660: * memory) that we keep a permanent reference to and that we map
1661: * in all processes that need this sigcode. The creation is simple,
1662: * we create an object, add a permanent reference to it, map it in
1663: * kernel space, copy out the sigcode to it and unmap it.
1.189 jdolecek 1664: * We map it with PROT_READ|PROT_EXEC into the process just
1665: * the way sys_mmap() would map it.
1.171 chs 1666: */
1667:
1668: uobj = *e->e_sigobject;
1669: if (uobj == NULL) {
1.259 ad 1670: mutex_enter(&sigobject_lock);
1671: if ((uobj = *e->e_sigobject) == NULL) {
1672: uobj = uao_create(sz, 0);
1673: (*uobj->pgops->pgo_reference)(uobj);
1674: va = vm_map_min(kernel_map);
1675: if ((error = uvm_map(kernel_map, &va, round_page(sz),
1676: uobj, 0, 0,
1677: UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
1678: UVM_INH_SHARE, UVM_ADV_RANDOM, 0)))) {
1679: printf("kernel mapping failed %d\n", error);
1680: (*uobj->pgops->pgo_detach)(uobj);
1681: mutex_exit(&sigobject_lock);
1682: return (error);
1683: }
1684: memcpy((void *)va, e->e_sigcode, sz);
1.171 chs 1685: #ifdef PMAP_NEED_PROCWR
1.259 ad 1686: pmap_procwr(&proc0, va, sz);
1.171 chs 1687: #endif
1.259 ad 1688: uvm_unmap(kernel_map, va, va + round_page(sz));
1689: *e->e_sigobject = uobj;
1690: }
1691: mutex_exit(&sigobject_lock);
1.171 chs 1692: }
1693:
1.172 enami 1694: /* Just a hint to uvm_map where to put it. */
1.195 fvdl 1695: va = e->e_vm_default_addr(p, (vaddr_t)p->p_vmspace->vm_daddr,
1696: round_page(sz));
1.187 chs 1697:
1698: #ifdef __alpha__
1699: /*
1700: * Tru64 puts /sbin/loader at the end of user virtual memory,
1701: * which causes the above calculation to put the sigcode at
1702: * an invalid address. Put it just below the text instead.
1703: */
1.193 jmc 1704: if (va == (vaddr_t)vm_map_max(&p->p_vmspace->vm_map)) {
1.187 chs 1705: va = (vaddr_t)p->p_vmspace->vm_taddr - round_page(sz);
1706: }
1707: #endif
1708:
1.171 chs 1709: (*uobj->pgops->pgo_reference)(uobj);
1710: error = uvm_map(&p->p_vmspace->vm_map, &va, round_page(sz),
1711: uobj, 0, 0,
1712: UVM_MAPFLAG(UVM_PROT_RX, UVM_PROT_RX, UVM_INH_SHARE,
1713: UVM_ADV_RANDOM, 0));
1714: if (error) {
1.312 christos 1715: DPRINTF(("%s, %d: map %p "
1.305 matt 1716: "uvm_map %#"PRIxVSIZE"@%#"PRIxVADDR" failed %d\n",
1.312 christos 1717: __func__, __LINE__, &p->p_vmspace->vm_map, round_page(sz),
1718: va, error));
1.171 chs 1719: (*uobj->pgops->pgo_detach)(uobj);
1720: return (error);
1721: }
1722: p->p_sigctx.ps_sigcode = (void *)va;
1723: return (0);
1724: }
1.336 matt 1725:
1.337 martin 1726: /*
1727: * A child lwp of a posix_spawn operation starts here and ends up in
1728: * cpu_spawn_return, dealing with all filedescriptor and scheduler
1729: * manipulations in between.
1730: */
1731: static void
1732: spawn_return(void *arg)
1733: {
1734: struct spawn_exec_data *spawn_data = arg;
1735: struct lwp *l = curlwp;
1736: int error, newfd;
1737: size_t i;
1738: const struct posix_spawn_file_actions_entry *fae;
1739: register_t retval;
1.341 martin 1740: bool have_reflock;
1741:
1742: /*
1743: * The following actions may block, so we need a temporary
1744: * vmspace - borrow the kernel one
1745: */
1746: KPREEMPT_DISABLE(l);
1747: l->l_proc->p_vmspace = proc0.p_vmspace;
1748: pmap_activate(l);
1749: KPREEMPT_ENABLE(l);
1750:
1751: /* don't allow debugger access yet */
1752: rw_enter(&l->l_proc->p_reflock, RW_WRITER);
1753: have_reflock = true;
1.337 martin 1754:
1.338 martin 1755: error = 0;
1.337 martin 1756: /* handle posix_spawn_file_actions */
1757: if (spawn_data->sed_actions != NULL) {
1758: for (i = 0; i < spawn_data->sed_actions_len; i++) {
1759: fae = &spawn_data->sed_actions[i];
1760: switch (fae->fae_action) {
1761: case FAE_OPEN:
1.338 martin 1762: if (fd_getfile(fae->fae_fildes) != NULL) {
1763: error = fd_close(fae->fae_fildes);
1764: if (error)
1765: break;
1766: }
1.337 martin 1767: error = fd_open(fae->fae_path, fae->fae_oflag,
1768: fae->fae_mode, &newfd);
1.338 martin 1769: if (error)
1770: break;
1.337 martin 1771: if (newfd != fae->fae_fildes) {
1772: error = dodup(l, newfd,
1773: fae->fae_fildes, 0, &retval);
1774: if (fd_getfile(newfd) != NULL)
1775: fd_close(newfd);
1776: }
1777: break;
1778: case FAE_DUP2:
1779: error = dodup(l, fae->fae_fildes,
1780: fae->fae_newfildes, 0, &retval);
1781: break;
1782: case FAE_CLOSE:
1783: if (fd_getfile(fae->fae_fildes) == NULL) {
1784: error = EBADF;
1785: break;
1786: }
1787: error = fd_close(fae->fae_fildes);
1788: break;
1789: }
1790: if (error)
1791: goto report_error;
1792: }
1793: }
1794:
1795: /* handle posix_spawnattr */
1796: if (spawn_data->sed_attrs != NULL) {
1797: struct sigaction sigact;
1798: sigact._sa_u._sa_handler = SIG_DFL;
1799: sigact.sa_flags = 0;
1800:
1801: /*
1802: * set state to SSTOP so that this proc can be found by pid.
1803: * see proc_enterprp, do_sched_setparam below
1804: */
1805: l->l_proc->p_stat = SSTOP;
1806:
1807: /* Set process group */
1808: if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETPGROUP) {
1809: pid_t mypid = l->l_proc->p_pid,
1810: pgrp = spawn_data->sed_attrs->sa_pgroup;
1811:
1812: if (pgrp == 0)
1813: pgrp = mypid;
1814:
1815: error = proc_enterpgrp(spawn_data->sed_parent,
1816: mypid, pgrp, false);
1817: if (error)
1818: goto report_error;
1819: }
1820:
1821: /* Set scheduler policy */
1822: if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETSCHEDULER)
1823: error = do_sched_setparam(l->l_proc->p_pid, 0,
1824: spawn_data->sed_attrs->sa_schedpolicy,
1825: &spawn_data->sed_attrs->sa_schedparam);
1826: else if (spawn_data->sed_attrs->sa_flags
1827: & POSIX_SPAWN_SETSCHEDPARAM) {
1828: error = do_sched_setparam(spawn_data->sed_parent->p_pid, 0,
1829: SCHED_NONE, &spawn_data->sed_attrs->sa_schedparam);
1830: }
1831: if (error)
1832: goto report_error;
1833:
1834: /* Reset user ID's */
1835: if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_RESETIDS) {
1836: error = do_setresuid(l, -1,
1837: kauth_cred_getgid(l->l_cred), -1,
1838: ID_E_EQ_R | ID_E_EQ_S);
1839: if (error)
1840: goto report_error;
1841: error = do_setresuid(l, -1,
1842: kauth_cred_getuid(l->l_cred), -1,
1843: ID_E_EQ_R | ID_E_EQ_S);
1844: if (error)
1845: goto report_error;
1846: }
1847:
1848: /* Set signal masks/defaults */
1849: if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETSIGMASK) {
1850: mutex_enter(l->l_proc->p_lock);
1851: error = sigprocmask1(l, SIG_SETMASK,
1852: &spawn_data->sed_attrs->sa_sigmask, NULL);
1853: mutex_exit(l->l_proc->p_lock);
1854: if (error)
1855: goto report_error;
1856: }
1857:
1858: if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETSIGDEF) {
1859: for (i = 1; i <= NSIG; i++) {
1860: if (sigismember(
1861: &spawn_data->sed_attrs->sa_sigdefault, i))
1862: sigaction1(l, i, &sigact, NULL, NULL,
1863: 0);
1864: }
1865: }
1866: }
1867:
1.341 martin 1868: /* stop using kernel vmspace */
1869: KPREEMPT_DISABLE(l);
1870: pmap_deactivate(l);
1871: l->l_proc->p_vmspace = NULL;
1872: KPREEMPT_ENABLE(l);
1873:
1.337 martin 1874:
1875: /* now do the real exec */
1876: rw_enter(&exec_lock, RW_READER);
1877: error = execve_runproc(l, &spawn_data->sed_exec);
1.341 martin 1878: have_reflock = false;
1.337 martin 1879: if (error == EJUSTRETURN)
1880: error = 0;
1881: else if (error)
1882: goto report_error;
1883:
1884: /* done, signal parent */
1885: mutex_enter(&spawn_data->sed_mtx_child);
1886: cv_signal(&spawn_data->sed_cv_child_ready);
1887: mutex_exit(&spawn_data->sed_mtx_child);
1888:
1889: /* and finaly: leave to userland for the first time */
1890: cpu_spawn_return(l);
1891:
1892: /* NOTREACHED */
1893: return;
1894:
1895: report_error:
1.341 martin 1896: if (have_reflock)
1897: rw_exit(&l->l_proc->p_reflock);
1898:
1899: /* stop using kernel vmspace (if we haven't already) */
1900: if (l->l_proc->p_vmspace) {
1901: KPREEMPT_DISABLE(l);
1902: pmap_deactivate(l);
1903: l->l_proc->p_vmspace = NULL;
1904: KPREEMPT_ENABLE(l);
1.337 martin 1905: }
1906:
1907: /*
1908: * Set error value for parent to pick up (and take over ownership
1909: * of spawn_data again), signal parent and exit this process.
1910: */
1911: mutex_enter(&spawn_data->sed_mtx_child);
1912: spawn_data->sed_error = error;
1913: cv_signal(&spawn_data->sed_cv_child_ready);
1914: mutex_exit(&spawn_data->sed_mtx_child);
1915: mutex_enter(l->l_proc->p_lock);
1916: exit1(l, W_EXITCODE(error, SIGABRT));
1917: }
1918:
1.342 christos 1919: static void
1.344 ! christos 1920: posix_spawn_fa_free(struct posix_spawn_file_actions *fa, size_t len)
1.342 christos 1921: {
1922:
1.344 ! christos 1923: for (size_t i = 0; i < len; i++) {
1.342 christos 1924: struct posix_spawn_file_actions_entry *fae = &fa->fae[i];
1925: if (fae->fae_action != FAE_OPEN)
1926: continue;
1927: kmem_free(fae->fae_path, strlen(fae->fae_path) + 1);
1928: }
1.343 christos 1929: if (fa->len)
1930: kmem_free(fa->fae, sizeof(*fa->fae) * fa->len);
1.342 christos 1931: kmem_free(fa, sizeof(*fa));
1932: }
1933:
1934: static int
1935: posix_spawn_fa_alloc(struct posix_spawn_file_actions **fap,
1936: const struct posix_spawn_file_actions *ufa)
1937: {
1938: struct posix_spawn_file_actions *fa;
1939: struct posix_spawn_file_actions_entry *fae;
1940: char *pbuf = NULL;
1941: int error;
1.344 ! christos 1942: size_t i = 0;
1.342 christos 1943:
1944: fa = kmem_alloc(sizeof(*fa), KM_SLEEP);
1945: error = copyin(ufa, fa, sizeof(*fa));
1946: if (error) {
1947: fa->fae = NULL;
1948: fa->len = 0;
1949: goto out;
1950: }
1951:
1952: if (fa->len == 0)
1953: return 0;
1954:
1955: size_t fal = fa->len * sizeof(*fae);
1956: fae = fa->fae;
1957: fa->fae = kmem_alloc(fal, KM_SLEEP);
1958: error = copyin(fae, fa->fae, fal);
1.344 ! christos 1959: if (error)
1.342 christos 1960: goto out;
1961:
1962: pbuf = PNBUF_GET();
1.344 ! christos 1963: for (; i < fa->len; i++) {
1.342 christos 1964: fae = &fa->fae[i];
1965: if (fae->fae_action != FAE_OPEN)
1966: continue;
1967: error = copyinstr(fae->fae_path, pbuf, MAXPATHLEN, &fal);
1.344 ! christos 1968: if (error)
1.342 christos 1969: goto out;
1970: fae->fae_path = kmem_alloc(fal, KM_SLEEP);
1971: memcpy(fae->fae_path, pbuf, fal);
1972: }
1973: PNBUF_PUT(pbuf);
1974: *fap = fa;
1975: return 0;
1976: out:
1977: if (pbuf)
1978: PNBUF_PUT(pbuf);
1.344 ! christos 1979: posix_spawn_fa_free(fa, i);
1.342 christos 1980: return error;
1981: }
1982:
1.337 martin 1983: int
1984: sys_posix_spawn(struct lwp *l1, const struct sys_posix_spawn_args *uap,
1985: register_t *retval)
1986: {
1987: /* {
1988: syscallarg(pid_t *) pid;
1989: syscallarg(const char *) path;
1990: syscallarg(const struct posix_spawn_file_actions *) file_actions;
1991: syscallarg(const struct posix_spawnattr *) attrp;
1992: syscallarg(char *const *) argv;
1993: syscallarg(char *const *) envp;
1994: } */
1995:
1996: struct proc *p1, *p2;
1997: struct plimit *p1_lim;
1998: struct lwp *l2;
1.342 christos 1999: int error = 0, tnprocs, count;
1.337 martin 2000: struct posix_spawn_file_actions *fa = NULL;
2001: struct posix_spawnattr *sa = NULL;
2002: struct spawn_exec_data *spawn_data;
2003: uid_t uid;
2004: vaddr_t uaddr;
2005: pid_t pid;
2006: bool have_exec_lock = false;
2007:
2008: p1 = l1->l_proc;
2009: uid = kauth_cred_getuid(l1->l_cred);
2010: tnprocs = atomic_inc_uint_nv(&nprocs);
2011:
2012: /*
2013: * Although process entries are dynamically created, we still keep
2014: * a global limit on the maximum number we will create.
2015: */
2016: if (__predict_false(tnprocs >= maxproc))
2017: error = -1;
2018: else
2019: error = kauth_authorize_process(l1->l_cred,
2020: KAUTH_PROCESS_FORK, p1, KAUTH_ARG(tnprocs), NULL, NULL);
2021:
2022: if (error) {
2023: atomic_dec_uint(&nprocs);
2024: *retval = EAGAIN;
2025: return 0;
2026: }
2027:
2028: /*
2029: * Enforce limits.
2030: */
2031: count = chgproccnt(uid, 1);
2032: if (kauth_authorize_generic(l1->l_cred, KAUTH_GENERIC_ISSUSER, NULL) !=
2033: 0 && __predict_false(count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur)) {
2034: error = EAGAIN;
2035: goto error_exit;
2036: }
2037:
2038: /* copy in file_actions struct */
2039: if (SCARG(uap, file_actions) != NULL) {
1.342 christos 2040: error = posix_spawn_fa_alloc(&fa, SCARG(uap, file_actions));
2041: if (error)
1.337 martin 2042: goto error_exit;
2043: }
1.342 christos 2044:
1.337 martin 2045: /* copyin posix_spawnattr struct */
2046: if (SCARG(uap, attrp) != NULL) {
1.342 christos 2047: sa = kmem_alloc(sizeof(*sa), KM_SLEEP);
2048: error = copyin(SCARG(uap, attrp), sa, sizeof(*sa));
1.337 martin 2049: if (error)
2050: goto error_exit;
2051: }
2052:
2053: /*
2054: * Do the first part of the exec now, collect state
2055: * in spawn_data.
2056: */
2057: spawn_data = kmem_zalloc(sizeof(*spawn_data), KM_SLEEP);
2058: error = execve_loadvm(l1, SCARG(uap, path), SCARG(uap, argv),
2059: SCARG(uap, envp), execve_fetch_element, &spawn_data->sed_exec);
2060: if (error == EJUSTRETURN)
2061: error = 0;
2062: else if (error)
2063: goto error_exit;
2064:
2065: have_exec_lock = true;
2066:
2067: /*
2068: * Allocate virtual address space for the U-area now, while it
2069: * is still easy to abort the fork operation if we're out of
2070: * kernel virtual address space.
2071: */
2072: uaddr = uvm_uarea_alloc();
2073: if (__predict_false(uaddr == 0)) {
2074: error = ENOMEM;
2075: goto error_exit;
2076: }
2077:
2078: /*
2079: * Allocate new proc. Leave it's p_vmspace NULL for now.
2080: * This is a point of no return, we will have to go through
2081: * the child proc to properly clean it up past this point.
2082: */
2083: p2 = proc_alloc();
2084: pid = p2->p_pid;
2085:
2086: /*
2087: * Make a proc table entry for the new process.
2088: * Start by zeroing the section of proc that is zero-initialized,
2089: * then copy the section that is copied directly from the parent.
2090: */
2091: memset(&p2->p_startzero, 0,
2092: (unsigned) ((char *)&p2->p_endzero - (char *)&p2->p_startzero));
2093: memcpy(&p2->p_startcopy, &p1->p_startcopy,
2094: (unsigned) ((char *)&p2->p_endcopy - (char *)&p2->p_startcopy));
2095: p2->p_vmspace = NULL;
2096:
2097: CIRCLEQ_INIT(&p2->p_sigpend.sp_info);
2098:
2099: LIST_INIT(&p2->p_lwps);
2100: LIST_INIT(&p2->p_sigwaiters);
2101:
2102: /*
2103: * Duplicate sub-structures as needed.
2104: * Increase reference counts on shared objects.
2105: * Inherit flags we want to keep. The flags related to SIGCHLD
2106: * handling are important in order to keep a consistent behaviour
2107: * for the child after the fork. If we are a 32-bit process, the
2108: * child will be too.
2109: */
2110: p2->p_flag =
2111: p1->p_flag & (PK_SUGID | PK_NOCLDWAIT | PK_CLDSIGIGN | PK_32);
2112: p2->p_emul = p1->p_emul;
2113: p2->p_execsw = p1->p_execsw;
2114:
2115: mutex_init(&p2->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
2116: mutex_init(&p2->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
2117: rw_init(&p2->p_reflock);
2118: cv_init(&p2->p_waitcv, "wait");
2119: cv_init(&p2->p_lwpcv, "lwpwait");
2120:
2121: p2->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
2122:
2123: kauth_proc_fork(p1, p2);
2124:
2125: p2->p_raslist = NULL;
2126: p2->p_fd = fd_copy();
2127:
2128: /* XXX racy */
2129: p2->p_mqueue_cnt = p1->p_mqueue_cnt;
2130:
2131: p2->p_cwdi = cwdinit();
2132:
2133: /*
2134: * Note: p_limit (rlimit stuff) is copy-on-write, so normally
2135: * we just need increase pl_refcnt.
2136: */
2137: p1_lim = p1->p_limit;
2138: if (!p1_lim->pl_writeable) {
2139: lim_addref(p1_lim);
2140: p2->p_limit = p1_lim;
2141: } else {
2142: p2->p_limit = lim_copy(p1->p_limit);
2143: }
2144:
2145: p2->p_lflag = 0;
2146: p2->p_sflag = 0;
2147: p2->p_slflag = 0;
2148: p2->p_pptr = p1;
2149: p2->p_ppid = p1->p_pid;
2150: LIST_INIT(&p2->p_children);
2151:
2152: p2->p_aio = NULL;
2153:
2154: #ifdef KTRACE
2155: /*
2156: * Copy traceflag and tracefile if enabled.
2157: * If not inherited, these were zeroed above.
2158: */
2159: if (p1->p_traceflag & KTRFAC_INHERIT) {
2160: mutex_enter(&ktrace_lock);
2161: p2->p_traceflag = p1->p_traceflag;
2162: if ((p2->p_tracep = p1->p_tracep) != NULL)
2163: ktradref(p2);
2164: mutex_exit(&ktrace_lock);
2165: }
2166: #endif
2167:
2168: /*
2169: * Create signal actions for the child process.
2170: */
2171: p2->p_sigacts = sigactsinit(p1, 0);
2172: mutex_enter(p1->p_lock);
2173: p2->p_sflag |=
2174: (p1->p_sflag & (PS_STOPFORK | PS_STOPEXEC | PS_NOCLDSTOP));
2175: sched_proc_fork(p1, p2);
2176: mutex_exit(p1->p_lock);
2177:
2178: p2->p_stflag = p1->p_stflag;
2179:
2180: /*
2181: * p_stats.
2182: * Copy parts of p_stats, and zero out the rest.
2183: */
2184: p2->p_stats = pstatscopy(p1->p_stats);
2185:
2186: /* copy over machdep flags to the new proc */
2187: cpu_proc_fork(p1, p2);
2188:
2189: /*
2190: * Prepare remaining parts of spawn data
2191: */
1.342 christos 2192: if (fa && fa->len) {
2193: spawn_data->sed_actions_len = fa->len;
2194: spawn_data->sed_actions = fa->fae;
1.337 martin 2195: }
1.342 christos 2196: if (sa)
1.337 martin 2197: spawn_data->sed_attrs = sa;
2198:
2199: spawn_data->sed_parent = p1;
2200: cv_init(&spawn_data->sed_cv_child_ready, "pspawn");
2201: mutex_init(&spawn_data->sed_mtx_child, MUTEX_DEFAULT, IPL_NONE);
2202: mutex_enter(&spawn_data->sed_mtx_child);
2203:
2204: /* create LWP */
2205: lwp_create(l1, p2, uaddr, 0, NULL, 0, spawn_return, spawn_data,
2206: &l2, l1->l_class);
2207: l2->l_ctxlink = NULL; /* reset ucontext link */
2208:
2209: /*
2210: * Copy the credential so other references don't see our changes.
2211: * Test to see if this is necessary first, since in the common case
2212: * we won't need a private reference.
2213: */
2214: if (kauth_cred_geteuid(l2->l_cred) != kauth_cred_getsvuid(l2->l_cred) ||
2215: kauth_cred_getegid(l2->l_cred) != kauth_cred_getsvgid(l2->l_cred)) {
2216: l2->l_cred = kauth_cred_copy(l2->l_cred);
2217: kauth_cred_setsvuid(l2->l_cred, kauth_cred_geteuid(l2->l_cred));
2218: kauth_cred_setsvgid(l2->l_cred, kauth_cred_getegid(l2->l_cred));
2219: }
2220:
2221: /* Update the master credentials. */
2222: if (l2->l_cred != p2->p_cred) {
2223: kauth_cred_t ocred;
2224:
2225: kauth_cred_hold(l2->l_cred);
2226: mutex_enter(p2->p_lock);
2227: ocred = p2->p_cred;
2228: p2->p_cred = l2->l_cred;
2229: mutex_exit(p2->p_lock);
2230: kauth_cred_free(ocred);
2231: }
2232:
2233: /*
2234: * It's now safe for the scheduler and other processes to see the
2235: * child process.
2236: */
2237: mutex_enter(proc_lock);
2238:
2239: if (p1->p_session->s_ttyvp != NULL && p1->p_lflag & PL_CONTROLT)
2240: p2->p_lflag |= PL_CONTROLT;
2241:
2242: LIST_INSERT_HEAD(&p1->p_children, p2, p_sibling);
2243: p2->p_exitsig = SIGCHLD; /* signal for parent on exit */
2244:
2245: LIST_INSERT_AFTER(p1, p2, p_pglist);
2246: LIST_INSERT_HEAD(&allproc, p2, p_list);
2247:
2248: p2->p_trace_enabled = trace_is_enabled(p2);
2249: #ifdef __HAVE_SYSCALL_INTERN
2250: (*p2->p_emul->e_syscall_intern)(p2);
2251: #endif
2252:
2253: /*
2254: * Make child runnable, set start time, and add to run queue except
2255: * if the parent requested the child to start in SSTOP state.
2256: */
2257: mutex_enter(p2->p_lock);
2258:
2259: getmicrotime(&p2->p_stats->p_start);
2260:
2261: lwp_lock(l2);
2262: KASSERT(p2->p_nrlwps == 1);
2263: p2->p_nrlwps = 1;
2264: p2->p_stat = SACTIVE;
2265: l2->l_stat = LSRUN;
2266: sched_enqueue(l2, false);
2267: lwp_unlock(l2);
2268:
2269: mutex_exit(p2->p_lock);
2270: mutex_exit(proc_lock);
2271:
2272: cv_wait(&spawn_data->sed_cv_child_ready, &spawn_data->sed_mtx_child);
2273: mutex_exit(&spawn_data->sed_mtx_child);
2274: error = spawn_data->sed_error;
2275:
1.341 martin 2276: rw_exit(&p1->p_reflock);
1.337 martin 2277: rw_exit(&exec_lock);
2278: have_exec_lock = false;
2279:
1.342 christos 2280: if (fa)
1.344 ! christos 2281: posix_spawn_fa_free(fa, fa->len);
1.337 martin 2282:
1.342 christos 2283: if (sa)
2284: kmem_free(sa, sizeof(*sa));
1.337 martin 2285:
2286: cv_destroy(&spawn_data->sed_cv_child_ready);
2287: mutex_destroy(&spawn_data->sed_mtx_child);
2288:
2289: kmem_free(spawn_data, sizeof(*spawn_data));
2290:
2291: if (error == 0 && SCARG(uap, pid) != NULL)
2292: error = copyout(&pid, SCARG(uap, pid), sizeof(pid));
2293:
2294: *retval = error;
2295: return 0;
2296:
2297: error_exit:
2298: if (have_exec_lock)
2299: rw_exit(&exec_lock);
2300:
1.342 christos 2301: if (fa)
1.344 ! christos 2302: posix_spawn_fa_free(fa, fa->len);
1.337 martin 2303:
1.342 christos 2304: if (sa)
1.337 martin 2305: kmem_free(sa, sizeof(*sa));
2306:
2307: (void)chgproccnt(uid, -1);
2308: atomic_dec_uint(&nprocs);
2309:
2310: *retval = error;
2311: return 0;
2312: }
2313:
1.336 matt 2314: void
2315: exec_free_emul_arg(struct exec_package *epp)
2316: {
2317: if (epp->ep_emul_arg_free != NULL) {
2318: KASSERT(epp->ep_emul_arg != NULL);
2319: (*epp->ep_emul_arg_free)(epp->ep_emul_arg);
2320: epp->ep_emul_arg_free = NULL;
2321: epp->ep_emul_arg = NULL;
2322: } else {
2323: KASSERT(epp->ep_emul_arg == NULL);
2324: }
2325: }
CVSweb <webmaster@jp.NetBSD.org>