Annotation of src/sys/kern/kern_exec.c, Revision 1.420
1.420 ! pgoyette 1: /* $NetBSD: kern_exec.c,v 1.419 2015/10/13 00:24:35 pgoyette Exp $ */
1.277 ad 2:
3: /*-
4: * Copyright (c) 2008 The NetBSD Foundation, Inc.
5: * All rights reserved.
6: *
7: * Redistribution and use in source and binary forms, with or without
8: * modification, are permitted provided that the following conditions
9: * are met:
10: * 1. Redistributions of source code must retain the above copyright
11: * notice, this list of conditions and the following disclaimer.
12: * 2. Redistributions in binary form must reproduce the above copyright
13: * notice, this list of conditions and the following disclaimer in the
14: * documentation and/or other materials provided with the distribution.
15: *
16: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26: * POSSIBILITY OF SUCH DAMAGE.
27: */
1.55 cgd 28:
29: /*-
1.77 cgd 30: * Copyright (C) 1993, 1994, 1996 Christopher G. Demetriou
1.55 cgd 31: * Copyright (C) 1992 Wolfgang Solfrank.
32: * Copyright (C) 1992 TooLs GmbH.
33: * All rights reserved.
34: *
35: * Redistribution and use in source and binary forms, with or without
36: * modification, are permitted provided that the following conditions
37: * are met:
38: * 1. Redistributions of source code must retain the above copyright
39: * notice, this list of conditions and the following disclaimer.
40: * 2. Redistributions in binary form must reproduce the above copyright
41: * notice, this list of conditions and the following disclaimer in the
42: * documentation and/or other materials provided with the distribution.
43: * 3. All advertising materials mentioning features or use of this software
44: * must display the following acknowledgement:
45: * This product includes software developed by TooLs GmbH.
46: * 4. The name of TooLs GmbH may not be used to endorse or promote products
47: * derived from this software without specific prior written permission.
48: *
49: * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
50: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
51: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
52: * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
53: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
54: * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
55: * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
56: * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
57: * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
58: * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
59: */
1.146 lukem 60:
61: #include <sys/cdefs.h>
1.420 ! pgoyette 62: __KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.419 2015/10/13 00:24:35 pgoyette Exp $");
1.89 mrg 63:
1.325 jmcneill 64: #include "opt_exec.h"
1.360 christos 65: #include "opt_execfmt.h"
1.92 thorpej 66: #include "opt_ktrace.h"
1.285 apb 67: #include "opt_modular.h"
1.124 jdolecek 68: #include "opt_syscall_debug.h"
1.226 dogcow 69: #include "veriexec.h"
1.232 elad 70: #include "opt_pax.h"
1.55 cgd 71:
72: #include <sys/param.h>
73: #include <sys/systm.h>
74: #include <sys/filedesc.h>
75: #include <sys/kernel.h>
76: #include <sys/proc.h>
77: #include <sys/mount.h>
1.265 yamt 78: #include <sys/kmem.h>
1.55 cgd 79: #include <sys/namei.h>
80: #include <sys/vnode.h>
81: #include <sys/file.h>
1.414 christos 82: #include <sys/filedesc.h>
1.55 cgd 83: #include <sys/acct.h>
1.337 martin 84: #include <sys/atomic.h>
1.55 cgd 85: #include <sys/exec.h>
86: #include <sys/ktrace.h>
1.278 pooka 87: #include <sys/uidinfo.h>
1.55 cgd 88: #include <sys/wait.h>
89: #include <sys/mman.h>
1.155 gmcgarry 90: #include <sys/ras.h>
1.55 cgd 91: #include <sys/signalvar.h>
92: #include <sys/stat.h>
1.124 jdolecek 93: #include <sys/syscall.h>
1.218 elad 94: #include <sys/kauth.h>
1.253 ad 95: #include <sys/lwpctl.h>
1.260 christos 96: #include <sys/pax.h>
1.263 ad 97: #include <sys/cpu.h>
1.282 ad 98: #include <sys/module.h>
1.289 pooka 99: #include <sys/syscallvar.h>
1.56 cgd 100: #include <sys/syscallargs.h>
1.222 elad 101: #if NVERIEXEC > 0
1.197 blymn 102: #include <sys/verified_exec.h>
1.222 elad 103: #endif /* NVERIEXEC > 0 */
1.294 darran 104: #include <sys/sdt.h>
1.337 martin 105: #include <sys/spawn.h>
106: #include <sys/prot.h>
1.330 tls 107: #include <sys/cprng.h>
1.55 cgd 108:
1.88 mrg 109: #include <uvm/uvm_extern.h>
110:
1.55 cgd 111: #include <machine/reg.h>
112:
1.244 dsl 113: #include <compat/common/compat_util.h>
114:
/*
 * Fallback definition of MD_TOPDOWN_INIT(), used only when nothing included
 * above has already defined it.  With __USE_TOPDOWN_VM defined it sets
 * EXEC_TOPDOWN_VM in the exec package's ep_flags; otherwise it expands to
 * nothing.  The non-empty form is a bare expression, so it must only be used
 * as a complete statement (as execve_loadvm() does).
 */
1.364 martin 115: #ifndef MD_TOPDOWN_INIT
1.370 christos 116: #ifdef __USE_TOPDOWN_VM
1.364 martin 117: #define MD_TOPDOWN_INIT(epp) (epp)->ep_flags |= EXEC_TOPDOWN_VM
118: #else
119: #define MD_TOPDOWN_INIT(epp)
120: #endif
121: #endif
122:
/*
 * Forward declarations: struct execve_data is defined further down, and the
 * prototypes below are for file-local (static) helpers.
 */
1.391 uebayasi 123: struct execve_data;
124:
1.396 uebayasi 125: static size_t calcargs(struct execve_data * restrict, const size_t);
126: static size_t calcstack(struct execve_data * restrict, const size_t);
1.399 uebayasi 127: static int copyoutargs(struct execve_data * restrict, struct lwp *,
128: char * const);
1.398 uebayasi 129: static int copyoutpsstrs(struct execve_data * restrict, struct proc *);
1.391 uebayasi 130: static int copyinargs(struct execve_data * restrict, char * const *,
131: char * const *, execve_fetch_element_t, char **);
1.392 uebayasi 132: static int copyinargstrs(struct execve_data * restrict, char * const *,
133: execve_fetch_element_t, char **, size_t *, void (*)(const void *, size_t));
1.171 chs 134: static int exec_sigcode_map(struct proc *, const struct emul *);
135:
/*
 * Debug helpers.  With DEBUG_EXEC defined:
 *   DPRINTF(a)           expands to "printf a", so the argument must be a
 *                        fully parenthesized printf argument list;
 *   COPYPRINTF(s, a, b)  prints the calling function, line, a suffix string,
 *                        an address and a size;
 *   DUMPVMCMDS(p, x, e)  calls dump_vmcmds().
 * Without DEBUG_EXEC the first two expand to nothing and DUMPVMCMDS() to an
 * empty statement, so their arguments are not evaluated.
 */
1.143 christos 136: #ifdef DEBUG_EXEC
1.305 matt 137: #define DPRINTF(a) printf a
1.312 christos 138: #define COPYPRINTF(s, a, b) printf("%s, %d: copyout%s @%p %zu\n", __func__, \
139: __LINE__, (s), (a), (b))
1.388 uebayasi 140: static void dump_vmcmds(const struct exec_package * const, size_t, int);
141: #define DUMPVMCMDS(p, x, e) do { dump_vmcmds((p), (x), (e)); } while (0)
1.143 christos 142: #else
143: #define DPRINTF(a)
1.312 christos 144: #define COPYPRINTF(s, a, b)
1.388 uebayasi 145: #define DUMPVMCMDS(p, x, e) do {} while (0)
1.143 christos 146: #endif /* DEBUG_EXEC */
1.165 thorpej 147:
1.130 jdolecek 148: /*
1.294 darran 149: * DTrace SDT provider definitions
150: */
/*
 * Probes of the "proc" provider.  In this file, exec is fired with the path
 * argument on entry to execve_loadvm() and exec__failure with the error code
 * when execve_loadvm() fails.  exec__success is not fired in the part of the
 * file reviewed here.
 */
1.418 christos 151: SDT_PROVIDER_DECLARE(proc);
152: SDT_PROBE_DEFINE1(proc, kernel, , exec, "char *");
153: SDT_PROBE_DEFINE1(proc, kernel, , exec__success, "char *");
154: SDT_PROBE_DEFINE1(proc, kernel, , exec__failure, "int");
1.294 darran 155:
156: /*
1.130 jdolecek 157: * Exec function switch:
158: *
159: * Note that each makecmds function is responsible for loading the
160: * exec package with the necessary functions for any exec-type-specific
161: * handling.
162: *
163: * Functions for specific exec types should be defined in their own
164: * header file.
165: */
/*
 * execsw[0 .. nexecs-1] is the table of exec format handlers that
 * check_exec() walks, calling each entry's es_makecmds in turn.
 */
1.138 lukem 166: static const struct execsw **execsw = NULL;
167: static int nexecs;
168:
/* Size of the exec header buffer that execve_loadvm() allocates for ep_hdr. */
1.282 ad 169: u_int exec_maxhdrsz; /* must not be static - used by netbsd32 */
1.130 jdolecek 170:
171: /* list of dynamically loaded execsw entries */
1.282 ad 172: static LIST_HEAD(execlist_head, exec_entry) ex_head =
173: LIST_HEAD_INITIALIZER(ex_head);
/* One registered execsw entry; carries both LIST and SLIST linkage. */
1.130 jdolecek 174: struct exec_entry {
1.138 lukem 175: LIST_ENTRY(exec_entry) ex_list;
1.282 ad 176: SLIST_ENTRY(exec_entry) ex_slist;
177: const struct execsw *ex_sw;
1.130 jdolecek 178: };
179:
/*
 * Without __HAVE_SYSCALL_INTERN, emul_netbsd below stores syscall in its
 * e_syscall member, so the function has to be declared here.
 */
1.203 christos 180: #ifndef __HAVE_SYSCALL_INTERN
181: void syscall(void);
182: #endif
183:
1.173 christos 184: /* NetBSD emul struct */
/*
 * Emulation descriptor for native NetBSD binaries.  Members that the native
 * emulation does not need are explicitly NULL; several others depend on the
 * kernel configuration (EMUL_NATIVEROOT, __HAVE_MINIMAL_EMUL, SYSCALL_DEBUG,
 * __HAVE_SYSCALL_INTERN).
 */
1.282 ad 185: struct emul emul_netbsd = {
1.291 rmind 186: .e_name = "netbsd",
/* optional alternate root for native binaries */
1.371 manu 187: #ifdef EMUL_NATIVEROOT
188: .e_path = EMUL_NATIVEROOT,
189: #else
190: .e_path = NULL,
191: #endif
1.133 mycroft 192: #ifndef __HAVE_MINIMAL_EMUL
1.291 rmind 193: .e_flags = EMUL_HAS_SYS___syscall,
194: .e_errno = NULL,
195: .e_nosys = SYS_syscall,
196: .e_nsysent = SYS_NSYSENT,
1.133 mycroft 197: #endif
1.291 rmind 198: .e_sysent = sysent,
/* syscall names are only wired up for SYSCALL_DEBUG kernels */
1.124 jdolecek 199: #ifdef SYSCALL_DEBUG
1.291 rmind 200: .e_syscallnames = syscallnames,
1.124 jdolecek 201: #else
1.291 rmind 202: .e_syscallnames = NULL,
1.124 jdolecek 203: #endif
1.291 rmind 204: .e_sendsig = sendsig,
205: .e_trapsignal = trapsignal,
206: .e_tracesig = NULL,
207: .e_sigcode = NULL,
208: .e_esigcode = NULL,
209: .e_sigobject = NULL,
210: .e_setregs = setregs,
211: .e_proc_exec = NULL,
212: .e_proc_fork = NULL,
213: .e_proc_exit = NULL,
214: .e_lwp_fork = NULL,
215: .e_lwp_exit = NULL,
/* exactly one of e_syscall_intern / e_syscall is initialized */
1.133 mycroft 216: #ifdef __HAVE_SYSCALL_INTERN
1.291 rmind 217: .e_syscall_intern = syscall_intern,
1.133 mycroft 218: #else
1.291 rmind 219: .e_syscall = syscall,
1.133 mycroft 220: #endif
1.291 rmind 221: .e_sysctlovly = NULL,
222: .e_fault = NULL,
223: .e_vm_default_addr = uvm_default_mapaddr,
224: .e_usertrap = NULL,
225: .e_ucsize = sizeof(ucontext_t),
226: .e_startlwp = startlwp
1.124 jdolecek 227: };
228:
1.55 cgd 229: /*
1.130 jdolecek 230: * Exec lock. Used to control access to execsw[] structures.
231: * This must not be static so that netbsd32 can access it, too.
232: */
/* Taken as a reader by execve_loadvm() around check_exec(). */
1.352 rmind 233: krwlock_t exec_lock;
234:
/* NOTE(review): not used in the part of the file reviewed here; presumably guards sigobject setup - confirm. */
235: static kmutex_t sigobject_lock;
1.259 ad 236:
1.337 martin 237: /*
238: * Data used between a loadvm and execve part of an "exec" operation
239: */
240: struct execve_data {
/* exec package handed to check_exec() and the vmcmds */
241: struct exec_package ed_pack;
/* pathbuf built by makepathbuf() from the user-supplied path */
242: struct pathbuf *ed_pathbuf;
/* vnode attributes; ed_pack.ep_vap points here */
243: struct vattr ed_attr;
244: struct ps_strings ed_arginfo;
/* argument buffer taken from exec_pool */
245: char *ed_argp;
/* string copy of ed_pathbuf (pathbuf_stringcopy_get()) */
246: const char *ed_pathstring;
/* PNBUF backing ed_pack.ep_resolvedname */
247: char *ed_resolvedpathbuf;
248: size_t ed_ps_strings_sz;
249: int ed_szsigcode;
/* result of calcargs() in execve_loadvm() */
1.396 uebayasi 250: size_t ed_argslen;
/* presumably the argv / envp element counts - TODO confirm against copyinargs() */
1.337 martin 251: long ed_argc;
252: long ed_envc;
253: };
254:
255: /*
256: * data passed from parent lwp to child during a posix_spawn()
257: */
/*
 * NOTE(review): none of these members is used in the part of the file
 * reviewed here; the remarks below are read off the declarations only.
 */
258: struct spawn_exec_data {
/* embedded exec state, same type execve_loadvm() fills in */
259: struct execve_data sed_exec;
1.348 martin 260: struct posix_spawn_file_actions
1.337 martin 261: *sed_actions;
262: struct posix_spawnattr *sed_attrs;
263: struct proc *sed_parent;
/* condvar/mutex pair; presumably used to hand sed_error back to the parent - confirm */
264: kcondvar_t sed_cv_child_ready;
265: kmutex_t sed_mtx_child;
266: int sed_error;
1.348 martin 267: volatile uint32_t sed_refcnt;
1.337 martin 268: };
269:
1.277 ad 270: static void *
271: exec_pool_alloc(struct pool *pp, int flags)
272: {
273:
274: return (void *)uvm_km_alloc(kernel_map, NCARGS, 0,
275: UVM_KMF_PAGEABLE | UVM_KMF_WAITVA);
276: }
277:
278: static void
279: exec_pool_free(struct pool *pp, void *addr)
280: {
281:
282: uvm_km_free(kernel_map, (vaddr_t)addr, NCARGS, UVM_KMF_PAGEABLE);
283: }
284:
/* Pool from which execve_loadvm() takes the argument buffer (ed_argp). */
285: static struct pool exec_pool;
286:
/*
 * Allocator description wiring exec_pool_alloc()/exec_pool_free() together
 * with a page size of NCARGS.
 */
287: static struct pool_allocator exec_palloc = {
288: .pa_alloc = exec_pool_alloc,
289: .pa_free = exec_pool_free,
290: .pa_pagesz = NCARGS
291: };
292:
1.130 jdolecek 293: /*
1.55 cgd 294: * check exec:
295: * given an "executable" described in the exec package's namei info,
296: * see what we can do with it.
297: *
298: * ON ENTRY:
299: * exec package with appropriate namei info
1.212 christos 300: * lwp pointer of exec'ing lwp
1.55 cgd 301: * NO SELF-LOCKED VNODES
302: *
303: * ON EXIT:
304: * error: nothing held, etc. exec header still allocated.
1.77 cgd 305: * ok: filled exec package, executable's vnode (unlocked).
1.55 cgd 306: *
307: * EXEC SWITCH ENTRY:
308: * Locked vnode to check, exec package, proc.
309: *
310: * EXEC SWITCH EXIT:
1.77 cgd 311: * ok: return 0, filled exec package, executable's vnode (unlocked).
1.55 cgd 312: * error: destructive:
314: * everything deallocated except exec header.
1.76 cgd 314: * non-destructive:
1.77 cgd 315: * error code, executable's vnode (unlocked),
1.76 cgd 316: * exec header unmodified.
1.55 cgd 317: */
/*
 * Returns 0 with the vnode open and unlocked in epp->ep_vp and epp->ep_esch
 * pointing at the execsw entry that accepted the image; otherwise returns an
 * errno with the vnode closed (if it had been opened) and released.
 */
318: int
1.352 rmind 319: /*ARGSUSED*/
1.301 dholland 320: check_exec(struct lwp *l, struct exec_package *epp, struct pathbuf *pb)
1.55 cgd 321: {
1.138 lukem 322: int error, i;
323: struct vnode *vp;
1.295 dholland 324: struct nameidata nd;
1.138 lukem 325: size_t resid;
1.55 cgd 326:
/* LOCKLEAF: on success namei() hands back the vnode locked */
1.303 dholland 327: NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
1.295 dholland 328:
1.55 cgd 329: /* first get the vnode */
1.295 dholland 330: if ((error = namei(&nd)) != 0)
1.55 cgd 331: return error;
1.295 dholland 332: epp->ep_vp = vp = nd.ni_vp;
1.368 christos 333: /* normally this can't fail */
1.408 maxv 334: error = copystr(nd.ni_pnbuf, epp->ep_resolvedname, PATH_MAX, NULL);
335: KASSERT(error == 0);
1.295 dholland 336:
1.296 dholland 337: #ifdef DIAGNOSTIC
338: /* paranoia (take this out once namei stuff stabilizes) */
1.302 dholland 339: memset(nd.ni_pnbuf, '~', PATH_MAX);
1.295 dholland 340: #endif
1.55 cgd 341:
1.84 mycroft 342: /* check access and type */
1.55 cgd 343: if (vp->v_type != VREG) {
1.81 kleink 344: error = EACCES;
1.55 cgd 345: goto bad1;
346: }
1.254 pooka 347: if ((error = VOP_ACCESS(vp, VEXEC, l->l_cred)) != 0)
1.84 mycroft 348: goto bad1;
1.55 cgd 349:
350: /* get attributes */
1.254 pooka 351: if ((error = VOP_GETATTR(vp, epp->ep_vap, l->l_cred)) != 0)
1.55 cgd 352: goto bad1;
353:
354: /* Check mount point */
355: if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
356: error = EACCES;
357: goto bad1;
358: }
/* on a nosuid mount, pretend the set-id bits are not there */
1.141 thorpej 359: if (vp->v_mount->mnt_flag & MNT_NOSUID)
1.83 mycroft 360: epp->ep_vap->va_mode &= ~(S_ISUID | S_ISGID);
1.55 cgd 361:
362: /* try to open it */
1.254 pooka 363: if ((error = VOP_OPEN(vp, FREAD, l->l_cred)) != 0)
1.55 cgd 364: goto bad1;
365:
1.99 wrstuden 366: /* unlock vp, since we need it unlocked from here on out. */
1.298 hannken 367: VOP_UNLOCK(vp);
1.77 cgd 368:
/* from here on failures go to bad2: the vnode is open and unlocked */
1.222 elad 369: #if NVERIEXEC > 0
1.295 dholland 370: error = veriexec_verify(l, vp, epp->ep_resolvedname,
1.233 elad 371: epp->ep_flags & EXEC_INDIR ? VERIEXEC_INDIRECT : VERIEXEC_DIRECT,
1.236 elad 372: NULL);
373: if (error)
1.234 elad 374: goto bad2;
1.222 elad 375: #endif /* NVERIEXEC > 0 */
1.160 blymn 376:
1.232 elad 377: #ifdef PAX_SEGVGUARD
1.295 dholland 378: error = pax_segvguard(l, vp, epp->ep_resolvedname, false);
1.234 elad 379: if (error)
380: goto bad2;
1.232 elad 381: #endif /* PAX_SEGVGUARD */
382:
1.55 cgd 383: /* now we have the file, get the exec header */
1.74 christos 384: error = vn_rdwr(UIO_READ, vp, epp->ep_hdr, epp->ep_hdrlen, 0,
1.223 ad 385: UIO_SYSSPACE, 0, l->l_cred, &resid, NULL);
1.74 christos 386: if (error)
1.55 cgd 387: goto bad2;
/* a short file yields fewer valid header bytes than ep_hdrlen */
388: epp->ep_hdrvalid = epp->ep_hdrlen - resid;
389:
390: /*
1.136 eeh 391: * Set up default address space limits. Can be overridden
392: * by individual exec packages.
1.183 junyoung 393: *
1.235 rillig 394: * XXX probably should be all done in the exec packages.
1.136 eeh 395: */
396: epp->ep_vm_minaddr = VM_MIN_ADDRESS;
397: epp->ep_vm_maxaddr = VM_MAXUSER_ADDRESS;
398: /*
1.55 cgd 399: * set up the vmcmds for creation of the process
400: * address space
401: */
/* ENOEXEC doubles as "no handler has reported a more specific error yet" */
402: error = ENOEXEC;
1.244 dsl 403: for (i = 0; i < nexecs; i++) {
1.68 cgd 404: int newerror;
405:
1.130 jdolecek 406: epp->ep_esch = execsw[i];
1.212 christos 407: newerror = (*execsw[i]->es_makecmds)(l, epp);
1.244 dsl 408:
409: if (!newerror) {
/*
 * This handler accepted the image.  Each sanity check below that fails
 * sets error and breaks out of the loop into the common cleanup.
 */
1.318 reinoud 410: /* Seems ok: check that entry point is not too high */
1.323 reinoud 411: if (epp->ep_entry > epp->ep_vm_maxaddr) {
1.322 reinoud 412: #ifdef DIAGNOSTIC
1.329 reinoud 413: printf("%s: rejecting %p due to "
1.331 christos 414: "too high entry address (> %p)\n",
415: __func__, (void *)epp->ep_entry,
416: (void *)epp->ep_vm_maxaddr);
1.322 reinoud 417: #endif
1.318 reinoud 418: error = ENOEXEC;
419: break;
420: }
421: /* Seems ok: check that entry point is not too low */
1.323 reinoud 422: if (epp->ep_entry < epp->ep_vm_minaddr) {
1.322 reinoud 423: #ifdef DIAGNOSTIC
1.329 reinoud 424: printf("%s: rejecting %p due to "
1.331 christos 425: "too low entry address (< %p)\n",
426: __func__, (void *)epp->ep_entry,
427: (void *)epp->ep_vm_minaddr);
1.322 reinoud 428: #endif
1.244 dsl 429: error = ENOEXEC;
430: break;
431: }
432:
433: /* check limits */
434: if ((epp->ep_tsize > MAXTSIZ) ||
435: (epp->ep_dsize > (u_quad_t)l->l_proc->p_rlimit
436: [RLIMIT_DATA].rlim_cur)) {
1.322 reinoud 437: #ifdef DIAGNOSTIC
1.323 reinoud 438: printf("%s: rejecting due to "
1.331 christos 439: "limits (t=%llu > %llu || d=%llu > %llu)\n",
440: __func__,
441: (unsigned long long)epp->ep_tsize,
442: (unsigned long long)MAXTSIZ,
443: (unsigned long long)epp->ep_dsize,
1.332 christos 444: (unsigned long long)
445: l->l_proc->p_rlimit[RLIMIT_DATA].rlim_cur);
1.322 reinoud 446: #endif
1.244 dsl 447: error = ENOMEM;
448: break;
449: }
450: return 0;
451: }
452:
/* handler declined: drop references it may have left in the package */
453: if (epp->ep_emul_root != NULL) {
454: vrele(epp->ep_emul_root);
455: epp->ep_emul_root = NULL;
456: }
457: if (epp->ep_interp != NULL) {
458: vrele(epp->ep_interp);
459: epp->ep_interp = NULL;
460: }
461:
1.68 cgd 462: /* make sure the first "interesting" error code is saved. */
1.244 dsl 463: if (error == ENOEXEC)
1.68 cgd 464: error = newerror;
1.124 jdolecek 465:
1.244 dsl 466: if (epp->ep_flags & EXEC_DESTR)
467: /* Error from "#!" code, tidied up by recursive call */
1.55 cgd 468: return error;
469: }
470:
1.249 pooka 471: /* not found, error */
472:
1.55 cgd 473: /*
474: * free any vmspace-creation commands,
475: * and release their references
476: */
477: kill_vmcmds(&epp->ep_vmcmds);
478:
479: bad2:
480: /*
1.99 wrstuden 481: * close and release the vnode, restore the old one, free the
1.55 cgd 482: * pathname buf, and punt.
483: */
/* VOP_CLOSE() is called with the vnode locked, so re-lock it first */
1.99 wrstuden 484: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.254 pooka 485: VOP_CLOSE(vp, FREAD, l->l_cred);
1.99 wrstuden 486: vput(vp);
1.55 cgd 487: return error;
488:
489: bad1:
490: /*
491: * free the namei pathname buffer, and put the vnode
492: * (which we don't yet have open).
493: */
1.77 cgd 494: vput(vp); /* was still locked */
1.55 cgd 495: return error;
496: }
497:
/*
 * STACK_PTHREADSPACE is one page (NBPG) on machines whose stack grows up and
 * 0 everywhere else.  It is not referenced in the part of the file reviewed
 * here.
 */
1.188 chs 498: #ifdef __MACHINE_STACK_GROWS_UP
499: #define STACK_PTHREADSPACE NBPG
500: #else
501: #define STACK_PTHREADSPACE 0
502: #endif
503:
/*
 * execve_fetch_element:
 *	Native fetch callback handed to execve1(): copy in the index'th
 *	pointer of a user-space argv/envp style vector.
 *
 *	Returns the copyin() result (0 on success).
 */
static int
execve_fetch_element(char * const *array, size_t index, char **value)
{
	char * const *slot = &array[index];

	return copyin(slot, value, sizeof(char *));
}
509:
1.55 cgd 510: /*
511: * exec system call
512: */
1.75 christos 513: int
1.258 dsl 514: sys_execve(struct lwp *l, const struct sys_execve_args *uap, register_t *retval)
1.71 thorpej 515: {
1.258 dsl 516: /* {
1.138 lukem 517: syscallarg(const char *) path;
518: syscallarg(char * const *) argp;
519: syscallarg(char * const *) envp;
1.258 dsl 520: } */
1.204 cube 521:
522: return execve1(l, SCARG(uap, path), SCARG(uap, argp),
523: SCARG(uap, envp), execve_fetch_element);
524: }
525:
1.376 maxv 526: int
1.317 manu 527: sys_fexecve(struct lwp *l, const struct sys_fexecve_args *uap,
528: register_t *retval)
529: {
530: /* {
531: syscallarg(int) fd;
532: syscallarg(char * const *) argp;
533: syscallarg(char * const *) envp;
534: } */
535:
536: return ENOSYS;
537: }
538:
1.282 ad 539: /*
540: * Load modules to try and execute an image that we do not understand.
541: * If no execsw entries are present, we load those likely to be needed
542: * in order to run native images only. Otherwise, we autoload all
543: * possible modules that could let us run the binary. XXX lame
544: */
static void
exec_autoload(void)
{
#ifdef MODULAR
	/* Enough to run native images. */
	static const char * const native[] = {
		"exec_elf32",
		"exec_elf64",
		"exec_script",
		NULL
	};
	/* Everything that might recognise a foreign image. */
	static const char * const compat[] = {
		"exec_elf32",
		"exec_elf64",
		"exec_script",
		"exec_aout",
		"exec_coff",
		"exec_ecoff",
		"compat_aoutm68k",
		"compat_freebsd",
		"compat_ibcs2",
		"compat_linux",
		"compat_linux32",
		"compat_netbsd32",
		"compat_sunos",
		"compat_sunos32",
		"compat_svr4",
		"compat_svr4_32",
		"compat_ultrix",
		NULL
	};
	const char * const *modp;

	/* With no handlers at all, only the native set is tried. */
	modp = (nexecs != 0) ? compat : native;

	for (; *modp != NULL; modp++) {
		/* Give up the CPU after each module that did load. */
		if (module_autoload(*modp, MODULE_CLASS_EXEC) == 0)
			yield();
	}
#endif
}
587:
1.415 christos 588: static int
589: makepathbuf(struct lwp *l, const char *upath, struct pathbuf **pbp,
590: size_t *offs)
1.414 christos 591: {
592: char *path, *bp;
1.415 christos 593: size_t len, tlen;
1.414 christos 594: int error;
595: struct cwdinfo *cwdi;
596:
597: path = PNBUF_GET();
598: error = copyinstr(upath, path, MAXPATHLEN, &len);
599: if (error) {
600: PNBUF_PUT(path);
601: DPRINTF(("%s: copyin path @%p %d\n", __func__, upath, error));
1.415 christos 602: return error;
1.414 christos 603: }
604:
1.415 christos 605: if (path[0] == '/') {
606: *offs = 0;
1.414 christos 607: goto out;
1.415 christos 608: }
1.414 christos 609:
610: len++;
611: if (len + 1 >= MAXPATHLEN)
612: goto out;
613: bp = path + MAXPATHLEN - len;
614: memmove(bp, path, len);
615: *(--bp) = '/';
616:
617: cwdi = l->l_proc->p_cwdi;
618: rw_enter(&cwdi->cwdi_lock, RW_READER);
619: error = getcwd_common(cwdi->cwdi_cdir, NULL, &bp, path, MAXPATHLEN / 2,
620: GETCWD_CHECK_ACCESS, l);
621: rw_exit(&cwdi->cwdi_lock);
622:
623: if (error) {
624: DPRINTF(("%s: getcwd_common path %s %d\n", __func__, path,
625: error));
626: goto out;
627: }
1.415 christos 628: tlen = path + MAXPATHLEN - bp;
1.414 christos 629:
1.415 christos 630: memmove(path, bp, tlen);
631: path[tlen] = '\0';
632: *offs = tlen - len;
1.414 christos 633: out:
1.415 christos 634: *pbp = pathbuf_assimilate(path);
635: return 0;
1.414 christos 636: }
637:
/*
 * execve_loadvm:
 *	First half of an exec.  Builds the pathbuf for `path', lets
 *	check_exec() find a handler and set up the vmcmds, copies in the
 *	argument and environment strings and sizes the new stack.  All state
 *	is recorded in `data'.
 *
 *	Returns 0 on success.  In that case p->p_reflock (held as writer)
 *	and exec_lock (held as reader) are NOT released here and the
 *	resources referenced from `data' stay allocated.  On failure
 *	everything acquired here is released and an errno is returned.  An
 *	ENOEXEC failure runs exec_autoload() and retries, as long as
 *	module_gen differs from the value recorded at the previous attempt.
 */
1.337 martin 638: static int
639: execve_loadvm(struct lwp *l, const char *path, char * const *args,
640: char * const *envs, execve_fetch_element_t fetch_element,
641: struct execve_data * restrict data)
1.204 cube 642: {
1.378 uebayasi 643: struct exec_package * const epp = &data->ed_pack;
1.153 thorpej 644: int error;
1.164 thorpej 645: struct proc *p;
1.391 uebayasi 646: char *dp;
1.282 ad 647: u_int modgen;
1.416 christos 648: size_t offs = 0; // XXX: GCC
1.337 martin 649:
650: KASSERT(data != NULL);
1.55 cgd 651:
1.237 ad 652: p = l->l_proc;
/* 0 means "no autoload attempted yet" for the retry logic at the end */
1.376 maxv 653: modgen = 0;
1.164 thorpej 654:
1.418 christos 655: SDT_PROBE(proc, kernel, , exec, path, 0, 0, 0, 0);
1.294 darran 656:
1.149 christos 657: /*
1.269 christos 658: * Check if we have exceeded our number of processes limit.
659: * This is so that we handle the case where a root daemon
660: * forked, ran setuid to become the desired user and is trying
661: * to exec. The obvious place to do the reference counting check
662: * is setuid(), but we don't do the reference counting check there
663: * like other OS's do because then all the programs that use setuid()
664: * must be modified to check the return code of setuid() and exit().
665: * It is dangerous to make setuid() fail, because it fails open and
666: * the program will continue to run as root. If we make it succeed
667: * and return an error code, again we are not enforcing the limit.
668: * The best place to enforce the limit is here, when the process tries
669: * to execute a new image, because eventually the process will need
670: * to call exec in order to do something useful.
671: */
1.282 ad 672: retry:
1.347 elad 673: if (p->p_flag & PK_SUGID) {
1.347 elad 674: if (kauth_authorize_process(l->l_cred, KAUTH_PROCESS_RLIMIT,
675: p, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_BYPASS),
676: &p->p_rlimit[RLIMIT_NPROC],
677: KAUTH_ARG(RLIMIT_NPROC)) != 0 &&
678: chgproccnt(kauth_cred_getuid(l->l_cred), 0) >
679: p->p_rlimit[RLIMIT_NPROC].rlim_cur)
/* NOTE(review): this early return does not fire the exec__failure probe */
1.269 christos 680: return EAGAIN;
1.347 elad 681: }
1.269 christos 682:
683: /*
1.352 rmind 684: * Drain existing references and forbid new ones. The process
685: * should be left alone until we're done here. This is necessary
686: * to avoid race conditions - e.g. in ptrace() - that might allow
687: * a local user to illicitly obtain elevated privileges.
688: */
689: rw_enter(&p->p_reflock, RW_WRITER);
690:
691: /*
1.129 jdolecek 692: * Init the namei data to point at the user's program file name.
693: * This is done here rather than in check_exec(), so that it's
694: * possible to override these settings if any of the makecmd/probe
695: * functions call check_exec() recursively - for example,
696: * see exec_script_makecmds().
697: */
1.415 christos 698: if ((error = makepathbuf(l, path, &data->ed_pathbuf, &offs)) != 0)
1.352 rmind 699: goto clrflg;
1.337 martin 700: data->ed_pathstring = pathbuf_stringcopy_get(data->ed_pathbuf);
701: data->ed_resolvedpathbuf = PNBUF_GET();
1.55 cgd 702:
703: /*
704: * initialize the fields of the exec package.
705: */
/* offs skips whatever makepathbuf() prepended to the caller's path */
1.415 christos 706: epp->ep_kname = data->ed_pathstring + offs;
1.378 uebayasi 707: epp->ep_resolvedname = data->ed_resolvedpathbuf;
708: epp->ep_hdr = kmem_alloc(exec_maxhdrsz, KM_SLEEP);
709: epp->ep_hdrlen = exec_maxhdrsz;
710: epp->ep_hdrvalid = 0;
711: epp->ep_emul_arg = NULL;
712: epp->ep_emul_arg_free = NULL;
713: memset(&epp->ep_vmcmds, 0, sizeof(epp->ep_vmcmds));
714: epp->ep_vap = &data->ed_attr;
1.411 christos 715: epp->ep_flags = (p->p_flag & PK_32) ? EXEC_FROM32 : 0;
1.378 uebayasi 716: MD_TOPDOWN_INIT(epp);
717: epp->ep_emul_root = NULL;
718: epp->ep_interp = NULL;
719: epp->ep_esch = NULL;
720: epp->ep_pax_flags = 0;
721: memset(epp->ep_machine_arch, 0, sizeof(epp->ep_machine_arch));
1.55 cgd 722:
1.237 ad 723: rw_enter(&exec_lock, RW_READER);
1.130 jdolecek 724:
1.55 cgd 725: /* see if we can run it. */
1.378 uebayasi 726: if ((error = check_exec(l, epp, data->ed_pathbuf)) != 0) {
1.261 xtraeme 727: if (error != ENOENT) {
1.312 christos 728: DPRINTF(("%s: check exec failed %d\n",
729: __func__, error));
1.261 xtraeme 730: }
/* check_exec() already disposed of the vnode; skip the "bad" steps */
1.352 rmind 731: goto freehdr;
1.248 christos 732: }
1.55 cgd 733:
734: /* allocate an argument buffer */
1.337 martin 735: data->ed_argp = pool_get(&exec_pool, PR_WAITOK);
736: KASSERT(data->ed_argp != NULL);
737: dp = data->ed_argp;
1.55 cgd 738:
1.391 uebayasi 739: if ((error = copyinargs(data, args, envs, fetch_element, &dp)) != 0) {
1.55 cgd 740: goto bad;
741: }
1.61 mycroft 742:
1.379 uebayasi 743: /*
744: * Calculate the new stack size.
745: */
746:
747: #ifdef PAX_ASLR
1.417 maxv 748: #define ASLR_GAP(epp) (pax_aslr_epp_active(epp) ? (cprng_fast32() % PAGE_SIZE) : 0)
1.379 uebayasi 749: #else
1.417 maxv 750: #define ASLR_GAP(epp) 0
1.379 uebayasi 751: #endif
752:
1.267 dsl 753: #ifdef __MACHINE_STACK_GROWS_UP
1.386 uebayasi 754: /*
755: * copyargs() fills argc/argv/envp from the lower address even on
756: * __MACHINE_STACK_GROWS_UP machines. Reserve a few words just below the SP
757: * so that _rtld() can use it.
758: */
1.267 dsl 759: #define RTLD_GAP 32
760: #else
761: #define RTLD_GAP 0
762: #endif
763:
/* bytes of argument/environment strings copied in, rounded up by ALIGN() */
1.396 uebayasi 764: const size_t argenvstrlen = (char *)ALIGN(dp) - data->ed_argp;
1.386 uebayasi 765:
1.396 uebayasi 766: data->ed_argslen = calcargs(data, argenvstrlen);
1.386 uebayasi 767:
1.417 maxv 768: const size_t len = calcstack(data, ASLR_GAP(epp) + RTLD_GAP);
1.55 cgd 769:
1.396 uebayasi 770: if (len > epp->ep_ssize) {
1.337 martin 771: /* in effect, compare to initial limit */
1.396 uebayasi 772: DPRINTF(("%s: stack limit exceeded %zu\n", __func__, len));
1.403 maxv 773: error = ENOMEM;
1.55 cgd 774: goto bad;
775: }
1.337 martin 776: /* adjust "active stack depth" for process VSZ */
1.396 uebayasi 777: epp->ep_ssize = len;
1.337 martin 778:
/* success: p_reflock and exec_lock stay held */
779: return 0;
780:
/*
 * Failure exits, from most to least state to undo:
 *   bad      check_exec() succeeded: vmcmds, vnode and argument buffer exist
 *   freehdr  exec header, path buffers and exec_lock are held
 *   clrflg   only p_reflock is held
 */
1.352 rmind 781: bad:
782: /* free the vmspace-creation commands, and release their references */
1.378 uebayasi 783: kill_vmcmds(&epp->ep_vmcmds);
1.352 rmind 784: /* kill any opened file descriptor, if necessary */
1.378 uebayasi 785: if (epp->ep_flags & EXEC_HASFD) {
786: epp->ep_flags &= ~EXEC_HASFD;
787: fd_close(epp->ep_fd);
1.352 rmind 788: }
789: /* close and put the exec'd file */
1.378 uebayasi 790: vn_lock(epp->ep_vp, LK_EXCLUSIVE | LK_RETRY);
791: VOP_CLOSE(epp->ep_vp, FREAD, l->l_cred);
792: vput(epp->ep_vp);
1.352 rmind 793: pool_put(&exec_pool, data->ed_argp);
794:
795: freehdr:
1.378 uebayasi 796: kmem_free(epp->ep_hdr, epp->ep_hdrlen);
797: if (epp->ep_emul_root != NULL)
798: vrele(epp->ep_emul_root);
799: if (epp->ep_interp != NULL)
800: vrele(epp->ep_interp);
1.352 rmind 801:
1.337 martin 802: rw_exit(&exec_lock);
1.352 rmind 803:
804: pathbuf_stringcopy_put(data->ed_pathbuf, data->ed_pathstring);
805: pathbuf_destroy(data->ed_pathbuf);
806: PNBUF_PUT(data->ed_resolvedpathbuf);
807:
808: clrflg:
1.351 rmind 809: rw_exit(&p->p_reflock);
1.337 martin 810:
/*
 * Nobody recognised the image: autoload exec modules and try again.
 * Recording module_gen in modgen stops the retries once it no longer
 * changes between attempts.
 */
811: if (modgen != module_gen && error == ENOEXEC) {
812: modgen = module_gen;
813: exec_autoload();
814: goto retry;
815: }
816:
1.418 christos 817: SDT_PROBE(proc, kernel, , exec__failure, error, 0, 0, 0, 0);
1.337 martin 818: return error;
819: }
820:
1.401 uebayasi 821: static int
822: execve_dovmcmds(struct lwp *l, struct execve_data * restrict data)
823: {
824: struct exec_package * const epp = &data->ed_pack;
825: struct proc *p = l->l_proc;
826: struct exec_vmcmd *base_vcp;
827: int error = 0;
1.407 riastrad 828: size_t i;
1.401 uebayasi 829:
830: /* record proc's vnode, for use by procfs and others */
831: if (p->p_textvp)
832: vrele(p->p_textvp);
833: vref(epp->ep_vp);
834: p->p_textvp = epp->ep_vp;
835:
836: /* create the new process's VM space by running the vmcmds */
837: KASSERTMSG(epp->ep_vmcmds.evs_used != 0, "%s: no vmcmds", __func__);
838:
839: DUMPVMCMDS(epp, 0, 0);
840:
841: base_vcp = NULL;
842:
843: for (i = 0; i < epp->ep_vmcmds.evs_used && !error; i++) {
844: struct exec_vmcmd *vcp;
845:
846: vcp = &epp->ep_vmcmds.evs_cmds[i];
847: if (vcp->ev_flags & VMCMD_RELATIVE) {
848: KASSERTMSG(base_vcp != NULL,
849: "%s: relative vmcmd with no base", __func__);
850: KASSERTMSG((vcp->ev_flags & VMCMD_BASE) == 0,
851: "%s: illegal base & relative vmcmd", __func__);
852: vcp->ev_addr += base_vcp->ev_addr;
853: }
854: error = (*vcp->ev_proc)(l, vcp);
855: if (error)
856: DUMPVMCMDS(epp, i, error);
857: if (vcp->ev_flags & VMCMD_BASE)
858: base_vcp = vcp;
859: }
860:
861: /* free the vmspace-creation commands, and release their references */
862: kill_vmcmds(&epp->ep_vmcmds);
863:
864: vn_lock(epp->ep_vp, LK_EXCLUSIVE | LK_RETRY);
865: VOP_CLOSE(epp->ep_vp, FREAD, l->l_cred);
866: vput(epp->ep_vp);
867:
868: /* if an error happened, deallocate and punt */
869: if (error != 0) {
870: DPRINTF(("%s: vmcmd %zu failed: %d\n", __func__, i - 1, error));
871: }
872: return error;
873: }
874:
1.352 rmind 875: static void
876: execve_free_data(struct execve_data *data)
877: {
1.378 uebayasi 878: struct exec_package * const epp = &data->ed_pack;
1.352 rmind 879:
880: /* free the vmspace-creation commands, and release their references */
1.378 uebayasi 881: kill_vmcmds(&epp->ep_vmcmds);
1.352 rmind 882: /* kill any opened file descriptor, if necessary */
1.378 uebayasi 883: if (epp->ep_flags & EXEC_HASFD) {
884: epp->ep_flags &= ~EXEC_HASFD;
885: fd_close(epp->ep_fd);
1.352 rmind 886: }
887:
888: /* close and put the exec'd file */
1.378 uebayasi 889: vn_lock(epp->ep_vp, LK_EXCLUSIVE | LK_RETRY);
890: VOP_CLOSE(epp->ep_vp, FREAD, curlwp->l_cred);
891: vput(epp->ep_vp);
1.352 rmind 892: pool_put(&exec_pool, data->ed_argp);
893:
1.378 uebayasi 894: kmem_free(epp->ep_hdr, epp->ep_hdrlen);
895: if (epp->ep_emul_root != NULL)
896: vrele(epp->ep_emul_root);
897: if (epp->ep_interp != NULL)
898: vrele(epp->ep_interp);
1.352 rmind 899:
900: pathbuf_stringcopy_put(data->ed_pathbuf, data->ed_pathstring);
901: pathbuf_destroy(data->ed_pathbuf);
902: PNBUF_PUT(data->ed_resolvedpathbuf);
903: }
904:
1.400 uebayasi 905: static void
906: pathexec(struct exec_package *epp, struct proc *p, const char *pathstring)
907: {
908: const char *commandname;
909: size_t commandlen;
910: char *path;
911:
912: /* set command name & other accounting info */
913: commandname = strrchr(epp->ep_resolvedname, '/');
914: if (commandname != NULL) {
915: commandname++;
916: } else {
917: commandname = epp->ep_resolvedname;
918: }
919: commandlen = min(strlen(commandname), MAXCOMLEN);
920: (void)memcpy(p->p_comm, commandname, commandlen);
921: p->p_comm[commandlen] = '\0';
922:
923:
924: /*
925: * If the path starts with /, we don't need to do any work.
926: * This handles the majority of the cases.
927: * In the future perhaps we could canonicalize it?
928: */
929: if (pathstring[0] == '/') {
1.414 christos 930: path = PNBUF_GET();
931: (void)strlcpy(path, pathstring, MAXPATHLEN);
1.400 uebayasi 932: epp->ep_path = path;
1.414 christos 933: } else
1.400 uebayasi 934: epp->ep_path = NULL;
935: }
936:
1.387 uebayasi 937: /* XXX elsewhere */
938: static int
939: credexec(struct lwp *l, struct vattr *attr)
940: {
941: struct proc *p = l->l_proc;
942: int error;
943:
944: /*
945: * Deal with set[ug]id. MNT_NOSUID has already been used to disable
946: * s[ug]id. It's OK to check for PSL_TRACED here as we have blocked
947: * out additional references on the process for the moment.
948: */
949: if ((p->p_slflag & PSL_TRACED) == 0 &&
950:
951: (((attr->va_mode & S_ISUID) != 0 &&
952: kauth_cred_geteuid(l->l_cred) != attr->va_uid) ||
953:
954: ((attr->va_mode & S_ISGID) != 0 &&
955: kauth_cred_getegid(l->l_cred) != attr->va_gid))) {
956: /*
957: * Mark the process as SUGID before we do
958: * anything that might block.
959: */
960: proc_crmod_enter();
961: proc_crmod_leave(NULL, NULL, true);
962:
963: /* Make sure file descriptors 0..2 are in use. */
964: if ((error = fd_checkstd()) != 0) {
965: DPRINTF(("%s: fdcheckstd failed %d\n",
966: __func__, error));
967: return error;
968: }
969:
970: /*
971: * Copy the credential so other references don't see our
972: * changes.
973: */
974: l->l_cred = kauth_cred_copy(l->l_cred);
975: #ifdef KTRACE
976: /*
977: * If the persistent trace flag isn't set, turn off.
978: */
979: if (p->p_tracep) {
980: mutex_enter(&ktrace_lock);
981: if (!(p->p_traceflag & KTRFAC_PERSISTENT))
982: ktrderef(p);
983: mutex_exit(&ktrace_lock);
984: }
985: #endif
986: if (attr->va_mode & S_ISUID)
987: kauth_cred_seteuid(l->l_cred, attr->va_uid);
988: if (attr->va_mode & S_ISGID)
989: kauth_cred_setegid(l->l_cred, attr->va_gid);
990: } else {
991: if (kauth_cred_geteuid(l->l_cred) ==
992: kauth_cred_getuid(l->l_cred) &&
993: kauth_cred_getegid(l->l_cred) ==
994: kauth_cred_getgid(l->l_cred))
995: p->p_flag &= ~PK_SUGID;
996: }
997:
998: /*
999: * Copy the credential so other references don't see our changes.
1000: * Test to see if this is necessary first, since in the common case
1001: * we won't need a private reference.
1002: */
1003: if (kauth_cred_geteuid(l->l_cred) != kauth_cred_getsvuid(l->l_cred) ||
1004: kauth_cred_getegid(l->l_cred) != kauth_cred_getsvgid(l->l_cred)) {
1005: l->l_cred = kauth_cred_copy(l->l_cred);
1006: kauth_cred_setsvuid(l->l_cred, kauth_cred_geteuid(l->l_cred));
1007: kauth_cred_setsvgid(l->l_cred, kauth_cred_getegid(l->l_cred));
1008: }
1009:
1010: /* Update the master credentials. */
1011: if (l->l_cred != p->p_cred) {
1012: kauth_cred_t ocred;
1013:
1014: kauth_cred_hold(l->l_cred);
1015: mutex_enter(p->p_lock);
1016: ocred = p->p_cred;
1017: p->p_cred = l->l_cred;
1018: mutex_exit(p->p_lock);
1019: kauth_cred_free(ocred);
1020: }
1021:
1022: return 0;
1023: }
1024:
1.406 uebayasi 1025: static void
1026: emulexec(struct lwp *l, struct exec_package *epp)
1027: {
1028: struct proc *p = l->l_proc;
1029:
1030: /* The emulation root will usually have been found when we looked
1031: * for the elf interpreter (or similar), if not look now. */
1032: if (epp->ep_esch->es_emul->e_path != NULL &&
1033: epp->ep_emul_root == NULL)
1034: emul_find_root(l, epp);
1035:
1036: /* Any old emulation root got removed by fdcloseexec */
1037: rw_enter(&p->p_cwdi->cwdi_lock, RW_WRITER);
1038: p->p_cwdi->cwdi_edir = epp->ep_emul_root;
1039: rw_exit(&p->p_cwdi->cwdi_lock);
1040: epp->ep_emul_root = NULL;
1041: if (epp->ep_interp != NULL)
1042: vrele(epp->ep_interp);
1043:
1044: /*
1045: * Call emulation specific exec hook. This can setup per-process
1046: * p->p_emuldata or do any other per-process stuff an emulation needs.
1047: *
1048: * If we are executing process of different emulation than the
1049: * original forked process, call e_proc_exit() of the old emulation
1050: * first, then e_proc_exec() of new emulation. If the emulation is
1051: * same, the exec hook code should deallocate any old emulation
1052: * resources held previously by this process.
1053: */
1054: if (p->p_emul && p->p_emul->e_proc_exit
1055: && p->p_emul != epp->ep_esch->es_emul)
1056: (*p->p_emul->e_proc_exit)(p);
1057:
1058: /*
1059: * This is now LWP 1.
1060: */
1061: /* XXX elsewhere */
1062: mutex_enter(p->p_lock);
1063: p->p_nlwpid = 1;
1064: l->l_lid = 1;
1065: mutex_exit(p->p_lock);
1066:
1067: /*
1068: * Call exec hook. Emulation code may NOT store reference to anything
1069: * from &pack.
1070: */
1071: if (epp->ep_esch->es_emul->e_proc_exec)
1072: (*epp->ep_esch->es_emul->e_proc_exec)(p, epp);
1073:
1074: /* update p_emul, the old value is no longer needed */
1075: p->p_emul = epp->ep_esch->es_emul;
1076:
1077: /* ...and the same for p_execsw */
1078: p->p_execsw = epp->ep_esch;
1079:
1080: #ifdef __HAVE_SYSCALL_INTERN
1081: (*p->p_emul->e_syscall_intern)(p);
1082: #endif
1083: ktremul();
1084: }
1085:
1.337 martin 1086: static int
1.348 martin 1087: execve_runproc(struct lwp *l, struct execve_data * restrict data,
1088: bool no_local_exec_lock, bool is_spawn)
1.337 martin 1089: {
1.378 uebayasi 1090: struct exec_package * const epp = &data->ed_pack;
1.352 rmind 1091: int error = 0;
1092: struct proc *p;
1.337 martin 1093:
1.348 martin 1094: /*
1095: * In case of a posix_spawn operation, the child doing the exec
1096: * might not hold the reader lock on exec_lock, but the parent
1097: * will do this instead.
1098: */
1099: KASSERT(no_local_exec_lock || rw_lock_held(&exec_lock));
1.381 uebayasi 1100: KASSERT(!no_local_exec_lock || is_spawn);
1.337 martin 1101: KASSERT(data != NULL);
1.352 rmind 1102:
1103: p = l->l_proc;
1.337 martin 1104:
1.237 ad 1105: /* Get rid of other LWPs. */
1.340 rmind 1106: if (p->p_nlwps > 1) {
1.272 ad 1107: mutex_enter(p->p_lock);
1.237 ad 1108: exit_lwps(l);
1.272 ad 1109: mutex_exit(p->p_lock);
1.237 ad 1110: }
1.164 thorpej 1111: KDASSERT(p->p_nlwps == 1);
1112:
1.253 ad 1113: /* Destroy any lwpctl info. */
1114: if (p->p_lwpctl != NULL)
1115: lwp_ctl_exit();
1116:
1.164 thorpej 1117: /* Remove POSIX timers */
1118: timers_free(p, TIMERS_POSIX);
1119:
1.417 maxv 1120: /* Set the PaX flags. */
1121: p->p_pax = epp->ep_pax_flags;
1122:
1.86 thorpej 1123: /*
1124: * Do whatever is necessary to prepare the address space
1125: * for remapping. Note that this might replace the current
1126: * vmspace with another!
1127: */
1.348 martin 1128: if (is_spawn)
1.378 uebayasi 1129: uvmspace_spawn(l, epp->ep_vm_minaddr,
1130: epp->ep_vm_maxaddr,
1131: epp->ep_flags & EXEC_TOPDOWN_VM);
1.348 martin 1132: else
1.378 uebayasi 1133: uvmspace_exec(l, epp->ep_vm_minaddr,
1134: epp->ep_vm_maxaddr,
1135: epp->ep_flags & EXEC_TOPDOWN_VM);
1.55 cgd 1136:
1.385 uebayasi 1137: struct vmspace *vm;
1.86 thorpej 1138: vm = p->p_vmspace;
1.378 uebayasi 1139: vm->vm_taddr = (void *)epp->ep_taddr;
1140: vm->vm_tsize = btoc(epp->ep_tsize);
1141: vm->vm_daddr = (void*)epp->ep_daddr;
1142: vm->vm_dsize = btoc(epp->ep_dsize);
1143: vm->vm_ssize = btoc(epp->ep_ssize);
1.288 mrg 1144: vm->vm_issize = 0;
1.378 uebayasi 1145: vm->vm_maxsaddr = (void *)epp->ep_maxsaddr;
1146: vm->vm_minsaddr = (void *)epp->ep_minsaddr;
1.55 cgd 1147:
1.260 christos 1148: #ifdef PAX_ASLR
1.413 maxv 1149: pax_aslr_init_vm(l, vm);
1.260 christos 1150: #endif /* PAX_ASLR */
1151:
1.401 uebayasi 1152: /* Now map address space. */
1153: error = execve_dovmcmds(l, data);
1154: if (error != 0)
1.55 cgd 1155: goto exec_abort;
1156:
1.400 uebayasi 1157: pathexec(epp, p, data->ed_pathstring);
1.255 christos 1158:
1.397 uebayasi 1159: char * const newstack = STACK_GROW(vm->vm_minsaddr, epp->ep_ssize);
1.386 uebayasi 1160:
1.399 uebayasi 1161: error = copyoutargs(data, l, newstack);
1.398 uebayasi 1162: if (error != 0)
1.55 cgd 1163: goto exec_abort;
1.109 simonb 1164:
1.307 pooka 1165: cwdexec(p);
1.270 ad 1166: fd_closeexec(); /* handle close on exec */
1.315 alnsn 1167:
1168: if (__predict_false(ktrace_on))
1169: fd_ktrexecfd();
1170:
1.55 cgd 1171: execsigs(p); /* reset catched signals */
1.183 junyoung 1172:
1.380 uebayasi 1173: mutex_enter(p->p_lock);
1.164 thorpej 1174: l->l_ctxlink = NULL; /* reset ucontext link */
1.55 cgd 1175: p->p_acflag &= ~AFORK;
1.238 pavel 1176: p->p_flag |= PK_EXEC;
1.272 ad 1177: mutex_exit(p->p_lock);
1.237 ad 1178:
1179: /*
1180: * Stop profiling.
1181: */
1182: if ((p->p_stflag & PST_PROFIL) != 0) {
1183: mutex_spin_enter(&p->p_stmutex);
1184: stopprofclock(p);
1185: mutex_spin_exit(&p->p_stmutex);
1186: }
1187:
1188: /*
1.275 ad 1189: * It's OK to test PL_PPWAIT unlocked here, as other LWPs have
1.237 ad 1190: * exited and exec()/exit() are the only places it will be cleared.
1191: */
1.275 ad 1192: if ((p->p_lflag & PL_PPWAIT) != 0) {
1.354 christos 1193: #if 0
1.353 rmind 1194: lwp_t *lp;
1195:
1.271 ad 1196: mutex_enter(proc_lock);
1.353 rmind 1197: lp = p->p_vforklwp;
1198: p->p_vforklwp = NULL;
1199:
1.308 pooka 1200: l->l_lwpctl = NULL; /* was on loan from blocked parent */
1.275 ad 1201: p->p_lflag &= ~PL_PPWAIT;
1.353 rmind 1202:
1203: lp->l_pflag &= ~LP_VFORKWAIT; /* XXX */
1204: cv_broadcast(&lp->l_waitcv);
1.271 ad 1205: mutex_exit(proc_lock);
1.354 christos 1206: #else
1207: mutex_enter(proc_lock);
1208: l->l_lwpctl = NULL; /* was on loan from blocked parent */
1209: p->p_lflag &= ~PL_PPWAIT;
1210: cv_broadcast(&p->p_pptr->p_waitcv);
1211: mutex_exit(proc_lock);
1212: #endif
1.55 cgd 1213: }
1214:
1.387 uebayasi 1215: error = credexec(l, &data->ed_attr);
1216: if (error)
1217: goto exec_abort;
1.221 ad 1218:
1.155 gmcgarry 1219: #if defined(__HAVE_RAS)
1220: /*
1221: * Remove all RASs from the address space.
1222: */
1.251 ad 1223: ras_purgeall();
1.155 gmcgarry 1224: #endif
1.107 fvdl 1225:
1226: doexechooks(p);
1.55 cgd 1227:
1.390 uebayasi 1228: /*
1229: * Set initial SP at the top of the stack.
1230: *
1231: * Note that on machines where stack grows up (e.g. hppa), SP points to
1232: * the end of arg/env strings. Userland guesses the address of argc
1233: * via ps_strings::ps_argvstr.
1234: */
1235:
1236: /* Setup new registers and do misc. setup. */
1.397 uebayasi 1237: (*epp->ep_esch->es_emul->e_setregs)(l, epp, (vaddr_t)newstack);
1.378 uebayasi 1238: if (epp->ep_esch->es_setregs)
1.397 uebayasi 1239: (*epp->ep_esch->es_setregs)(l, epp, (vaddr_t)newstack);
1.55 cgd 1240:
1.309 joerg 1241: /* Provide a consistent LWP private setting */
1242: (void)lwp_setprivate(l, NULL);
1243:
1.316 matt 1244: /* Discard all PCU state; need to start fresh */
1245: pcu_discard_all(l);
1246:
1.171 chs 1247: /* map the process's signal trampoline code */
1.378 uebayasi 1248: if ((error = exec_sigcode_map(p, epp->ep_esch->es_emul)) != 0) {
1.312 christos 1249: DPRINTF(("%s: map sigcode failed %d\n", __func__, error));
1.171 chs 1250: goto exec_abort;
1.209 christos 1251: }
1.171 chs 1252:
1.337 martin 1253: pool_put(&exec_pool, data->ed_argp);
1.276 ad 1254:
1255: /* notify others that we exec'd */
1256: KNOTE(&p->p_klist, NOTE_EXEC);
1257:
1.378 uebayasi 1258: kmem_free(epp->ep_hdr, epp->ep_hdrlen);
1.122 jdolecek 1259:
1.418 christos 1260: SDT_PROBE(proc, kernel, , exec__success, epp->ep_kname, 0, 0, 0, 0);
1.294 darran 1261:
1.406 uebayasi 1262: emulexec(l, epp);
1.85 mycroft 1263:
1.252 ad 1264: /* Allow new references from the debugger/procfs. */
1.341 martin 1265: rw_exit(&p->p_reflock);
1.348 martin 1266: if (!no_local_exec_lock)
1267: rw_exit(&exec_lock);
1.162 manu 1268:
1.271 ad 1269: mutex_enter(proc_lock);
1.237 ad 1270:
1271: if ((p->p_slflag & (PSL_TRACED|PSL_SYSCALL)) == PSL_TRACED) {
1.383 uebayasi 1272: ksiginfo_t ksi;
1273:
1.237 ad 1274: KSI_INIT_EMPTY(&ksi);
1275: ksi.ksi_signo = SIGTRAP;
1276: ksi.ksi_lid = l->l_lid;
1277: kpsignal(p, &ksi, NULL);
1278: }
1.162 manu 1279:
1.237 ad 1280: if (p->p_sflag & PS_STOPEXEC) {
1.383 uebayasi 1281: ksiginfoq_t kq;
1282:
1.237 ad 1283: KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
1.175 dsl 1284: p->p_pptr->p_nstopchild++;
1.419 pgoyette 1285: p->p_waited = 0;
1.272 ad 1286: mutex_enter(p->p_lock);
1.237 ad 1287: ksiginfo_queue_init(&kq);
1288: sigclearall(p, &contsigmask, &kq);
1289: lwp_lock(l);
1290: l->l_stat = LSSTOP;
1.162 manu 1291: p->p_stat = SSTOP;
1.164 thorpej 1292: p->p_nrlwps--;
1.304 rmind 1293: lwp_unlock(l);
1.272 ad 1294: mutex_exit(p->p_lock);
1.271 ad 1295: mutex_exit(proc_lock);
1.304 rmind 1296: lwp_lock(l);
1.245 yamt 1297: mi_switch(l);
1.237 ad 1298: ksiginfo_queue_drain(&kq);
1299: KERNEL_LOCK(l->l_biglocks, l);
1300: } else {
1.271 ad 1301: mutex_exit(proc_lock);
1.162 manu 1302: }
1303:
1.337 martin 1304: pathbuf_stringcopy_put(data->ed_pathbuf, data->ed_pathstring);
1305: pathbuf_destroy(data->ed_pathbuf);
1306: PNBUF_PUT(data->ed_resolvedpathbuf);
1.327 reinoud 1307: DPRINTF(("%s finished\n", __func__));
1.374 martin 1308: return EJUSTRETURN;
1.55 cgd 1309:
1.138 lukem 1310: exec_abort:
1.418 christos 1311: SDT_PROBE(proc, kernel, , exec__failure, error, 0, 0, 0, 0);
1.297 rmind 1312: rw_exit(&p->p_reflock);
1.348 martin 1313: if (!no_local_exec_lock)
1314: rw_exit(&exec_lock);
1.297 rmind 1315:
1.352 rmind 1316: pathbuf_stringcopy_put(data->ed_pathbuf, data->ed_pathstring);
1317: pathbuf_destroy(data->ed_pathbuf);
1318: PNBUF_PUT(data->ed_resolvedpathbuf);
1319:
1.55 cgd 1320: /*
1321: * the old process doesn't exist anymore. exit gracefully.
1322: * get rid of the (new) address space we have created, if any, get rid
1323: * of our namei data and vnode, and exit noting failure
1324: */
1.88 mrg 1325: uvm_deallocate(&vm->vm_map, VM_MIN_ADDRESS,
1.352 rmind 1326: VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS);
1.348 martin 1327:
1.378 uebayasi 1328: exec_free_emul_arg(epp);
1.337 martin 1329: pool_put(&exec_pool, data->ed_argp);
1.378 uebayasi 1330: kmem_free(epp->ep_hdr, epp->ep_hdrlen);
1331: if (epp->ep_emul_root != NULL)
1332: vrele(epp->ep_emul_root);
1333: if (epp->ep_interp != NULL)
1334: vrele(epp->ep_interp);
1.237 ad 1335:
1.252 ad 1336: /* Acquire the sched-state mutex (exit1() will release it). */
1.348 martin 1337: if (!is_spawn) {
1.337 martin 1338: mutex_enter(p->p_lock);
1339: exit1(l, W_EXITCODE(error, SIGABRT));
1340: }
1.55 cgd 1341:
1.348 martin 1342: return error;
1.67 christos 1343: }
1344:
1.144 christos 1345: int
1.337 martin 1346: execve1(struct lwp *l, const char *path, char * const *args,
1347: char * const *envs, execve_fetch_element_t fetch_element)
1348: {
1349: struct execve_data data;
1350: int error;
1351:
1352: error = execve_loadvm(l, path, args, envs, fetch_element, &data);
1353: if (error)
1354: return error;
1.348 martin 1355: error = execve_runproc(l, &data, false, false);
1.337 martin 1356: return error;
1357: }
1358:
1.396 uebayasi 1359: static size_t
1.411 christos 1360: fromptrsz(const struct exec_package *epp)
1361: {
1362: return (epp->ep_flags & EXEC_FROM32) ? sizeof(int) : sizeof(char *);
1363: }
1364:
1365: static size_t
1.409 christos 1366: ptrsz(const struct exec_package *epp)
1367: {
1.411 christos 1368: return (epp->ep_flags & EXEC_32) ? sizeof(int) : sizeof(char *);
1.409 christos 1369: }
1370:
1371: static size_t
1.396 uebayasi 1372: calcargs(struct execve_data * restrict data, const size_t argenvstrlen)
1373: {
1374: struct exec_package * const epp = &data->ed_pack;
1375:
1376: const size_t nargenvptrs =
1.402 uebayasi 1377: 1 + /* long argc */
1.396 uebayasi 1378: data->ed_argc + /* char *argv[] */
1379: 1 + /* \0 */
1380: data->ed_envc + /* char *env[] */
1381: 1 + /* \0 */
1382: epp->ep_esch->es_arglen; /* auxinfo */
1383:
1.409 christos 1384: return (nargenvptrs * ptrsz(epp)) + argenvstrlen;
1.396 uebayasi 1385: }
1386:
1387: static size_t
1388: calcstack(struct execve_data * restrict data, const size_t gaplen)
1389: {
1390: struct exec_package * const epp = &data->ed_pack;
1391:
1392: data->ed_szsigcode = epp->ep_esch->es_emul->e_esigcode -
1393: epp->ep_esch->es_emul->e_sigcode;
1394:
1395: data->ed_ps_strings_sz = (epp->ep_flags & EXEC_32) ?
1396: sizeof(struct ps_strings32) : sizeof(struct ps_strings);
1397:
1398: const size_t sigcode_psstr_sz =
1399: data->ed_szsigcode + /* sigcode */
1400: data->ed_ps_strings_sz + /* ps_strings */
1401: STACK_PTHREADSPACE; /* pthread space */
1402:
1403: const size_t stacklen =
1404: data->ed_argslen +
1405: gaplen +
1406: sigcode_psstr_sz;
1407:
1408: /* make the stack "safely" aligned */
1409: return STACK_LEN_ALIGN(stacklen, STACK_ALIGNBYTES);
1410: }
1411:
1.391 uebayasi 1412: static int
1.399 uebayasi 1413: copyoutargs(struct execve_data * restrict data, struct lwp *l,
1414: char * const newstack)
1415: {
1416: struct exec_package * const epp = &data->ed_pack;
1417: struct proc *p = l->l_proc;
1418: int error;
1419:
1420: /* remember information about the process */
1421: data->ed_arginfo.ps_nargvstr = data->ed_argc;
1422: data->ed_arginfo.ps_nenvstr = data->ed_envc;
1423:
1424: /*
1425: * Allocate the stack address passed to the newly execve()'ed process.
1426: *
1427: * The new stack address will be set to the SP (stack pointer) register
1428: * in setregs().
1429: */
1430:
1431: char *newargs = STACK_ALLOC(
1432: STACK_SHRINK(newstack, data->ed_argslen), data->ed_argslen);
1433:
1434: error = (*epp->ep_esch->es_copyargs)(l, epp,
1435: &data->ed_arginfo, &newargs, data->ed_argp);
1436:
1437: if (epp->ep_path) {
1438: PNBUF_PUT(epp->ep_path);
1439: epp->ep_path = NULL;
1440: }
1441: if (error) {
1442: DPRINTF(("%s: copyargs failed %d\n", __func__, error));
1443: return error;
1444: }
1445:
1446: error = copyoutpsstrs(data, p);
1447: if (error != 0)
1448: return error;
1449:
1450: return 0;
1451: }
1452:
1453: static int
1.398 uebayasi 1454: copyoutpsstrs(struct execve_data * restrict data, struct proc *p)
1455: {
1456: struct exec_package * const epp = &data->ed_pack;
1457: struct ps_strings32 arginfo32;
1458: void *aip;
1459: int error;
1460:
1461: /* fill process ps_strings info */
1462: p->p_psstrp = (vaddr_t)STACK_ALLOC(STACK_GROW(epp->ep_minsaddr,
1463: STACK_PTHREADSPACE), data->ed_ps_strings_sz);
1464:
1465: if (epp->ep_flags & EXEC_32) {
1466: aip = &arginfo32;
1467: arginfo32.ps_argvstr = (vaddr_t)data->ed_arginfo.ps_argvstr;
1468: arginfo32.ps_nargvstr = data->ed_arginfo.ps_nargvstr;
1469: arginfo32.ps_envstr = (vaddr_t)data->ed_arginfo.ps_envstr;
1470: arginfo32.ps_nenvstr = data->ed_arginfo.ps_nenvstr;
1471: } else
1472: aip = &data->ed_arginfo;
1473:
1474: /* copy out the process's ps_strings structure */
1475: if ((error = copyout(aip, (void *)p->p_psstrp, data->ed_ps_strings_sz))
1476: != 0) {
1477: DPRINTF(("%s: ps_strings copyout %p->%p size %zu failed\n",
1478: __func__, aip, (void *)p->p_psstrp, data->ed_ps_strings_sz));
1479: return error;
1480: }
1481:
1482: return 0;
1483: }
1484:
1485: static int
1.391 uebayasi 1486: copyinargs(struct execve_data * restrict data, char * const *args,
1487: char * const *envs, execve_fetch_element_t fetch_element, char **dpp)
1488: {
1489: struct exec_package * const epp = &data->ed_pack;
1.392 uebayasi 1490: char *dp;
1.391 uebayasi 1491: size_t i;
1492: int error;
1493:
1494: dp = *dpp;
1495:
1496: data->ed_argc = 0;
1497:
1498: /* copy the fake args list, if there's one, freeing it as we go */
1499: if (epp->ep_flags & EXEC_HASARGL) {
1.405 uebayasi 1500: struct exec_fakearg *fa = epp->ep_fa;
1.391 uebayasi 1501:
1.405 uebayasi 1502: while (fa->fa_arg != NULL) {
1.394 uebayasi 1503: const size_t maxlen = ARG_MAX - (dp - data->ed_argp);
1504: size_t len;
1.391 uebayasi 1505:
1.405 uebayasi 1506: len = strlcpy(dp, fa->fa_arg, maxlen);
1.394 uebayasi 1507: /* Count NUL into len. */
1508: if (len < maxlen)
1509: len++;
1.404 uebayasi 1510: else {
1.405 uebayasi 1511: while (fa->fa_arg != NULL) {
1512: kmem_free(fa->fa_arg, fa->fa_len);
1513: fa++;
1.404 uebayasi 1514: }
1515: kmem_free(epp->ep_fa, epp->ep_fa_len);
1516: epp->ep_flags &= ~EXEC_HASARGL;
1.395 uebayasi 1517: return E2BIG;
1.404 uebayasi 1518: }
1.405 uebayasi 1519: ktrexecarg(fa->fa_arg, len - 1);
1.394 uebayasi 1520: dp += len;
1.391 uebayasi 1521:
1.405 uebayasi 1522: kmem_free(fa->fa_arg, fa->fa_len);
1523: fa++;
1.391 uebayasi 1524: data->ed_argc++;
1525: }
1526: kmem_free(epp->ep_fa, epp->ep_fa_len);
1527: epp->ep_flags &= ~EXEC_HASARGL;
1528: }
1529:
1.392 uebayasi 1530: /*
1531: * Read and count argument strings from user.
1532: */
1533:
1.391 uebayasi 1534: if (args == NULL) {
1535: DPRINTF(("%s: null args\n", __func__));
1536: return EINVAL;
1537: }
1.392 uebayasi 1538: if (epp->ep_flags & EXEC_SKIPARG)
1.411 christos 1539: args = (const void *)((const char *)args + fromptrsz(epp));
1.391 uebayasi 1540: i = 0;
1.392 uebayasi 1541: error = copyinargstrs(data, args, fetch_element, &dp, &i, ktr_execarg);
1542: if (error != 0) {
1543: DPRINTF(("%s: copyin arg %d\n", __func__, error));
1544: return error;
1545: }
1546: data->ed_argc += i;
1547:
1548: /*
1549: * Read and count environment strings from user.
1550: */
1551:
1552: data->ed_envc = 0;
1553: /* environment need not be there */
1554: if (envs == NULL)
1555: goto done;
1556: i = 0;
1557: error = copyinargstrs(data, envs, fetch_element, &dp, &i, ktr_execenv);
1558: if (error != 0) {
1559: DPRINTF(("%s: copyin env %d\n", __func__, error));
1560: return error;
1561: }
1562: data->ed_envc += i;
1563:
1564: done:
1565: *dpp = dp;
1566:
1567: return 0;
1568: }
1569:
1570: static int
1571: copyinargstrs(struct execve_data * restrict data, char * const *strs,
1572: execve_fetch_element_t fetch_element, char **dpp, size_t *ip,
1573: void (*ktr)(const void *, size_t))
1574: {
1575: char *dp, *sp;
1576: size_t i;
1577: int error;
1578:
1579: dp = *dpp;
1.391 uebayasi 1580:
1.392 uebayasi 1581: i = 0;
1.391 uebayasi 1582: while (1) {
1.394 uebayasi 1583: const size_t maxlen = ARG_MAX - (dp - data->ed_argp);
1.391 uebayasi 1584: size_t len;
1585:
1.392 uebayasi 1586: if ((error = (*fetch_element)(strs, i, &sp)) != 0) {
1.391 uebayasi 1587: return error;
1588: }
1589: if (!sp)
1590: break;
1591: if ((error = copyinstr(sp, dp, maxlen, &len)) != 0) {
1592: if (error == ENAMETOOLONG)
1593: error = E2BIG;
1594: return error;
1595: }
1.392 uebayasi 1596: if (__predict_false(ktrace_on))
1597: (*ktr)(dp, len - 1);
1.391 uebayasi 1598: dp += len;
1599: i++;
1600: }
1601:
1602: *dpp = dp;
1.392 uebayasi 1603: *ip = i;
1.391 uebayasi 1604:
1605: return 0;
1606: }
1607:
1.382 uebayasi 1608: /*
1609: * Copy argv and env strings from kernel buffer (argp) to the new stack.
1610: * Those strings are located just after auxinfo.
1611: */
1.337 martin 1612: int
1.231 yamt 1613: copyargs(struct lwp *l, struct exec_package *pack, struct ps_strings *arginfo,
1614: char **stackp, void *argp)
1.67 christos 1615: {
1.138 lukem 1616: char **cpp, *dp, *sp;
1617: size_t len;
1618: void *nullp;
1619: long argc, envc;
1.144 christos 1620: int error;
1.138 lukem 1621:
1.144 christos 1622: cpp = (char **)*stackp;
1.138 lukem 1623: nullp = NULL;
1624: argc = arginfo->ps_nargvstr;
1625: envc = arginfo->ps_nenvstr;
1.382 uebayasi 1626:
1627: /* argc on stack is long */
1628: CTASSERT(sizeof(*cpp) == sizeof(argc));
1629:
1630: dp = (char *)(cpp +
1.402 uebayasi 1631: 1 + /* long argc */
1632: argc + /* char *argv[] */
1.382 uebayasi 1633: 1 + /* \0 */
1.402 uebayasi 1634: envc + /* char *env[] */
1.382 uebayasi 1635: 1 + /* \0 */
1.385 uebayasi 1636: /* XXX auxinfo multiplied by ptr size? */
1.382 uebayasi 1637: pack->ep_esch->es_arglen); /* auxinfo */
1638: sp = argp;
1639:
1.305 matt 1640: if ((error = copyout(&argc, cpp++, sizeof(argc))) != 0) {
1.312 christos 1641: COPYPRINTF("", cpp - 1, sizeof(argc));
1.144 christos 1642: return error;
1.305 matt 1643: }
1.67 christos 1644:
1645: /* XXX don't copy them out, remap them! */
1.69 mycroft 1646: arginfo->ps_argvstr = cpp; /* remember location of argv for later */
1.67 christos 1647:
1.305 matt 1648: for (; --argc >= 0; sp += len, dp += len) {
1649: if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0) {
1.312 christos 1650: COPYPRINTF("", cpp - 1, sizeof(dp));
1.305 matt 1651: return error;
1652: }
1653: if ((error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0) {
1.313 jakllsch 1654: COPYPRINTF("str", dp, (size_t)ARG_MAX);
1.144 christos 1655: return error;
1.305 matt 1656: }
1657: }
1.67 christos 1658:
1.305 matt 1659: if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0) {
1.312 christos 1660: COPYPRINTF("", cpp - 1, sizeof(nullp));
1.144 christos 1661: return error;
1.305 matt 1662: }
1.67 christos 1663:
1.69 mycroft 1664: arginfo->ps_envstr = cpp; /* remember location of envp for later */
1.67 christos 1665:
1.305 matt 1666: for (; --envc >= 0; sp += len, dp += len) {
1667: if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0) {
1.312 christos 1668: COPYPRINTF("", cpp - 1, sizeof(dp));
1.144 christos 1669: return error;
1.305 matt 1670: }
1671: if ((error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0) {
1.313 jakllsch 1672: COPYPRINTF("str", dp, (size_t)ARG_MAX);
1.305 matt 1673: return error;
1674: }
1.337 martin 1675:
1.305 matt 1676: }
1.67 christos 1677:
1.305 matt 1678: if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0) {
1.312 christos 1679: COPYPRINTF("", cpp - 1, sizeof(nullp));
1.144 christos 1680: return error;
1.305 matt 1681: }
1.67 christos 1682:
1.144 christos 1683: *stackp = (char *)cpp;
1684: return 0;
1.55 cgd 1685: }
1.130 jdolecek 1686:
1687:
1688: /*
1.282 ad 1689: * Add execsw[] entries.
1.130 jdolecek 1690: */
1691: int
1.282 ad 1692: exec_add(struct execsw *esp, int count)
1.130 jdolecek 1693: {
1.282 ad 1694: struct exec_entry *it;
1695: int i;
1.130 jdolecek 1696:
1.283 ad 1697: if (count == 0) {
1698: return 0;
1699: }
1.130 jdolecek 1700:
1.282 ad 1701: /* Check for duplicates. */
1.237 ad 1702: rw_enter(&exec_lock, RW_WRITER);
1.282 ad 1703: for (i = 0; i < count; i++) {
1704: LIST_FOREACH(it, &ex_head, ex_list) {
1705: /* assume unique (makecmds, probe_func, emulation) */
1706: if (it->ex_sw->es_makecmds == esp[i].es_makecmds &&
1707: it->ex_sw->u.elf_probe_func ==
1708: esp[i].u.elf_probe_func &&
1709: it->ex_sw->es_emul == esp[i].es_emul) {
1710: rw_exit(&exec_lock);
1711: return EEXIST;
1.130 jdolecek 1712: }
1713: }
1714: }
1715:
1.282 ad 1716: /* Allocate new entries. */
1717: for (i = 0; i < count; i++) {
1718: it = kmem_alloc(sizeof(*it), KM_SLEEP);
1719: it->ex_sw = &esp[i];
1720: LIST_INSERT_HEAD(&ex_head, it, ex_list);
1.130 jdolecek 1721: }
1722:
1723: /* update execsw[] */
1724: exec_init(0);
1.237 ad 1725: rw_exit(&exec_lock);
1.282 ad 1726: return 0;
1.130 jdolecek 1727: }
1728:
1729: /*
1730: * Remove execsw[] entry.
1731: */
1732: int
1.282 ad 1733: exec_remove(struct execsw *esp, int count)
1.130 jdolecek 1734: {
1.282 ad 1735: struct exec_entry *it, *next;
1736: int i;
1737: const struct proclist_desc *pd;
1738: proc_t *p;
1739:
1.283 ad 1740: if (count == 0) {
1741: return 0;
1742: }
1.130 jdolecek 1743:
1.282 ad 1744: /* Abort if any are busy. */
1.237 ad 1745: rw_enter(&exec_lock, RW_WRITER);
1.282 ad 1746: for (i = 0; i < count; i++) {
1747: mutex_enter(proc_lock);
1748: for (pd = proclists; pd->pd_list != NULL; pd++) {
1749: PROCLIST_FOREACH(p, pd->pd_list) {
1750: if (p->p_execsw == &esp[i]) {
1751: mutex_exit(proc_lock);
1752: rw_exit(&exec_lock);
1753: return EBUSY;
1754: }
1755: }
1756: }
1757: mutex_exit(proc_lock);
1758: }
1.130 jdolecek 1759:
1.282 ad 1760: /* None are busy, so remove them all. */
1761: for (i = 0; i < count; i++) {
1762: for (it = LIST_FIRST(&ex_head); it != NULL; it = next) {
1763: next = LIST_NEXT(it, ex_list);
1764: if (it->ex_sw == &esp[i]) {
1765: LIST_REMOVE(it, ex_list);
1766: kmem_free(it, sizeof(*it));
1767: break;
1768: }
1769: }
1.130 jdolecek 1770: }
1771:
1772: /* update execsw[] */
1773: exec_init(0);
1.237 ad 1774: rw_exit(&exec_lock);
1.282 ad 1775: return 0;
1.130 jdolecek 1776: }
1777:
1778: /*
1779: * Initialize exec structures. If init_boot is true, also does necessary
1780: * one-time initialization (it's called from main() that way).
1.147 jdolecek 1781: * Once system is multiuser, this should be called with exec_lock held,
1.130 jdolecek 1782: * i.e. via exec_{add|remove}().
1783: */
1784: int
1.138 lukem 1785: exec_init(int init_boot)
1.130 jdolecek 1786: {
1.282 ad 1787: const struct execsw **sw;
1788: struct exec_entry *ex;
1789: SLIST_HEAD(,exec_entry) first;
1790: SLIST_HEAD(,exec_entry) any;
1791: SLIST_HEAD(,exec_entry) last;
1792: int i, sz;
1.130 jdolecek 1793:
1794: if (init_boot) {
1795: /* do one-time initializations */
1.237 ad 1796: rw_init(&exec_lock);
1.259 ad 1797: mutex_init(&sigobject_lock, MUTEX_DEFAULT, IPL_NONE);
1.277 ad 1798: pool_init(&exec_pool, NCARGS, 0, 0, PR_NOALIGN|PR_NOTOUCH,
1799: "execargs", &exec_palloc, IPL_NONE);
1800: pool_sethardlimit(&exec_pool, maxexec, "should not happen", 0);
1.282 ad 1801: } else {
1802: KASSERT(rw_write_held(&exec_lock));
1803: }
1.130 jdolecek 1804:
1.282 ad 1805: /* Sort each entry onto the appropriate queue. */
1806: SLIST_INIT(&first);
1807: SLIST_INIT(&any);
1808: SLIST_INIT(&last);
1809: sz = 0;
1810: LIST_FOREACH(ex, &ex_head, ex_list) {
1811: switch(ex->ex_sw->es_prio) {
1812: case EXECSW_PRIO_FIRST:
1813: SLIST_INSERT_HEAD(&first, ex, ex_slist);
1814: break;
1815: case EXECSW_PRIO_ANY:
1816: SLIST_INSERT_HEAD(&any, ex, ex_slist);
1817: break;
1818: case EXECSW_PRIO_LAST:
1819: SLIST_INSERT_HEAD(&last, ex, ex_slist);
1820: break;
1821: default:
1.312 christos 1822: panic("%s", __func__);
1.282 ad 1823: break;
1.130 jdolecek 1824: }
1.282 ad 1825: sz++;
1.130 jdolecek 1826: }
1827:
1828: /*
1.282 ad 1829: * Create new execsw[]. Ensure we do not try a zero-sized
1830: * allocation.
1.130 jdolecek 1831: */
1.282 ad 1832: sw = kmem_alloc(sz * sizeof(struct execsw *) + 1, KM_SLEEP);
1833: i = 0;
1834: SLIST_FOREACH(ex, &first, ex_slist) {
1835: sw[i++] = ex->ex_sw;
1836: }
1837: SLIST_FOREACH(ex, &any, ex_slist) {
1838: sw[i++] = ex->ex_sw;
1839: }
1840: SLIST_FOREACH(ex, &last, ex_slist) {
1841: sw[i++] = ex->ex_sw;
1.130 jdolecek 1842: }
1.183 junyoung 1843:
1.282 ad 1844: /* Replace old execsw[] and free used memory. */
1845: if (execsw != NULL) {
1846: kmem_free(__UNCONST(execsw),
1847: nexecs * sizeof(struct execsw *) + 1);
1.130 jdolecek 1848: }
1.282 ad 1849: execsw = sw;
1850: nexecs = sz;
1.130 jdolecek 1851:
1.282 ad 1852: /* Figure out the maximum size of an exec header. */
1853: exec_maxhdrsz = sizeof(int);
1.130 jdolecek 1854: for (i = 0; i < nexecs; i++) {
1855: if (execsw[i]->es_hdrsz > exec_maxhdrsz)
1856: exec_maxhdrsz = execsw[i]->es_hdrsz;
1857: }
1858:
1859: return 0;
1860: }
1.171 chs 1861:
1862: static int
1863: exec_sigcode_map(struct proc *p, const struct emul *e)
1864: {
1865: vaddr_t va;
1866: vsize_t sz;
1867: int error;
1868: struct uvm_object *uobj;
1869:
1.184 drochner 1870: sz = (vaddr_t)e->e_esigcode - (vaddr_t)e->e_sigcode;
1871:
1872: if (e->e_sigobject == NULL || sz == 0) {
1.171 chs 1873: return 0;
1874: }
1875:
1876: /*
1877: * If we don't have a sigobject for this emulation, create one.
1878: *
1879: * sigobject is an anonymous memory object (just like SYSV shared
1880: * memory) that we keep a permanent reference to and that we map
1881: * in all processes that need this sigcode. The creation is simple,
1882: * we create an object, add a permanent reference to it, map it in
1883: * kernel space, copy out the sigcode to it and unmap it.
1.189 jdolecek 1884: * We map it with PROT_READ|PROT_EXEC into the process just
1885: * the way sys_mmap() would map it.
1.171 chs 1886: */
1887:
1888: uobj = *e->e_sigobject;
1889: if (uobj == NULL) {
1.259 ad 1890: mutex_enter(&sigobject_lock);
1891: if ((uobj = *e->e_sigobject) == NULL) {
1892: uobj = uao_create(sz, 0);
1893: (*uobj->pgops->pgo_reference)(uobj);
1894: va = vm_map_min(kernel_map);
1895: if ((error = uvm_map(kernel_map, &va, round_page(sz),
1896: uobj, 0, 0,
1897: UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
1898: UVM_INH_SHARE, UVM_ADV_RANDOM, 0)))) {
1899: printf("kernel mapping failed %d\n", error);
1900: (*uobj->pgops->pgo_detach)(uobj);
1901: mutex_exit(&sigobject_lock);
1.374 martin 1902: return error;
1.259 ad 1903: }
1904: memcpy((void *)va, e->e_sigcode, sz);
1.171 chs 1905: #ifdef PMAP_NEED_PROCWR
1.259 ad 1906: pmap_procwr(&proc0, va, sz);
1.171 chs 1907: #endif
1.259 ad 1908: uvm_unmap(kernel_map, va, va + round_page(sz));
1909: *e->e_sigobject = uobj;
1910: }
1911: mutex_exit(&sigobject_lock);
1.171 chs 1912: }
1913:
1.172 enami 1914: /* Just a hint to uvm_map where to put it. */
1.195 fvdl 1915: va = e->e_vm_default_addr(p, (vaddr_t)p->p_vmspace->vm_daddr,
1916: round_page(sz));
1.187 chs 1917:
1918: #ifdef __alpha__
1919: /*
1920: * Tru64 puts /sbin/loader at the end of user virtual memory,
1921: * which causes the above calculation to put the sigcode at
1922: * an invalid address. Put it just below the text instead.
1923: */
1.193 jmc 1924: if (va == (vaddr_t)vm_map_max(&p->p_vmspace->vm_map)) {
1.187 chs 1925: va = (vaddr_t)p->p_vmspace->vm_taddr - round_page(sz);
1926: }
1927: #endif
1928:
1.171 chs 1929: (*uobj->pgops->pgo_reference)(uobj);
1930: error = uvm_map(&p->p_vmspace->vm_map, &va, round_page(sz),
1931: uobj, 0, 0,
1932: UVM_MAPFLAG(UVM_PROT_RX, UVM_PROT_RX, UVM_INH_SHARE,
1933: UVM_ADV_RANDOM, 0));
1934: if (error) {
1.312 christos 1935: DPRINTF(("%s, %d: map %p "
1.305 matt 1936: "uvm_map %#"PRIxVSIZE"@%#"PRIxVADDR" failed %d\n",
1.312 christos 1937: __func__, __LINE__, &p->p_vmspace->vm_map, round_page(sz),
1938: va, error));
1.171 chs 1939: (*uobj->pgops->pgo_detach)(uobj);
1.374 martin 1940: return error;
1.171 chs 1941: }
1942: p->p_sigctx.ps_sigcode = (void *)va;
1.374 martin 1943: return 0;
1.171 chs 1944: }
1.336 matt 1945:
1.337 martin 1946: /*
1.348 martin 1947: * Release a refcount on spawn_exec_data and destroy memory, if this
1948: * was the last one.
1949: */
1950: static void
1951: spawn_exec_data_release(struct spawn_exec_data *data)
1952: {
1953: if (atomic_dec_32_nv(&data->sed_refcnt) != 0)
1954: return;
1955:
1956: cv_destroy(&data->sed_cv_child_ready);
1957: mutex_destroy(&data->sed_mtx_child);
1958:
1959: if (data->sed_actions)
1960: posix_spawn_fa_free(data->sed_actions,
1961: data->sed_actions->len);
1962: if (data->sed_attrs)
1963: kmem_free(data->sed_attrs,
1964: sizeof(*data->sed_attrs));
1965: kmem_free(data, sizeof(*data));
1966: }
1967:
1968: /*
1.337 martin 1969: * A child lwp of a posix_spawn operation starts here and ends up in
1970: * cpu_spawn_return, dealing with all filedescriptor and scheduler
1971: * manipulations in between.
1.369 christos 1972: * The parent waits for the child, as it is not clear whether the child
1973: * will be able to acquire its own exec_lock. If it can, the parent can
1.348 martin 1974: * be released early and continue running in parallel. If not (or if the
1975: * magic debug flag is passed in the scheduler attribute struct), the
1.369 christos 1976: * child rides on the parent's exec lock until it is ready to return to
1.348 martin 1977: * to userland - and only then releases the parent. This method loses
1978: * concurrency, but improves error reporting.
1.337 martin 1979: */
1980: static void
1981: spawn_return(void *arg)
1982: {
1983: struct spawn_exec_data *spawn_data = arg;
1984: struct lwp *l = curlwp;
1985: int error, newfd;
1.420 ! pgoyette 1986: int ostat;
1.337 martin 1987: size_t i;
1988: const struct posix_spawn_file_actions_entry *fae;
1.348 martin 1989: pid_t ppid;
1.337 martin 1990: register_t retval;
1.341 martin 1991: bool have_reflock;
1.348 martin 1992: bool parent_is_waiting = true;
1.345 martin 1993:
1.341 martin 1994: /*
1.348 martin 1995: * Check if we can release parent early.
1996: * We either need to have no sed_attrs, or sed_attrs does not
1997: * have POSIX_SPAWN_RETURNERROR or one of the flags, that require
1998: * safe access to the parent proc (passed in sed_parent).
1999: * We then try to get the exec_lock, and only if that works, we can
2000: * release the parent here already.
2001: */
2002: ppid = spawn_data->sed_parent->p_pid;
2003: if ((!spawn_data->sed_attrs
2004: || (spawn_data->sed_attrs->sa_flags
2005: & (POSIX_SPAWN_RETURNERROR|POSIX_SPAWN_SETPGROUP)) == 0)
2006: && rw_tryenter(&exec_lock, RW_READER)) {
2007: parent_is_waiting = false;
2008: mutex_enter(&spawn_data->sed_mtx_child);
2009: cv_signal(&spawn_data->sed_cv_child_ready);
2010: mutex_exit(&spawn_data->sed_mtx_child);
2011: }
1.341 martin 2012:
1.352 rmind 2013: /* don't allow debugger access yet */
2014: rw_enter(&l->l_proc->p_reflock, RW_WRITER);
2015: have_reflock = true;
2016:
2017: error = 0;
1.337 martin 2018: /* handle posix_spawn_file_actions */
2019: if (spawn_data->sed_actions != NULL) {
1.348 martin 2020: for (i = 0; i < spawn_data->sed_actions->len; i++) {
2021: fae = &spawn_data->sed_actions->fae[i];
1.337 martin 2022: switch (fae->fae_action) {
2023: case FAE_OPEN:
1.338 martin 2024: if (fd_getfile(fae->fae_fildes) != NULL) {
2025: error = fd_close(fae->fae_fildes);
2026: if (error)
2027: break;
2028: }
1.337 martin 2029: error = fd_open(fae->fae_path, fae->fae_oflag,
2030: fae->fae_mode, &newfd);
1.376 maxv 2031: if (error)
2032: break;
1.337 martin 2033: if (newfd != fae->fae_fildes) {
2034: error = dodup(l, newfd,
2035: fae->fae_fildes, 0, &retval);
2036: if (fd_getfile(newfd) != NULL)
2037: fd_close(newfd);
2038: }
2039: break;
2040: case FAE_DUP2:
2041: error = dodup(l, fae->fae_fildes,
2042: fae->fae_newfildes, 0, &retval);
2043: break;
2044: case FAE_CLOSE:
2045: if (fd_getfile(fae->fae_fildes) == NULL) {
2046: error = EBADF;
2047: break;
2048: }
2049: error = fd_close(fae->fae_fildes);
2050: break;
2051: }
2052: if (error)
2053: goto report_error;
2054: }
2055: }
2056:
2057: /* handle posix_spawnattr */
2058: if (spawn_data->sed_attrs != NULL) {
2059: struct sigaction sigact;
2060: sigact._sa_u._sa_handler = SIG_DFL;
2061: sigact.sa_flags = 0;
2062:
2063: /*
2064: * set state to SSTOP so that this proc can be found by pid.
2065: * see proc_enterprp, do_sched_setparam below
2066: */
1.420 ! pgoyette 2067: mutex_enter(proc_lock);
! 2068: /*
! 2069: * p_stat should be SACTIVE, so we need to adjust the
! 2070: * parent's p_nstopchild here. For safety, just make
! 2071: * we're on the good side of SDEAD before we adjust.
! 2072: */
1.358 christos 2073: ostat = l->l_proc->p_stat;
1.420 ! pgoyette 2074: KASSERT(ostat < SSTOP);
1.337 martin 2075: l->l_proc->p_stat = SSTOP;
1.420 ! pgoyette 2076: l->l_proc->p_waited = 0;
! 2077: l->l_proc->p_pptr->p_nstopchild++;
! 2078: mutex_exit(proc_lock);
1.337 martin 2079:
2080: /* Set process group */
2081: if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETPGROUP) {
2082: pid_t mypid = l->l_proc->p_pid,
2083: pgrp = spawn_data->sed_attrs->sa_pgroup;
2084:
2085: if (pgrp == 0)
2086: pgrp = mypid;
2087:
2088: error = proc_enterpgrp(spawn_data->sed_parent,
2089: mypid, pgrp, false);
2090: if (error)
1.420 ! pgoyette 2091: goto report_error_stopped;
1.337 martin 2092: }
2093:
2094: /* Set scheduler policy */
2095: if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETSCHEDULER)
2096: error = do_sched_setparam(l->l_proc->p_pid, 0,
2097: spawn_data->sed_attrs->sa_schedpolicy,
2098: &spawn_data->sed_attrs->sa_schedparam);
2099: else if (spawn_data->sed_attrs->sa_flags
2100: & POSIX_SPAWN_SETSCHEDPARAM) {
1.348 martin 2101: error = do_sched_setparam(ppid, 0,
1.337 martin 2102: SCHED_NONE, &spawn_data->sed_attrs->sa_schedparam);
2103: }
2104: if (error)
1.420 ! pgoyette 2105: goto report_error_stopped;
1.337 martin 2106:
2107: /* Reset user ID's */
2108: if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_RESETIDS) {
2109: error = do_setresuid(l, -1,
2110: kauth_cred_getgid(l->l_cred), -1,
2111: ID_E_EQ_R | ID_E_EQ_S);
2112: if (error)
1.420 ! pgoyette 2113: goto report_error_stopped;
1.337 martin 2114: error = do_setresuid(l, -1,
2115: kauth_cred_getuid(l->l_cred), -1,
2116: ID_E_EQ_R | ID_E_EQ_S);
2117: if (error)
1.420 ! pgoyette 2118: goto report_error_stopped;
1.337 martin 2119: }
2120:
2121: /* Set signal masks/defaults */
2122: if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETSIGMASK) {
2123: mutex_enter(l->l_proc->p_lock);
2124: error = sigprocmask1(l, SIG_SETMASK,
2125: &spawn_data->sed_attrs->sa_sigmask, NULL);
2126: mutex_exit(l->l_proc->p_lock);
2127: if (error)
1.420 ! pgoyette 2128: goto report_error_stopped;
1.337 martin 2129: }
2130:
2131: if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETSIGDEF) {
1.375 christos 2132: /*
2133: * The following sigaction call is using a sigaction
2134: * version 0 trampoline which is in the compatibility
2135: * code only. This is not a problem because for SIG_DFL
2136: * and SIG_IGN, the trampolines are now ignored. If they
2137: * were not, this would be a problem because we are
2138: * holding the exec_lock, and the compat code needs
2139: * to do the same in order to replace the trampoline
2140: * code of the process.
2141: */
1.337 martin 2142: for (i = 1; i <= NSIG; i++) {
2143: if (sigismember(
2144: &spawn_data->sed_attrs->sa_sigdefault, i))
2145: sigaction1(l, i, &sigact, NULL, NULL,
2146: 0);
2147: }
2148: }
1.420 ! pgoyette 2149: mutex_enter(proc_lock);
1.358 christos 2150: l->l_proc->p_stat = ostat;
1.420 ! pgoyette 2151: l->l_proc->p_pptr->p_nstopchild--;
! 2152: mutex_exit(proc_lock);
1.337 martin 2153: }
2154:
1.352 rmind 2155: /* now do the real exec */
1.348 martin 2156: error = execve_runproc(l, &spawn_data->sed_exec, parent_is_waiting,
2157: true);
1.341 martin 2158: have_reflock = false;
1.352 rmind 2159: if (error == EJUSTRETURN)
2160: error = 0;
2161: else if (error)
1.337 martin 2162: goto report_error;
2163:
1.348 martin 2164: if (parent_is_waiting) {
2165: mutex_enter(&spawn_data->sed_mtx_child);
2166: cv_signal(&spawn_data->sed_cv_child_ready);
2167: mutex_exit(&spawn_data->sed_mtx_child);
2168: }
1.345 martin 2169:
1.348 martin 2170: /* release our refcount on the data */
2171: spawn_exec_data_release(spawn_data);
1.337 martin 2172:
1.369 christos 2173: /* and finally: leave to userland for the first time */
1.337 martin 2174: cpu_spawn_return(l);
2175:
2176: /* NOTREACHED */
2177: return;
2178:
1.420 ! pgoyette 2179: report_error_stopped:
! 2180: mutex_enter(proc_lock);
! 2181: l->l_proc->p_stat = ostat;
! 2182: l->l_proc->p_pptr->p_nstopchild--;
! 2183: mutex_exit(proc_lock);
1.337 martin 2184: report_error:
1.376 maxv 2185: if (have_reflock) {
2186: /*
1.350 martin 2187: * We have not passed through execve_runproc(),
2188: * which would have released the p_reflock and also
2189: * taken ownership of the sed_exec part of spawn_data,
2190: * so release/free both here.
2191: */
1.341 martin 2192: rw_exit(&l->l_proc->p_reflock);
1.350 martin 2193: execve_free_data(&spawn_data->sed_exec);
2194: }
1.341 martin 2195:
1.348 martin 2196: if (parent_is_waiting) {
2197: /* pass error to parent */
2198: mutex_enter(&spawn_data->sed_mtx_child);
2199: spawn_data->sed_error = error;
2200: cv_signal(&spawn_data->sed_cv_child_ready);
2201: mutex_exit(&spawn_data->sed_mtx_child);
2202: } else {
2203: rw_exit(&exec_lock);
1.337 martin 2204: }
2205:
1.348 martin 2206: /* release our refcount on the data */
2207: spawn_exec_data_release(spawn_data);
2208:
1.352 rmind 2209: /* done, exit */
2210: mutex_enter(l->l_proc->p_lock);
1.348 martin 2211: /*
1.352 rmind 2212: * Posix explicitly asks for an exit code of 127 if we report
1.348 martin 2213: * errors from the child process - so, unfortunately, there
2214: * is no way to report a more exact error code.
2215: * A NetBSD specific workaround is POSIX_SPAWN_RETURNERROR as
2216: * flag bit in the attrp argument to posix_spawn(2), see above.
2217: */
1.349 martin 2218: exit1(l, W_EXITCODE(127, 0));
1.337 martin 2219: }
2220:
1.348 martin 2221: void
1.344 christos 2222: posix_spawn_fa_free(struct posix_spawn_file_actions *fa, size_t len)
1.342 christos 2223: {
2224:
1.344 christos 2225: for (size_t i = 0; i < len; i++) {
1.342 christos 2226: struct posix_spawn_file_actions_entry *fae = &fa->fae[i];
2227: if (fae->fae_action != FAE_OPEN)
2228: continue;
2229: kmem_free(fae->fae_path, strlen(fae->fae_path) + 1);
2230: }
1.348 martin 2231: if (fa->len > 0)
1.343 christos 2232: kmem_free(fa->fae, sizeof(*fa->fae) * fa->len);
1.342 christos 2233: kmem_free(fa, sizeof(*fa));
2234: }
2235:
2236: static int
2237: posix_spawn_fa_alloc(struct posix_spawn_file_actions **fap,
1.373 martin 2238: const struct posix_spawn_file_actions *ufa, rlim_t lim)
1.342 christos 2239: {
2240: struct posix_spawn_file_actions *fa;
2241: struct posix_spawn_file_actions_entry *fae;
2242: char *pbuf = NULL;
2243: int error;
1.352 rmind 2244: size_t i = 0;
1.342 christos 2245:
2246: fa = kmem_alloc(sizeof(*fa), KM_SLEEP);
2247: error = copyin(ufa, fa, sizeof(*fa));
1.369 christos 2248: if (error || fa->len == 0) {
1.348 martin 2249: kmem_free(fa, sizeof(*fa));
1.369 christos 2250: return error; /* 0 if not an error, and len == 0 */
1.348 martin 2251: }
1.342 christos 2252:
1.373 martin 2253: if (fa->len > lim) {
2254: kmem_free(fa, sizeof(*fa));
2255: return EINVAL;
2256: }
2257:
1.348 martin 2258: fa->size = fa->len;
1.352 rmind 2259: size_t fal = fa->len * sizeof(*fae);
2260: fae = fa->fae;
2261: fa->fae = kmem_alloc(fal, KM_SLEEP);
2262: error = copyin(fae, fa->fae, fal);
1.344 christos 2263: if (error)
1.342 christos 2264: goto out;
2265:
2266: pbuf = PNBUF_GET();
1.344 christos 2267: for (; i < fa->len; i++) {
1.342 christos 2268: fae = &fa->fae[i];
2269: if (fae->fae_action != FAE_OPEN)
2270: continue;
1.352 rmind 2271: error = copyinstr(fae->fae_path, pbuf, MAXPATHLEN, &fal);
1.344 christos 2272: if (error)
1.342 christos 2273: goto out;
1.352 rmind 2274: fae->fae_path = kmem_alloc(fal, KM_SLEEP);
2275: memcpy(fae->fae_path, pbuf, fal);
1.342 christos 2276: }
2277: PNBUF_PUT(pbuf);
1.348 martin 2278:
1.342 christos 2279: *fap = fa;
2280: return 0;
2281: out:
2282: if (pbuf)
2283: PNBUF_PUT(pbuf);
1.344 christos 2284: posix_spawn_fa_free(fa, i);
1.342 christos 2285: return error;
2286: }
2287:
1.337 martin 2288: int
1.348 martin 2289: check_posix_spawn(struct lwp *l1)
1.337 martin 2290: {
1.348 martin 2291: int error, tnprocs, count;
1.337 martin 2292: uid_t uid;
1.348 martin 2293: struct proc *p1;
1.337 martin 2294:
2295: p1 = l1->l_proc;
2296: uid = kauth_cred_getuid(l1->l_cred);
2297: tnprocs = atomic_inc_uint_nv(&nprocs);
2298:
2299: /*
2300: * Although process entries are dynamically created, we still keep
2301: * a global limit on the maximum number we will create.
2302: */
2303: if (__predict_false(tnprocs >= maxproc))
2304: error = -1;
2305: else
2306: error = kauth_authorize_process(l1->l_cred,
2307: KAUTH_PROCESS_FORK, p1, KAUTH_ARG(tnprocs), NULL, NULL);
2308:
2309: if (error) {
2310: atomic_dec_uint(&nprocs);
1.348 martin 2311: return EAGAIN;
1.337 martin 2312: }
2313:
2314: /*
2315: * Enforce limits.
2316: */
2317: count = chgproccnt(uid, 1);
1.347 elad 2318: if (kauth_authorize_process(l1->l_cred, KAUTH_PROCESS_RLIMIT,
2319: p1, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_BYPASS),
2320: &p1->p_rlimit[RLIMIT_NPROC], KAUTH_ARG(RLIMIT_NPROC)) != 0 &&
2321: __predict_false(count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur)) {
1.348 martin 2322: (void)chgproccnt(uid, -1);
2323: atomic_dec_uint(&nprocs);
2324: return EAGAIN;
1.337 martin 2325: }
2326:
1.348 martin 2327: return 0;
2328: }
2329:
2330: int
1.352 rmind 2331: do_posix_spawn(struct lwp *l1, pid_t *pid_res, bool *child_ok, const char *path,
2332: struct posix_spawn_file_actions *fa,
2333: struct posix_spawnattr *sa,
2334: char *const *argv, char *const *envp,
2335: execve_fetch_element_t fetch)
1.348 martin 2336: {
1.352 rmind 2337:
1.348 martin 2338: struct proc *p1, *p2;
2339: struct lwp *l2;
2340: int error;
2341: struct spawn_exec_data *spawn_data;
2342: vaddr_t uaddr;
2343: pid_t pid;
1.352 rmind 2344: bool have_exec_lock = false;
1.348 martin 2345:
2346: p1 = l1->l_proc;
1.342 christos 2347:
1.348 martin 2348: /* Allocate and init spawn_data */
2349: spawn_data = kmem_zalloc(sizeof(*spawn_data), KM_SLEEP);
2350: spawn_data->sed_refcnt = 1; /* only parent so far */
2351: cv_init(&spawn_data->sed_cv_child_ready, "pspawn");
2352: mutex_init(&spawn_data->sed_mtx_child, MUTEX_DEFAULT, IPL_NONE);
1.352 rmind 2353: mutex_enter(&spawn_data->sed_mtx_child);
2354:
2355: /*
2356: * Do the first part of the exec now, collect state
2357: * in spawn_data.
2358: */
2359: error = execve_loadvm(l1, path, argv,
2360: envp, fetch, &spawn_data->sed_exec);
2361: if (error == EJUSTRETURN)
2362: error = 0;
2363: else if (error)
2364: goto error_exit;
2365:
2366: have_exec_lock = true;
1.337 martin 2367:
2368: /*
2369: * Allocate virtual address space for the U-area now, while it
2370: * is still easy to abort the fork operation if we're out of
2371: * kernel virtual address space.
2372: */
2373: uaddr = uvm_uarea_alloc();
2374: if (__predict_false(uaddr == 0)) {
1.352 rmind 2375: error = ENOMEM;
2376: goto error_exit;
1.351 rmind 2377: }
1.352 rmind 2378:
1.337 martin 2379: /*
1.348 martin 2380: * Allocate new proc. Borrow proc0 vmspace for it, we will
2381: * replace it with its own before returning to userland
2382: * in the child.
1.337 martin 2383: * This is a point of no return, we will have to go through
2384: * the child proc to properly clean it up past this point.
2385: */
2386: p2 = proc_alloc();
2387: pid = p2->p_pid;
2388:
2389: /*
2390: * Make a proc table entry for the new process.
2391: * Start by zeroing the section of proc that is zero-initialized,
2392: * then copy the section that is copied directly from the parent.
2393: */
2394: memset(&p2->p_startzero, 0,
2395: (unsigned) ((char *)&p2->p_endzero - (char *)&p2->p_startzero));
2396: memcpy(&p2->p_startcopy, &p1->p_startcopy,
2397: (unsigned) ((char *)&p2->p_endcopy - (char *)&p2->p_startcopy));
1.348 martin 2398: p2->p_vmspace = proc0.p_vmspace;
1.337 martin 2399:
1.366 christos 2400: TAILQ_INIT(&p2->p_sigpend.sp_info);
1.337 martin 2401:
2402: LIST_INIT(&p2->p_lwps);
2403: LIST_INIT(&p2->p_sigwaiters);
2404:
2405: /*
2406: * Duplicate sub-structures as needed.
2407: * Increase reference counts on shared objects.
2408: * Inherit flags we want to keep. The flags related to SIGCHLD
2409: * handling are important in order to keep a consistent behaviour
2410: * for the child after the fork. If we are a 32-bit process, the
2411: * child will be too.
2412: */
2413: p2->p_flag =
2414: p1->p_flag & (PK_SUGID | PK_NOCLDWAIT | PK_CLDSIGIGN | PK_32);
2415: p2->p_emul = p1->p_emul;
2416: p2->p_execsw = p1->p_execsw;
2417:
2418: mutex_init(&p2->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
2419: mutex_init(&p2->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
2420: rw_init(&p2->p_reflock);
2421: cv_init(&p2->p_waitcv, "wait");
2422: cv_init(&p2->p_lwpcv, "lwpwait");
2423:
2424: p2->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
2425:
2426: kauth_proc_fork(p1, p2);
2427:
2428: p2->p_raslist = NULL;
2429: p2->p_fd = fd_copy();
2430:
2431: /* XXX racy */
2432: p2->p_mqueue_cnt = p1->p_mqueue_cnt;
2433:
2434: p2->p_cwdi = cwdinit();
2435:
2436: /*
2437: * Note: p_limit (rlimit stuff) is copy-on-write, so normally
2438: * we just need increase pl_refcnt.
2439: */
1.348 martin 2440: if (!p1->p_limit->pl_writeable) {
2441: lim_addref(p1->p_limit);
2442: p2->p_limit = p1->p_limit;
1.337 martin 2443: } else {
2444: p2->p_limit = lim_copy(p1->p_limit);
2445: }
2446:
2447: p2->p_lflag = 0;
2448: p2->p_sflag = 0;
2449: p2->p_slflag = 0;
2450: p2->p_pptr = p1;
2451: p2->p_ppid = p1->p_pid;
2452: LIST_INIT(&p2->p_children);
2453:
2454: p2->p_aio = NULL;
2455:
2456: #ifdef KTRACE
2457: /*
2458: * Copy traceflag and tracefile if enabled.
2459: * If not inherited, these were zeroed above.
2460: */
2461: if (p1->p_traceflag & KTRFAC_INHERIT) {
2462: mutex_enter(&ktrace_lock);
2463: p2->p_traceflag = p1->p_traceflag;
2464: if ((p2->p_tracep = p1->p_tracep) != NULL)
2465: ktradref(p2);
2466: mutex_exit(&ktrace_lock);
2467: }
2468: #endif
2469:
2470: /*
2471: * Create signal actions for the child process.
2472: */
2473: p2->p_sigacts = sigactsinit(p1, 0);
2474: mutex_enter(p1->p_lock);
2475: p2->p_sflag |=
2476: (p1->p_sflag & (PS_STOPFORK | PS_STOPEXEC | PS_NOCLDSTOP));
2477: sched_proc_fork(p1, p2);
2478: mutex_exit(p1->p_lock);
2479:
2480: p2->p_stflag = p1->p_stflag;
2481:
2482: /*
2483: * p_stats.
2484: * Copy parts of p_stats, and zero out the rest.
2485: */
2486: p2->p_stats = pstatscopy(p1->p_stats);
2487:
2488: /* copy over machdep flags to the new proc */
2489: cpu_proc_fork(p1, p2);
2490:
2491: /*
1.352 rmind 2492: * Prepare remaining parts of spawn data
1.337 martin 2493: */
1.348 martin 2494: spawn_data->sed_actions = fa;
2495: spawn_data->sed_attrs = sa;
1.352 rmind 2496:
1.337 martin 2497: spawn_data->sed_parent = p1;
2498:
1.352 rmind 2499: /* create LWP */
1.337 martin 2500: lwp_create(l1, p2, uaddr, 0, NULL, 0, spawn_return, spawn_data,
2501: &l2, l1->l_class);
2502: l2->l_ctxlink = NULL; /* reset ucontext link */
2503:
2504: /*
2505: * Copy the credential so other references don't see our changes.
2506: * Test to see if this is necessary first, since in the common case
2507: * we won't need a private reference.
2508: */
2509: if (kauth_cred_geteuid(l2->l_cred) != kauth_cred_getsvuid(l2->l_cred) ||
2510: kauth_cred_getegid(l2->l_cred) != kauth_cred_getsvgid(l2->l_cred)) {
2511: l2->l_cred = kauth_cred_copy(l2->l_cred);
2512: kauth_cred_setsvuid(l2->l_cred, kauth_cred_geteuid(l2->l_cred));
2513: kauth_cred_setsvgid(l2->l_cred, kauth_cred_getegid(l2->l_cred));
2514: }
2515:
2516: /* Update the master credentials. */
2517: if (l2->l_cred != p2->p_cred) {
2518: kauth_cred_t ocred;
2519:
2520: kauth_cred_hold(l2->l_cred);
2521: mutex_enter(p2->p_lock);
2522: ocred = p2->p_cred;
2523: p2->p_cred = l2->l_cred;
2524: mutex_exit(p2->p_lock);
2525: kauth_cred_free(ocred);
2526: }
2527:
1.352 rmind 2528: *child_ok = true;
2529: spawn_data->sed_refcnt = 2; /* child gets it as well */
1.348 martin 2530: #if 0
1.345 martin 2531: l2->l_nopreempt = 1; /* start it non-preemptable */
1.348 martin 2532: #endif
1.345 martin 2533:
1.337 martin 2534: /*
2535: * It's now safe for the scheduler and other processes to see the
2536: * child process.
2537: */
2538: mutex_enter(proc_lock);
2539:
2540: if (p1->p_session->s_ttyvp != NULL && p1->p_lflag & PL_CONTROLT)
2541: p2->p_lflag |= PL_CONTROLT;
2542:
2543: LIST_INSERT_HEAD(&p1->p_children, p2, p_sibling);
2544: p2->p_exitsig = SIGCHLD; /* signal for parent on exit */
2545:
2546: LIST_INSERT_AFTER(p1, p2, p_pglist);
2547: LIST_INSERT_HEAD(&allproc, p2, p_list);
2548:
2549: p2->p_trace_enabled = trace_is_enabled(p2);
2550: #ifdef __HAVE_SYSCALL_INTERN
2551: (*p2->p_emul->e_syscall_intern)(p2);
2552: #endif
2553:
2554: /*
2555: * Make child runnable, set start time, and add to run queue except
2556: * if the parent requested the child to start in SSTOP state.
2557: */
2558: mutex_enter(p2->p_lock);
2559:
2560: getmicrotime(&p2->p_stats->p_start);
2561:
2562: lwp_lock(l2);
2563: KASSERT(p2->p_nrlwps == 1);
2564: p2->p_nrlwps = 1;
2565: p2->p_stat = SACTIVE;
2566: l2->l_stat = LSRUN;
2567: sched_enqueue(l2, false);
2568: lwp_unlock(l2);
2569:
2570: mutex_exit(p2->p_lock);
2571: mutex_exit(proc_lock);
2572:
2573: cv_wait(&spawn_data->sed_cv_child_ready, &spawn_data->sed_mtx_child);
1.348 martin 2574: error = spawn_data->sed_error;
1.337 martin 2575: mutex_exit(&spawn_data->sed_mtx_child);
1.352 rmind 2576: spawn_exec_data_release(spawn_data);
1.337 martin 2577:
1.341 martin 2578: rw_exit(&p1->p_reflock);
1.337 martin 2579: rw_exit(&exec_lock);
1.352 rmind 2580: have_exec_lock = false;
1.351 rmind 2581:
1.352 rmind 2582: *pid_res = pid;
2583: return error;
2584:
2585: error_exit:
1.376 maxv 2586: if (have_exec_lock) {
1.352 rmind 2587: execve_free_data(&spawn_data->sed_exec);
2588: rw_exit(&p1->p_reflock);
1.376 maxv 2589: rw_exit(&exec_lock);
1.352 rmind 2590: }
2591: mutex_exit(&spawn_data->sed_mtx_child);
1.351 rmind 2592: spawn_exec_data_release(spawn_data);
1.376 maxv 2593:
1.348 martin 2594: return error;
2595: }
1.337 martin 2596:
1.348 martin 2597: int
2598: sys_posix_spawn(struct lwp *l1, const struct sys_posix_spawn_args *uap,
2599: register_t *retval)
2600: {
2601: /* {
2602: syscallarg(pid_t *) pid;
2603: syscallarg(const char *) path;
2604: syscallarg(const struct posix_spawn_file_actions *) file_actions;
2605: syscallarg(const struct posix_spawnattr *) attrp;
2606: syscallarg(char *const *) argv;
2607: syscallarg(char *const *) envp;
2608: } */
2609:
2610: int error;
2611: struct posix_spawn_file_actions *fa = NULL;
2612: struct posix_spawnattr *sa = NULL;
2613: pid_t pid;
1.352 rmind 2614: bool child_ok = false;
1.373 martin 2615: rlim_t max_fileactions;
2616: proc_t *p = l1->l_proc;
1.348 martin 2617:
2618: error = check_posix_spawn(l1);
2619: if (error) {
2620: *retval = error;
2621: return 0;
2622: }
2623:
2624: /* copy in file_actions struct */
2625: if (SCARG(uap, file_actions) != NULL) {
1.373 martin 2626: max_fileactions = 2 * min(p->p_rlimit[RLIMIT_NOFILE].rlim_cur,
2627: maxfiles);
2628: error = posix_spawn_fa_alloc(&fa, SCARG(uap, file_actions),
2629: max_fileactions);
1.348 martin 2630: if (error)
1.352 rmind 2631: goto error_exit;
1.348 martin 2632: }
2633:
2634: /* copyin posix_spawnattr struct */
2635: if (SCARG(uap, attrp) != NULL) {
2636: sa = kmem_alloc(sizeof(*sa), KM_SLEEP);
2637: error = copyin(SCARG(uap, attrp), sa, sizeof(*sa));
2638: if (error)
1.352 rmind 2639: goto error_exit;
1.348 martin 2640: }
1.337 martin 2641:
1.348 martin 2642: /*
2643: * Do the spawn
2644: */
1.352 rmind 2645: error = do_posix_spawn(l1, &pid, &child_ok, SCARG(uap, path), fa, sa,
1.348 martin 2646: SCARG(uap, argv), SCARG(uap, envp), execve_fetch_element);
2647: if (error)
1.352 rmind 2648: goto error_exit;
1.337 martin 2649:
2650: if (error == 0 && SCARG(uap, pid) != NULL)
2651: error = copyout(&pid, SCARG(uap, pid), sizeof(pid));
2652:
2653: *retval = error;
2654: return 0;
2655:
1.352 rmind 2656: error_exit:
2657: if (!child_ok) {
2658: (void)chgproccnt(kauth_cred_getuid(l1->l_cred), -1);
2659: atomic_dec_uint(&nprocs);
2660:
2661: if (sa)
2662: kmem_free(sa, sizeof(*sa));
2663: if (fa)
2664: posix_spawn_fa_free(fa, fa->len);
2665: }
2666:
1.337 martin 2667: *retval = error;
2668: return 0;
2669: }
2670:
1.336 matt 2671: void
2672: exec_free_emul_arg(struct exec_package *epp)
2673: {
2674: if (epp->ep_emul_arg_free != NULL) {
2675: KASSERT(epp->ep_emul_arg != NULL);
2676: (*epp->ep_emul_arg_free)(epp->ep_emul_arg);
2677: epp->ep_emul_arg_free = NULL;
2678: epp->ep_emul_arg = NULL;
2679: } else {
2680: KASSERT(epp->ep_emul_arg == NULL);
2681: }
2682: }
1.388 uebayasi 2683:
2684: #ifdef DEBUG_EXEC
2685: static void
2686: dump_vmcmds(const struct exec_package * const epp, size_t x, int error)
2687: {
2688: struct exec_vmcmd *vp = &epp->ep_vmcmds.evs_cmds[0];
2689: size_t j;
2690:
2691: if (error == 0)
2692: DPRINTF(("vmcmds %u\n", epp->ep_vmcmds.evs_used));
2693: else
2694: DPRINTF(("vmcmds %zu/%u, error %d\n", x,
2695: epp->ep_vmcmds.evs_used, error));
2696:
2697: for (j = 0; j < epp->ep_vmcmds.evs_used; j++) {
2698: DPRINTF(("vmcmd[%zu] = vmcmd_map_%s %#"
2699: PRIxVADDR"/%#"PRIxVSIZE" fd@%#"
2700: PRIxVSIZE" prot=0%o flags=%d\n", j,
2701: vp[j].ev_proc == vmcmd_map_pagedvn ?
2702: "pagedvn" :
2703: vp[j].ev_proc == vmcmd_map_readvn ?
2704: "readvn" :
2705: vp[j].ev_proc == vmcmd_map_zero ?
2706: "zero" : "*unknown*",
2707: vp[j].ev_addr, vp[j].ev_len,
2708: vp[j].ev_offset, vp[j].ev_prot,
2709: vp[j].ev_flags));
2710: if (error != 0 && j == x)
2711: DPRINTF((" ^--- failed\n"));
2712: }
2713: }
2714: #endif
CVSweb <webmaster@jp.NetBSD.org>