Annotation of src/sys/kern/kern_exec.c, Revision 1.280.2.3
1.280.2.3! skrll 1: /* $NetBSD: kern_exec.c,v 1.280.2.2 2009/03/03 18:32:55 skrll Exp $ */
1.277 ad 2:
3: /*-
4: * Copyright (c) 2008 The NetBSD Foundation, Inc.
5: * All rights reserved.
6: *
7: * Redistribution and use in source and binary forms, with or without
8: * modification, are permitted provided that the following conditions
9: * are met:
10: * 1. Redistributions of source code must retain the above copyright
11: * notice, this list of conditions and the following disclaimer.
12: * 2. Redistributions in binary form must reproduce the above copyright
13: * notice, this list of conditions and the following disclaimer in the
14: * documentation and/or other materials provided with the distribution.
15: *
16: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26: * POSSIBILITY OF SUCH DAMAGE.
27: */
1.55 cgd 28:
29: /*-
1.77 cgd 30: * Copyright (C) 1993, 1994, 1996 Christopher G. Demetriou
1.55 cgd 31: * Copyright (C) 1992 Wolfgang Solfrank.
32: * Copyright (C) 1992 TooLs GmbH.
33: * All rights reserved.
34: *
35: * Redistribution and use in source and binary forms, with or without
36: * modification, are permitted provided that the following conditions
37: * are met:
38: * 1. Redistributions of source code must retain the above copyright
39: * notice, this list of conditions and the following disclaimer.
40: * 2. Redistributions in binary form must reproduce the above copyright
41: * notice, this list of conditions and the following disclaimer in the
42: * documentation and/or other materials provided with the distribution.
43: * 3. All advertising materials mentioning features or use of this software
44: * must display the following acknowledgement:
45: * This product includes software developed by TooLs GmbH.
46: * 4. The name of TooLs GmbH may not be used to endorse or promote products
47: * derived from this software without specific prior written permission.
48: *
49: * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
50: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
51: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
52: * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
53: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
54: * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
55: * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
56: * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
57: * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
58: * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
59: */
1.146 lukem 60:
61: #include <sys/cdefs.h>
1.280.2.3! skrll 62: __KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.280.2.2 2009/03/03 18:32:55 skrll Exp $");
1.89 mrg 63:
1.92 thorpej 64: #include "opt_ktrace.h"
1.280.2.2 skrll 65: #include "opt_modular.h"
1.124 jdolecek 66: #include "opt_syscall_debug.h"
1.226 dogcow 67: #include "veriexec.h"
1.232 elad 68: #include "opt_pax.h"
1.279 wrstuden 69: #include "opt_sa.h"
1.55 cgd 70:
71: #include <sys/param.h>
72: #include <sys/systm.h>
73: #include <sys/filedesc.h>
74: #include <sys/kernel.h>
75: #include <sys/proc.h>
76: #include <sys/mount.h>
77: #include <sys/malloc.h>
1.265 yamt 78: #include <sys/kmem.h>
1.55 cgd 79: #include <sys/namei.h>
80: #include <sys/vnode.h>
81: #include <sys/file.h>
82: #include <sys/acct.h>
83: #include <sys/exec.h>
84: #include <sys/ktrace.h>
1.278 pooka 85: #include <sys/uidinfo.h>
1.55 cgd 86: #include <sys/wait.h>
87: #include <sys/mman.h>
1.155 gmcgarry 88: #include <sys/ras.h>
1.55 cgd 89: #include <sys/signalvar.h>
90: #include <sys/stat.h>
1.124 jdolecek 91: #include <sys/syscall.h>
1.218 elad 92: #include <sys/kauth.h>
1.253 ad 93: #include <sys/lwpctl.h>
1.260 christos 94: #include <sys/pax.h>
1.263 ad 95: #include <sys/cpu.h>
1.280.2.1 skrll 96: #include <sys/module.h>
1.279 wrstuden 97: #include <sys/sa.h>
98: #include <sys/savar.h>
1.56 cgd 99: #include <sys/syscallargs.h>
1.222 elad 100: #if NVERIEXEC > 0
1.197 blymn 101: #include <sys/verified_exec.h>
1.222 elad 102: #endif /* NVERIEXEC > 0 */
1.55 cgd 103:
1.88 mrg 104: #include <uvm/uvm_extern.h>
105:
1.55 cgd 106: #include <machine/reg.h>
107:
1.244 dsl 108: #include <compat/common/compat_util.h>
109:
1.171 chs 110: static int exec_sigcode_map(struct proc *, const struct emul *);
111:
1.143 christos 112: #ifdef DEBUG_EXEC
113: #define DPRINTF(a) uprintf a
114: #else
115: #define DPRINTF(a)
116: #endif /* DEBUG_EXEC */
1.165 thorpej 117:
1.130 jdolecek 118: /*
119: * Exec function switch:
120: *
121: * Note that each makecmds function is responsible for loading the
122: * exec package with the necessary functions for any exec-type-specific
123: * handling.
124: *
125: * Functions for specific exec types should be defined in their own
126: * header file.
127: */
1.138 lukem 128: static const struct execsw **execsw = NULL;
129: static int nexecs;
130:
1.280.2.1 skrll 131: u_int exec_maxhdrsz; /* must not be static - used by netbsd32 */
1.130 jdolecek 132:
133: /* list of dynamically loaded execsw entries */
/*
 * ex_head is the list head for struct exec_entry nodes; it is statically
 * initialised with LIST_HEAD_INITIALIZER.  The struct itself is only
 * completed below, after the head has been declared.
 */
1.280.2.1 skrll 134: static LIST_HEAD(execlist_head, exec_entry) ex_head =
135: LIST_HEAD_INITIALIZER(ex_head);
/* One node per execsw entry tracked on the list above. */
1.130 jdolecek 136: struct exec_entry {
/* LIST linkage -- presumably the ex_head chain; users are outside this view. */
1.138 lukem 137: LIST_ENTRY(exec_entry) ex_list;
/* Separate SLIST linkage; NOTE(review): its users are not visible here. */
1.280.2.1 skrll 138: SLIST_ENTRY(exec_entry) ex_slist;
/* The execsw entry this node refers to (not modifiable through this pointer). */
139: const struct execsw *ex_sw;
1.130 jdolecek 140: };
141:
1.124 jdolecek 142: #ifdef SYSCALL_DEBUG
143: extern const char * const syscallnames[];
144: #endif
145:
1.203 christos 146: #ifndef __HAVE_SYSCALL_INTERN
147: void syscall(void);
148: #endif
149:
1.280 matt 150: #ifdef KERN_SA
/*
 * SA emulation hooks for the native emulation.  Only built when KERN_SA is
 * defined; emul_netbsd points at this object in that configuration.
 *
 * The initializer is positional, so the entries must stay in the member
 * order of struct sa_emul (declared elsewhere, not visible in this file).
 * The first three entries are object sizes; getucontext_sa is cast to the
 * (struct lwp *, void *) callback signature before being stored.
 */
1.280.2.1 skrll 151: static struct sa_emul saemul_netbsd = {
1.279 wrstuden 152: sizeof(ucontext_t),
153: sizeof(struct sa_t),
154: sizeof(struct sa_t *),
155: NULL,
156: NULL,
157: cpu_upcall,
158: (void (*)(struct lwp *, void *))getucontext_sa,
159: sa_ucsp
160: };
1.280 matt 161: #endif /* KERN_SA */
1.279 wrstuden 162:
1.173 christos 163: /* NetBSD emul struct */
/*
 * Emulation descriptor named "netbsd".
 *
 * The initializer is positional: every entry must stay in the member order
 * of struct emul (declared elsewhere, not visible in this file), and the
 * preprocessor conditionals below add, drop or swap individual entries
 * depending on the kernel configuration.
 */
1.280.2.1 skrll 164: struct emul emul_netbsd = {
1.124 jdolecek 165: "netbsd",
1.127 jdolecek 166: NULL, /* emulation path */
/* The next four entries exist only when __HAVE_MINIMAL_EMUL is not defined. */
1.133 mycroft 167: #ifndef __HAVE_MINIMAL_EMUL
1.140 manu 168: EMUL_HAS_SYS___syscall,
1.124 jdolecek 169: NULL,
170: SYS_syscall,
1.161 jdolecek 171: SYS_NSYSENT,
1.133 mycroft 172: #endif
1.124 jdolecek 173: sysent,
/* The syscall name table is supplied only under SYSCALL_DEBUG, else NULL. */
174: #ifdef SYSCALL_DEBUG
175: syscallnames,
176: #else
177: NULL,
178: #endif
1.133 mycroft 179: sendsig,
1.142 christos 180: trapsignal,
1.180 fvdl 181: NULL,
1.173 christos 182: NULL,
183: NULL,
184: NULL,
1.145 jdolecek 185: setregs,
1.128 jdolecek 186: NULL,
187: NULL,
188: NULL,
1.179 manu 189: NULL,
190: NULL,
/* Ports defining __HAVE_SYSCALL_INTERN supply syscall_intern; others use syscall. */
1.133 mycroft 191: #ifdef __HAVE_SYSCALL_INTERN
192: syscall_intern,
193: #else
194: syscall,
195: #endif
1.156 manu 196: NULL,
197: NULL,
1.195 fvdl 198:
199: uvm_default_mapaddr,
1.216 cube 200: NULL,
/* SA hooks: &saemul_netbsd when KERN_SA is configured, otherwise NULL. */
1.280 matt 201: #ifdef KERN_SA
1.279 wrstuden 202: &saemul_netbsd,
1.280 matt 203: #else
204: NULL,
205: #endif
1.237 ad 206: sizeof(ucontext_t),
1.239 cube 207: startlwp,
1.124 jdolecek 208: };
209:
1.55 cgd 210: /*
1.130 jdolecek 211: * Exec lock. Used to control access to execsw[] structures.
212: * This must not be static so that netbsd32 can access it, too.
213: */
1.237 ad 214: krwlock_t exec_lock;
1.183 junyoung 215:
1.259 ad 216: static kmutex_t sigobject_lock;
217:
1.277 ad 218: static void *
219: exec_pool_alloc(struct pool *pp, int flags)
220: {
221:
222: return (void *)uvm_km_alloc(kernel_map, NCARGS, 0,
223: UVM_KMF_PAGEABLE | UVM_KMF_WAITVA);
224: }
225:
226: static void
227: exec_pool_free(struct pool *pp, void *addr)
228: {
229:
230: uvm_km_free(kernel_map, (vaddr_t)addr, NCARGS, UVM_KMF_PAGEABLE);
231: }
232:
233: static struct pool exec_pool;
234:
235: static struct pool_allocator exec_palloc = {
236: .pa_alloc = exec_pool_alloc,
237: .pa_free = exec_pool_free,
238: .pa_pagesz = NCARGS
239: };
240:
1.130 jdolecek 241: /*
1.55 cgd 242: * check exec:
243: * given an "executable" described in the exec package's namei info,
244: * see what we can do with it.
245: *
246: * ON ENTRY:
247: * exec package with appropriate namei info
1.212 christos 248: * lwp pointer of exec'ing lwp
1.55 cgd 249: * NO SELF-LOCKED VNODES
250: *
251: * ON EXIT:
252: * error: nothing held, etc. exec header still allocated.
1.77 cgd 253: * ok: filled exec package, executable's vnode (unlocked).
1.55 cgd 254: *
255: * EXEC SWITCH ENTRY:
256: * Locked vnode to check, exec package, proc.
257: *
258: * EXEC SWITCH EXIT:
1.77 cgd 259: * ok: return 0, filled exec package, executable's vnode (unlocked).
1.55 cgd 260: * error: destructive:
261: * everything deallocated except exec header.
1.76 cgd 262: * non-destructive:
1.77 cgd 263: * error code, executable's vnode (unlocked),
1.76 cgd 264: * exec header unmodified.
1.55 cgd 265: */
266: int
1.205 christos 267: /*ARGSUSED*/
1.233 elad 268: check_exec(struct lwp *l, struct exec_package *epp)
1.55 cgd 269: {
1.138 lukem 270: int error, i;
271: struct vnode *vp;
1.55 cgd 272: struct nameidata *ndp;
1.138 lukem 273: size_t resid;
1.55 cgd 274:
/*
 * The lookup operation and flags are (re)set here on every call; the rest
 * of *ndp is whatever the caller put in the exec package.  LOCKLEAF is
 * requested, which is why the bad1 path below releases with vput().
 */
275: ndp = epp->ep_ndp;
276: ndp->ni_cnd.cn_nameiop = LOOKUP;
1.244 dsl 277: ndp->ni_cnd.cn_flags = FOLLOW | LOCKLEAF | SAVENAME | TRYEMULROOT;
1.55 cgd 278: /* first get the vnode */
1.74 christos 279: if ((error = namei(ndp)) != 0)
1.55 cgd 280: return error;
281: epp->ep_vp = vp = ndp->ni_vp;
282:
1.84 mycroft 283: /* check access and type */
/* Only regular files may be executed; anything else is EACCES. */
1.55 cgd 284: if (vp->v_type != VREG) {
1.81 kleink 285: error = EACCES;
1.55 cgd 286: goto bad1;
287: }
1.254 pooka 288: if ((error = VOP_ACCESS(vp, VEXEC, l->l_cred)) != 0)
1.84 mycroft 289: goto bad1;
1.55 cgd 290:
291: /* get attributes */
1.254 pooka 292: if ((error = VOP_GETATTR(vp, epp->ep_vap, l->l_cred)) != 0)
1.55 cgd 293: goto bad1;
294:
295: /* Check mount point */
296: if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
297: error = EACCES;
298: goto bad1;
299: }
/*
 * On a MNT_NOSUID mount, strip the set-id bits from the attribute copy in
 * ep_vap; only that copy is changed here.
 */
1.141 thorpej 300: if (vp->v_mount->mnt_flag & MNT_NOSUID)
1.83 mycroft 301: epp->ep_vap->va_mode &= ~(S_ISUID | S_ISGID);
1.55 cgd 302:
303: /* try to open it */
1.254 pooka 304: if ((error = VOP_OPEN(vp, FREAD, l->l_cred)) != 0)
1.55 cgd 305: goto bad1;
306:
1.99 wrstuden 307: /* unlock vp, since we need it unlocked from here on out. */
/*
 * From this point the vnode is open and unlocked, so failures must leave
 * through bad2 (re-lock, close, vput) rather than bad1.
 */
1.90 fvdl 308: VOP_UNLOCK(vp, 0);
1.77 cgd 309:
1.222 elad 310: #if NVERIEXEC > 0
1.236 elad 311: error = veriexec_verify(l, vp, ndp->ni_cnd.cn_pnbuf,
1.233 elad 312: epp->ep_flags & EXEC_INDIR ? VERIEXEC_INDIRECT : VERIEXEC_DIRECT,
1.236 elad 313: NULL);
314: if (error)
1.234 elad 315: goto bad2;
1.222 elad 316: #endif /* NVERIEXEC > 0 */
1.160 blymn 317:
1.232 elad 318: #ifdef PAX_SEGVGUARD
1.240 thorpej 319: error = pax_segvguard(l, vp, ndp->ni_cnd.cn_pnbuf, false);
1.234 elad 320: if (error)
321: goto bad2;
1.232 elad 322: #endif /* PAX_SEGVGUARD */
323:
1.55 cgd 324: /* now we have the file, get the exec header */
1.74 christos 325: error = vn_rdwr(UIO_READ, vp, epp->ep_hdr, epp->ep_hdrlen, 0,
1.223 ad 326: UIO_SYSSPACE, 0, l->l_cred, &resid, NULL);
1.74 christos 327: if (error)
1.55 cgd 328: goto bad2;
/* Bytes of header actually obtained: requested length minus the residual. */
329: epp->ep_hdrvalid = epp->ep_hdrlen - resid;
330:
331: /*
1.136 eeh 332: * Set up default address space limits. Can be overridden
333: * by individual exec packages.
1.183 junyoung 334: *
1.235 rillig 335: * XXX probably should be all done in the exec packages.
1.136 eeh 336: */
337: epp->ep_vm_minaddr = VM_MIN_ADDRESS;
338: epp->ep_vm_maxaddr = VM_MAXUSER_ADDRESS;
339: /*
1.55 cgd 340: * set up the vmcmds for creation of the process
341: * address space
342: */
/*
 * Offer the image to each execsw[] entry in turn.  error starts out as
 * ENOEXEC, which is what is returned if the loop ends without any entry
 * reporting a different error.
 */
343: error = ENOEXEC;
1.244 dsl 344: for (i = 0; i < nexecs; i++) {
1.68 cgd 345: int newerror;
346:
1.130 jdolecek 347: epp->ep_esch = execsw[i];
1.212 christos 348: newerror = (*execsw[i]->es_makecmds)(l, epp);
1.244 dsl 349:
350: if (!newerror) {
351: /* Seems ok: check that entry point is sane */
/* A break here falls through to kill_vmcmds() and the bad2 cleanup. */
352: if (epp->ep_entry > VM_MAXUSER_ADDRESS) {
353: error = ENOEXEC;
354: break;
355: }
356:
357: /* check limits */
358: if ((epp->ep_tsize > MAXTSIZ) ||
359: (epp->ep_dsize > (u_quad_t)l->l_proc->p_rlimit
360: [RLIMIT_DATA].rlim_cur)) {
361: error = ENOMEM;
362: break;
363: }
/* Success: the package is filled in and the vnode stays open and unlocked. */
364: return 0;
365: }
366:
/*
 * This entry rejected the image: drop any vnode references it left in the
 * package before the next entry is tried.
 */
367: if (epp->ep_emul_root != NULL) {
368: vrele(epp->ep_emul_root);
369: epp->ep_emul_root = NULL;
370: }
371: if (epp->ep_interp != NULL) {
372: vrele(epp->ep_interp);
373: epp->ep_interp = NULL;
374: }
375:
1.68 cgd 376: /* make sure the first "interesting" error code is saved. */
1.244 dsl 377: if (error == ENOEXEC)
1.68 cgd 378: error = newerror;
1.124 jdolecek 379:
1.244 dsl 380: if (epp->ep_flags & EXEC_DESTR)
381: /* Error from "#!" code, tidied up by recursive call */
1.55 cgd 382: return error;
383: }
384:
1.249 pooka 385: /* not found, error */
386:
1.55 cgd 387: /*
388: * free any vmspace-creation commands,
389: * and release their references
390: */
391: kill_vmcmds(&epp->ep_vmcmds);
392:
393: bad2:
394: /*
1.99 wrstuden 395: * close and release the vnode, restore the old one, free the
1.55 cgd 396: * pathname buf, and punt.
397: */
/* The vnode was unlocked above, so take the lock again before closing it. */
1.99 wrstuden 398: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.254 pooka 399: VOP_CLOSE(vp, FREAD, l->l_cred);
1.99 wrstuden 400: vput(vp);
1.120 thorpej 401: PNBUF_PUT(ndp->ni_cnd.cn_pnbuf);
1.55 cgd 402: return error;
403:
404: bad1:
405: /*
406: * free the namei pathname buffer, and put the vnode
407: * (which we don't yet have open).
408: */
1.77 cgd 409: vput(vp); /* was still locked */
1.120 thorpej 410: PNBUF_PUT(ndp->ni_cnd.cn_pnbuf);
1.55 cgd 411: return error;
412: }
413:
1.188 chs 414: #ifdef __MACHINE_STACK_GROWS_UP
415: #define STACK_PTHREADSPACE NBPG
416: #else
417: #define STACK_PTHREADSPACE 0
418: #endif
419:
/*
 * execve_fetch_element:
 *	Element fetcher that sys_execve() passes to execve1().  Uses copyin()
 *	to read the pointer held in slot `index' of the pointer vector
 *	`array' into *value.  Returns copyin()'s result unchanged.
 */
static int
execve_fetch_element(char * const *array, size_t index, char **value)
{
	/* Address of the requested slot; the slot itself is read by copyin(). */
	char * const *slot = &array[index];

	return copyin(slot, value, sizeof(char *));
}
425:
1.55 cgd 426: /*
427: * exec system call
428: */
429: /* ARGSUSED */
1.75 christos 430: int
1.258 dsl 431: sys_execve(struct lwp *l, const struct sys_execve_args *uap, register_t *retval)
1.71 thorpej 432: {
1.258 dsl 433: /* {
1.138 lukem 434: syscallarg(const char *) path;
435: syscallarg(char * const *) argp;
436: syscallarg(char * const *) envp;
1.258 dsl 437: } */
1.204 cube 438:
439: return execve1(l, SCARG(uap, path), SCARG(uap, argp),
440: SCARG(uap, envp), execve_fetch_element);
441: }
442:
1.280.2.1 skrll 443: /*
444: * Load modules to try and execute an image that we do not understand.
445: * If no execsw entries are present, we load those likely to be needed
446: * in order to run native images only. Otherwise, we autoload all
447: * possible modules that could let us run the binary. XXX lame
448: */
/*
 * exec_autoload:
 *	Try to autoload exec-format and compat modules.  With no execsw
 *	entries registered (nexecs == 0) only the short "native" list is
 *	attempted; otherwise the full "compat" list is.  Individual autoload
 *	failures are ignored.  Compiles to an empty function unless MODULAR
 *	is defined.
 */
static void
exec_autoload(void)
{
#ifdef MODULAR
	static const char * const native[] = {
		"exec_elf32",
		"exec_elf64",
		"exec_script",
		NULL
	};
	static const char * const compat[] = {
		"exec_elf32",
		"exec_elf64",
		"exec_script",
		"exec_aout",
		"exec_coff",
		"exec_ecoff",
		"compat_aoutm68k",
		"compat_freebsd",
		"compat_ibcs2",
		"compat_irix",
		"compat_linux",
		"compat_linux32",
		"compat_netbsd32",
		"compat_sunos",
		"compat_sunos32",
		"compat_svr4",
		"compat_svr4_32",
		"compat_ultrix",
		NULL
	};
	const char * const *modp;

	mutex_enter(&module_lock);

	/* The list is chosen with module_lock held, as in the loop below. */
	modp = (nexecs != 0) ? compat : native;

	while (*modp != NULL) {
		if (module_autoload(*modp, MODULE_CLASS_MISC) == 0) {
			/*
			 * Autoload succeeded: drop the lock, yield(), and
			 * take the lock again before the next module.
			 */
			mutex_exit(&module_lock);
			yield();
			mutex_enter(&module_lock);
		}
		modp++;
	}

	mutex_exit(&module_lock);
#endif
}
496:
1.204 cube 497: int
498: execve1(struct lwp *l, const char *path, char * const *args,
499: char * const *envs, execve_fetch_element_t fetch_element)
500: {
1.153 thorpej 501: int error;
1.138 lukem 502: struct exec_package pack;
503: struct nameidata nid;
504: struct vattr attr;
1.164 thorpej 505: struct proc *p;
1.138 lukem 506: char *argp;
507: char *dp, *sp;
508: long argc, envc;
1.248 christos 509: size_t i, len;
1.138 lukem 510: char *stack;
511: struct ps_strings arginfo;
1.213 manu 512: struct ps_strings *aip = &arginfo;
1.138 lukem 513: struct vmspace *vm;
1.265 yamt 514: struct exec_fakearg *tmpfap;
1.138 lukem 515: int szsigcode;
516: struct exec_vmcmd *base_vcp;
1.279 wrstuden 517: int oldlwpflags;
1.237 ad 518: ksiginfo_t ksi;
519: ksiginfoq_t kq;
1.260 christos 520: char *pathbuf;
1.255 christos 521: size_t pathbuflen;
1.280.2.1 skrll 522: u_int modgen;
1.55 cgd 523:
1.237 ad 524: p = l->l_proc;
1.280.2.1 skrll 525: modgen = 0;
1.164 thorpej 526:
1.149 christos 527: /*
1.269 christos 528: * Check if we have exceeded our number of processes limit.
529: * This is so that we handle the case where a root daemon
530: * forked, ran setuid to become the desired user and is trying
531: * to exec. The obvious place to do the reference counting check
532: * is setuid(), but we don't do the reference counting check there
533: * like other OS's do because then all the programs that use setuid()
534: * must be modified to check the return code of setuid() and exit().
535: * It is dangerous to make setuid() fail, because it fails open and
536: * the program will continue to run as root. If we make it succeed
537: * and return an error code, again we are not enforcing the limit.
538: * The best place to enforce the limit is here, when the process tries
539: * to execute a new image, because eventually the process will need
540: * to call exec in order to do something useful.
541: */
1.280.2.1 skrll 542: retry:
1.280.2.3! skrll 543: if ((p->p_flag & PK_SUGID) && kauth_authorize_generic(l->l_cred,
! 544: KAUTH_GENERIC_ISSUSER, NULL) != 0 && chgproccnt(kauth_cred_getuid(
! 545: l->l_cred), 0) > p->p_rlimit[RLIMIT_NPROC].rlim_cur)
1.269 christos 546: return EAGAIN;
547:
1.279 wrstuden 548: oldlwpflags = l->l_flag & (LW_SA | LW_SA_UPCALL);
549: if (l->l_flag & LW_SA) {
550: lwp_lock(l);
551: l->l_flag &= ~(LW_SA | LW_SA_UPCALL);
552: lwp_unlock(l);
553: }
554:
1.269 christos 555: /*
1.237 ad 556: * Drain existing references and forbid new ones. The process
557: * should be left alone until we're done here. This is necessary
558: * to avoid race conditions - e.g. in ptrace() - that might allow
559: * a local user to illicitly obtain elevated privileges.
560: */
1.252 ad 561: rw_enter(&p->p_reflock, RW_WRITER);
1.149 christos 562:
1.138 lukem 563: base_vcp = NULL;
1.55 cgd 564: /*
1.129 jdolecek 565: * Init the namei data to point the file user's program name.
566: * This is done here rather than in check_exec(), so that it's
567: * possible to override this settings if any of makecmd/probe
568: * functions call check_exec() recursively - for example,
569: * see exec_script_makecmds().
570: */
1.260 christos 571: pathbuf = PNBUF_GET();
572: error = copyinstr(path, pathbuf, MAXPATHLEN, &pathbuflen);
1.248 christos 573: if (error) {
574: DPRINTF(("execve: copyinstr path %d", error));
1.200 elad 575: goto clrflg;
1.248 christos 576: }
1.200 elad 577:
1.257 pooka 578: NDINIT(&nid, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_SYSSPACE, pathbuf);
1.55 cgd 579:
580: /*
581: * initialize the fields of the exec package.
582: */
1.204 cube 583: pack.ep_name = path;
1.265 yamt 584: pack.ep_hdr = kmem_alloc(exec_maxhdrsz, KM_SLEEP);
1.55 cgd 585: pack.ep_hdrlen = exec_maxhdrsz;
586: pack.ep_hdrvalid = 0;
587: pack.ep_ndp = &nid;
1.67 christos 588: pack.ep_emul_arg = NULL;
1.55 cgd 589: pack.ep_vmcmds.evs_cnt = 0;
590: pack.ep_vmcmds.evs_used = 0;
591: pack.ep_vap = &attr;
592: pack.ep_flags = 0;
1.244 dsl 593: pack.ep_emul_root = NULL;
594: pack.ep_interp = NULL;
595: pack.ep_esch = NULL;
1.273 ad 596: pack.ep_pax_flags = 0;
1.55 cgd 597:
1.237 ad 598: rw_enter(&exec_lock, RW_READER);
1.130 jdolecek 599:
1.55 cgd 600: /* see if we can run it. */
1.248 christos 601: if ((error = check_exec(l, &pack)) != 0) {
1.261 xtraeme 602: if (error != ENOENT) {
1.260 christos 603: DPRINTF(("execve: check exec failed %d\n", error));
1.261 xtraeme 604: }
1.55 cgd 605: goto freehdr;
1.248 christos 606: }
1.55 cgd 607:
608: /* XXX -- THE FOLLOWING SECTION NEEDS MAJOR CLEANUP */
609:
610: /* allocate an argument buffer */
1.277 ad 611: argp = pool_get(&exec_pool, PR_WAITOK);
612: KASSERT(argp != NULL);
1.55 cgd 613: dp = argp;
614: argc = 0;
615:
616: /* copy the fake args list, if there's one, freeing it as we go */
617: if (pack.ep_flags & EXEC_HASARGL) {
618: tmpfap = pack.ep_fa;
1.265 yamt 619: while (tmpfap->fa_arg != NULL) {
620: const char *cp;
1.55 cgd 621:
1.265 yamt 622: cp = tmpfap->fa_arg;
1.55 cgd 623: while (*cp)
624: *dp++ = *cp++;
1.276 ad 625: *dp++ = '\0';
1.55 cgd 626:
1.265 yamt 627: kmem_free(tmpfap->fa_arg, tmpfap->fa_len);
1.55 cgd 628: tmpfap++; argc++;
629: }
1.265 yamt 630: kmem_free(pack.ep_fa, pack.ep_fa_len);
1.55 cgd 631: pack.ep_flags &= ~EXEC_HASARGL;
632: }
633:
634: /* Now get argv & environment */
1.204 cube 635: if (args == NULL) {
1.248 christos 636: DPRINTF(("execve: null args\n"));
1.55 cgd 637: error = EINVAL;
638: goto bad;
639: }
1.204 cube 640: /* 'i' will index the argp/envp element to be retrieved */
641: i = 0;
1.55 cgd 642: if (pack.ep_flags & EXEC_SKIPARG)
1.204 cube 643: i++;
1.55 cgd 644:
645: while (1) {
646: len = argp + ARG_MAX - dp;
1.248 christos 647: if ((error = (*fetch_element)(args, i, &sp)) != 0) {
648: DPRINTF(("execve: fetch_element args %d\n", error));
1.55 cgd 649: goto bad;
1.248 christos 650: }
1.55 cgd 651: if (!sp)
652: break;
1.74 christos 653: if ((error = copyinstr(sp, dp, len, &len)) != 0) {
1.248 christos 654: DPRINTF(("execve: copyinstr args %d\n", error));
1.55 cgd 655: if (error == ENAMETOOLONG)
656: error = E2BIG;
657: goto bad;
658: }
1.247 ad 659: ktrexecarg(dp, len - 1);
1.55 cgd 660: dp += len;
1.204 cube 661: i++;
1.55 cgd 662: argc++;
663: }
664:
665: envc = 0;
1.74 christos 666: /* environment need not be there */
1.204 cube 667: if (envs != NULL) {
668: i = 0;
1.55 cgd 669: while (1) {
670: len = argp + ARG_MAX - dp;
1.248 christos 671: if ((error = (*fetch_element)(envs, i, &sp)) != 0) {
672: DPRINTF(("execve: fetch_element env %d\n", error));
1.55 cgd 673: goto bad;
1.248 christos 674: }
1.55 cgd 675: if (!sp)
676: break;
1.74 christos 677: if ((error = copyinstr(sp, dp, len, &len)) != 0) {
1.248 christos 678: DPRINTF(("execve: copyinstr env %d\n", error));
1.55 cgd 679: if (error == ENAMETOOLONG)
680: error = E2BIG;
681: goto bad;
682: }
1.247 ad 683: ktrexecenv(dp, len - 1);
1.55 cgd 684: dp += len;
1.204 cube 685: i++;
1.55 cgd 686: envc++;
687: }
688: }
1.61 mycroft 689:
690: dp = (char *) ALIGN(dp);
1.55 cgd 691:
1.244 dsl 692: szsigcode = pack.ep_esch->es_emul->e_esigcode -
693: pack.ep_esch->es_emul->e_sigcode;
1.65 fvdl 694:
1.267 dsl 695: #ifdef __MACHINE_STACK_GROWS_UP
696: /* See big comment lower down */
697: #define RTLD_GAP 32
698: #else
699: #define RTLD_GAP 0
700: #endif
701:
1.55 cgd 702: /* Now check if args & environ fit into new stack */
1.105 eeh 703: if (pack.ep_flags & EXEC_32)
1.244 dsl 704: len = ((argc + envc + 2 + pack.ep_esch->es_arglen) *
1.267 dsl 705: sizeof(int) + sizeof(int) + dp + RTLD_GAP +
1.188 chs 706: szsigcode + sizeof(struct ps_strings) + STACK_PTHREADSPACE)
707: - argp;
1.105 eeh 708: else
1.244 dsl 709: len = ((argc + envc + 2 + pack.ep_esch->es_arglen) *
1.267 dsl 710: sizeof(char *) + sizeof(int) + dp + RTLD_GAP +
1.188 chs 711: szsigcode + sizeof(struct ps_strings) + STACK_PTHREADSPACE)
712: - argp;
1.67 christos 713:
1.262 elad 714: #ifdef PAX_ASLR
715: if (pax_aslr_active(l))
716: len += (arc4random() % PAGE_SIZE);
717: #endif /* PAX_ASLR */
718:
1.243 matt 719: #ifdef STACKLALIGN /* arm, etc. */
720: len = STACKALIGN(len); /* make the stack "safely" aligned */
721: #else
1.55 cgd 722: len = ALIGN(len); /* make the stack "safely" aligned */
1.243 matt 723: #endif
1.55 cgd 724:
725: if (len > pack.ep_ssize) { /* in effect, compare to initial limit */
1.248 christos 726: DPRINTF(("execve: stack limit exceeded %zu\n", len));
1.55 cgd 727: error = ENOMEM;
728: goto bad;
729: }
730:
1.237 ad 731: /* Get rid of other LWPs. */
1.279 wrstuden 732: if (p->p_sa || p->p_nlwps > 1) {
1.272 ad 733: mutex_enter(p->p_lock);
1.237 ad 734: exit_lwps(l);
1.272 ad 735: mutex_exit(p->p_lock);
1.237 ad 736: }
1.164 thorpej 737: KDASSERT(p->p_nlwps == 1);
738:
1.253 ad 739: /* Destroy any lwpctl info. */
740: if (p->p_lwpctl != NULL)
741: lwp_ctl_exit();
742:
1.164 thorpej 743: /* This is now LWP 1 */
744: l->l_lid = 1;
745: p->p_nlwpid = 1;
746:
1.279 wrstuden 747: #ifdef KERN_SA
748: /* Release any SA state. */
749: if (p->p_sa)
750: sa_release(p);
751: #endif /* KERN_SA */
752:
1.164 thorpej 753: /* Remove POSIX timers */
754: timers_free(p, TIMERS_POSIX);
755:
1.55 cgd 756: /* adjust "active stack depth" for process VSZ */
757: pack.ep_ssize = len; /* maybe should go elsewhere, but... */
758:
1.86 thorpej 759: /*
760: * Do whatever is necessary to prepare the address space
761: * for remapping. Note that this might replace the current
762: * vmspace with another!
763: */
1.164 thorpej 764: uvmspace_exec(l, pack.ep_vm_minaddr, pack.ep_vm_maxaddr);
1.55 cgd 765:
1.186 chs 766: /* record proc's vnode, for use by procfs and others */
767: if (p->p_textvp)
768: vrele(p->p_textvp);
769: VREF(pack.ep_vp);
770: p->p_textvp = pack.ep_vp;
771:
1.55 cgd 772: /* Now map address space */
1.86 thorpej 773: vm = p->p_vmspace;
1.241 dogcow 774: vm->vm_taddr = (void *)pack.ep_taddr;
1.55 cgd 775: vm->vm_tsize = btoc(pack.ep_tsize);
1.241 dogcow 776: vm->vm_daddr = (void*)pack.ep_daddr;
1.55 cgd 777: vm->vm_dsize = btoc(pack.ep_dsize);
778: vm->vm_ssize = btoc(pack.ep_ssize);
1.280.2.3! skrll 779: vm->vm_issize = 0;
1.241 dogcow 780: vm->vm_maxsaddr = (void *)pack.ep_maxsaddr;
781: vm->vm_minsaddr = (void *)pack.ep_minsaddr;
1.55 cgd 782:
1.260 christos 783: #ifdef PAX_ASLR
784: pax_aslr_init(l, vm);
785: #endif /* PAX_ASLR */
786:
1.55 cgd 787: /* create the new process's VM space by running the vmcmds */
788: #ifdef DIAGNOSTIC
789: if (pack.ep_vmcmds.evs_used == 0)
790: panic("execve: no vmcmds");
791: #endif
792: for (i = 0; i < pack.ep_vmcmds.evs_used && !error; i++) {
793: struct exec_vmcmd *vcp;
794:
795: vcp = &pack.ep_vmcmds.evs_cmds[i];
1.114 matt 796: if (vcp->ev_flags & VMCMD_RELATIVE) {
797: #ifdef DIAGNOSTIC
798: if (base_vcp == NULL)
799: panic("execve: relative vmcmd with no base");
800: if (vcp->ev_flags & VMCMD_BASE)
801: panic("execve: illegal base & relative vmcmd");
802: #endif
803: vcp->ev_addr += base_vcp->ev_addr;
804: }
1.212 christos 805: error = (*vcp->ev_proc)(l, vcp);
1.143 christos 806: #ifdef DEBUG_EXEC
1.111 matt 807: if (error) {
1.248 christos 808: size_t j;
1.143 christos 809: struct exec_vmcmd *vp = &pack.ep_vmcmds.evs_cmds[0];
810: for (j = 0; j <= i; j++)
811: uprintf(
1.248 christos 812: "vmcmd[%zu] = %#lx/%#lx fd@%#lx prot=0%o flags=%d\n",
1.143 christos 813: j, vp[j].ev_addr, vp[j].ev_len,
814: vp[j].ev_offset, vp[j].ev_prot,
815: vp[j].ev_flags);
1.111 matt 816: }
1.143 christos 817: #endif /* DEBUG_EXEC */
1.114 matt 818: if (vcp->ev_flags & VMCMD_BASE)
819: base_vcp = vcp;
1.55 cgd 820: }
821:
822: /* free the vmspace-creation commands, and release their references */
823: kill_vmcmds(&pack.ep_vmcmds);
824:
1.186 chs 825: vn_lock(pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
1.254 pooka 826: VOP_CLOSE(pack.ep_vp, FREAD, l->l_cred);
1.186 chs 827: vput(pack.ep_vp);
828:
1.55 cgd 829: /* if an error happened, deallocate and punt */
1.111 matt 830: if (error) {
1.248 christos 831: DPRINTF(("execve: vmcmd %zu failed: %d\n", i - 1, error));
1.55 cgd 832: goto exec_abort;
1.111 matt 833: }
1.55 cgd 834:
835: /* remember information about the process */
836: arginfo.ps_nargvstr = argc;
837: arginfo.ps_nenvstr = envc;
838:
1.255 christos 839: /* set command name & other accounting info */
840: i = min(nid.ni_cnd.cn_namelen, MAXCOMLEN);
841: (void)memcpy(p->p_comm, nid.ni_cnd.cn_nameptr, i);
842: p->p_comm[i] = '\0';
843:
844: dp = PNBUF_GET();
845: /*
846: * If the path starts with /, we don't need to do any work.
847: * This handles the majority of the cases.
848: * In the future perhaps we could canonicalize it?
849: */
850: if (pathbuf[0] == '/')
851: (void)strlcpy(pack.ep_path = dp, pathbuf, MAXPATHLEN);
852: #ifdef notyet
853: /*
854: * Although this works most of the time [since the entry was just
855: * entered in the cache] we don't use it because it theoretically
856: * can fail and it is not the cleanest interface, because there
857: * could be races. When the namei cache is re-written, this can
858: * be changed to use the appropriate function.
859: */
860: else if (!(error = vnode_to_path(dp, MAXPATHLEN, p->p_textvp, l, p)))
861: pack.ep_path = dp;
862: #endif
863: else {
1.256 christos 864: #ifdef notyet
1.255 christos 865: printf("Cannot get path for pid %d [%s] (error %d)",
866: (int)p->p_pid, p->p_comm, error);
867: #endif
868: pack.ep_path = NULL;
869: PNBUF_PUT(dp);
870: }
871:
1.163 chs 872: stack = (char *)STACK_ALLOC(STACK_GROW(vm->vm_minsaddr,
1.188 chs 873: STACK_PTHREADSPACE + sizeof(struct ps_strings) + szsigcode),
1.163 chs 874: len - (sizeof(struct ps_strings) + szsigcode));
1.267 dsl 875:
1.163 chs 876: #ifdef __MACHINE_STACK_GROWS_UP
877: /*
878: * The copyargs call always copies into lower addresses
879: * first, moving towards higher addresses, starting with
1.183 junyoung 880: * the stack pointer that we give. When the stack grows
881: * down, this puts argc/argv/envp very shallow on the
1.267 dsl 882: * stack, right at the first user stack pointer.
883: * When the stack grows up, the situation is reversed.
1.163 chs 884: *
885: * Normally, this is no big deal. But the ld_elf.so _rtld()
1.183 junyoung 886: * function expects to be called with a single pointer to
887: * a region that has a few words it can stash values into,
1.163 chs 888: * followed by argc/argv/envp. When the stack grows down,
889: * it's easy to decrement the stack pointer a little bit to
890: * allocate the space for these few words and pass the new
891: * stack pointer to _rtld. When the stack grows up, however,
1.171 chs 892: * a few words before argc is part of the signal trampoline, XXX
1.163 chs 893: * so we have a problem.
894: *
1.183 junyoung 895: * Instead of changing how _rtld works, we take the easy way
1.267 dsl 896: * out and steal 32 bytes before we call copyargs.
897: * This extra space was allowed for when 'len' was calculated.
1.163 chs 898: */
1.267 dsl 899: stack += RTLD_GAP;
1.163 chs 900: #endif /* __MACHINE_STACK_GROWS_UP */
901:
1.55 cgd 902: /* Now copy argc, args & environ to new stack */
1.244 dsl 903: error = (*pack.ep_esch->es_copyargs)(l, &pack, &arginfo, &stack, argp);
1.255 christos 904: if (pack.ep_path) {
905: PNBUF_PUT(pack.ep_path);
906: pack.ep_path = NULL;
907: }
1.144 christos 908: if (error) {
909: DPRINTF(("execve: copyargs failed %d\n", error));
1.55 cgd 910: goto exec_abort;
1.111 matt 911: }
1.144 christos 912: /* Move the stack back to original point */
1.163 chs 913: stack = (char *)STACK_GROW(vm->vm_minsaddr, len);
1.55 cgd 914:
1.121 eeh 915: /* fill process ps_strings info */
1.188 chs 916: p->p_psstr = (struct ps_strings *)
917: STACK_ALLOC(STACK_GROW(vm->vm_minsaddr, STACK_PTHREADSPACE),
1.163 chs 918: sizeof(struct ps_strings));
1.121 eeh 919: p->p_psargv = offsetof(struct ps_strings, ps_argvstr);
920: p->p_psnargv = offsetof(struct ps_strings, ps_nargvstr);
921: p->p_psenv = offsetof(struct ps_strings, ps_envstr);
922: p->p_psnenv = offsetof(struct ps_strings, ps_nenvstr);
923:
1.55 cgd 924: /* copy out the process's ps_strings structure */
1.213 manu 925: if ((error = copyout(aip, (char *)p->p_psstr,
1.144 christos 926: sizeof(arginfo))) != 0) {
1.143 christos 927: DPRINTF(("execve: ps_strings copyout %p->%p size %ld failed\n",
1.213 manu 928: aip, (char *)p->p_psstr, (long)sizeof(arginfo)));
1.55 cgd 929: goto exec_abort;
1.111 matt 930: }
1.109 simonb 931:
1.270 ad 932: fd_closeexec(); /* handle close on exec */
1.55 cgd 933: execsigs(p); /* reset catched signals */
1.183 junyoung 934:
1.164 thorpej 935: l->l_ctxlink = NULL; /* reset ucontext link */
1.55 cgd 936:
1.255 christos 937:
1.55 cgd 938: p->p_acflag &= ~AFORK;
1.272 ad 939: mutex_enter(p->p_lock);
1.238 pavel 940: p->p_flag |= PK_EXEC;
1.272 ad 941: mutex_exit(p->p_lock);
1.237 ad 942:
943: /*
944: * Stop profiling.
945: */
946: if ((p->p_stflag & PST_PROFIL) != 0) {
947: mutex_spin_enter(&p->p_stmutex);
948: stopprofclock(p);
949: mutex_spin_exit(&p->p_stmutex);
950: }
951:
952: /*
1.275 ad 953: * It's OK to test PL_PPWAIT unlocked here, as other LWPs have
1.237 ad 954: * exited and exec()/exit() are the only places it will be cleared.
955: */
1.275 ad 956: if ((p->p_lflag & PL_PPWAIT) != 0) {
1.271 ad 957: mutex_enter(proc_lock);
1.275 ad 958: p->p_lflag &= ~PL_PPWAIT;
1.237 ad 959: cv_broadcast(&p->p_pptr->p_waitcv);
1.271 ad 960: mutex_exit(proc_lock);
1.55 cgd 961: }
962:
963: /*
1.237 ad 964: * Deal with set[ug]id. MNT_NOSUID has already been used to disable
965: * s[ug]id. It's OK to check for PSL_TRACED here as we have blocked
966: * out additional references on the process for the moment.
1.55 cgd 967: */
1.237 ad 968: if ((p->p_slflag & PSL_TRACED) == 0 &&
1.141 thorpej 969:
970: (((attr.va_mode & S_ISUID) != 0 &&
1.221 ad 971: kauth_cred_geteuid(l->l_cred) != attr.va_uid) ||
1.141 thorpej 972:
973: ((attr.va_mode & S_ISGID) != 0 &&
1.221 ad 974: kauth_cred_getegid(l->l_cred) != attr.va_gid))) {
1.141 thorpej 975: /*
976: * Mark the process as SUGID before we do
977: * anything that might block.
978: */
1.237 ad 979: proc_crmod_enter();
1.240 thorpej 980: proc_crmod_leave(NULL, NULL, true);
1.152 christos 981:
982: /* Make sure file descriptors 0..2 are in use. */
1.270 ad 983: if ((error = fd_checkstd()) != 0) {
1.209 christos 984: DPRINTF(("execve: fdcheckstd failed %d\n", error));
1.152 christos 985: goto exec_abort;
1.209 christos 986: }
1.141 thorpej 987:
1.220 ad 988: /*
989: * Copy the credential so other references don't see our
990: * changes.
991: */
1.221 ad 992: l->l_cred = kauth_cred_copy(l->l_cred);
1.55 cgd 993: #ifdef KTRACE
994: /*
1.268 elad 995: * If the persistent trace flag isn't set, turn off.
1.55 cgd 996: */
1.237 ad 997: if (p->p_tracep) {
1.247 ad 998: mutex_enter(&ktrace_lock);
1.268 elad 999: if (!(p->p_traceflag & KTRFAC_PERSISTENT))
1.237 ad 1000: ktrderef(p);
1.247 ad 1001: mutex_exit(&ktrace_lock);
1.237 ad 1002: }
1.55 cgd 1003: #endif
1.83 mycroft 1004: if (attr.va_mode & S_ISUID)
1.221 ad 1005: kauth_cred_seteuid(l->l_cred, attr.va_uid);
1.83 mycroft 1006: if (attr.va_mode & S_ISGID)
1.221 ad 1007: kauth_cred_setegid(l->l_cred, attr.va_gid);
1.210 christos 1008: } else {
1.221 ad 1009: if (kauth_cred_geteuid(l->l_cred) ==
1010: kauth_cred_getuid(l->l_cred) &&
1011: kauth_cred_getegid(l->l_cred) ==
1012: kauth_cred_getgid(l->l_cred))
1.238 pavel 1013: p->p_flag &= ~PK_SUGID;
1.210 christos 1014: }
1.220 ad 1015:
1016: /*
1017: * Copy the credential so other references don't see our changes.
1018: * Test to see if this is necessary first, since in the common case
1019: * we won't need a private reference.
1020: */
1.221 ad 1021: if (kauth_cred_geteuid(l->l_cred) != kauth_cred_getsvuid(l->l_cred) ||
1022: kauth_cred_getegid(l->l_cred) != kauth_cred_getsvgid(l->l_cred)) {
1023: l->l_cred = kauth_cred_copy(l->l_cred);
1024: kauth_cred_setsvuid(l->l_cred, kauth_cred_geteuid(l->l_cred));
1025: kauth_cred_setsvgid(l->l_cred, kauth_cred_getegid(l->l_cred));
1.220 ad 1026: }
1.155 gmcgarry 1027:
1.221 ad 1028: /* Update the master credentials. */
1.227 ad 1029: if (l->l_cred != p->p_cred) {
1030: kauth_cred_t ocred;
1031:
1032: kauth_cred_hold(l->l_cred);
1.272 ad 1033: mutex_enter(p->p_lock);
1.227 ad 1034: ocred = p->p_cred;
1035: p->p_cred = l->l_cred;
1.272 ad 1036: mutex_exit(p->p_lock);
1.227 ad 1037: kauth_cred_free(ocred);
1038: }
1.221 ad 1039:
1.155 gmcgarry 1040: #if defined(__HAVE_RAS)
1041: /*
1042: * Remove all RASs from the address space.
1043: */
1.251 ad 1044: ras_purgeall();
1.155 gmcgarry 1045: #endif
1.107 fvdl 1046:
1047: doexechooks(p);
1.55 cgd 1048:
1049: /* setup new registers and do misc. setup. */
1.244 dsl 1050: (*pack.ep_esch->es_emul->e_setregs)(l, &pack, (u_long) stack);
1051: if (pack.ep_esch->es_setregs)
1052: (*pack.ep_esch->es_setregs)(l, &pack, (u_long) stack);
1.55 cgd 1053:
1.171 chs 1054: /* map the process's signal trampoline code */
1.244 dsl 1055: if (exec_sigcode_map(p, pack.ep_esch->es_emul)) {
1.209 christos 1056: DPRINTF(("execve: map sigcode failed %d\n", error));
1.171 chs 1057: goto exec_abort;
1.209 christos 1058: }
1.171 chs 1059:
1.277 ad 1060: pool_put(&exec_pool, argp);
1.276 ad 1061:
1062: PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
1063:
1064: /* notify others that we exec'd */
1065: KNOTE(&p->p_klist, NOTE_EXEC);
1066:
1.265 yamt 1067: kmem_free(pack.ep_hdr, pack.ep_hdrlen);
1.122 jdolecek 1068:
1.244 dsl 1069: /* The emulation root will usually have been found when we looked
1070: * for the elf interpreter (or similar), if not look now. */
1071: if (pack.ep_esch->es_emul->e_path != NULL && pack.ep_emul_root == NULL)
1072: emul_find_root(l, &pack);
1073:
1074: /* Any old emulation root got removed by fdcloseexec */
1.259 ad 1075: rw_enter(&p->p_cwdi->cwdi_lock, RW_WRITER);
1.244 dsl 1076: p->p_cwdi->cwdi_edir = pack.ep_emul_root;
1.259 ad 1077: rw_exit(&p->p_cwdi->cwdi_lock);
1.244 dsl 1078: pack.ep_emul_root = NULL;
1079: if (pack.ep_interp != NULL)
1080: vrele(pack.ep_interp);
1081:
1.122 jdolecek 1082: /*
1.194 peter 1083: * Call emulation specific exec hook. This can setup per-process
1.122 jdolecek 1084: * p->p_emuldata or do any other per-process stuff an emulation needs.
1085: *
1086: * If we are executing process of different emulation than the
1087: * original forked process, call e_proc_exit() of the old emulation
1088: * first, then e_proc_exec() of new emulation. If the emulation is
1089: * same, the exec hook code should deallocate any old emulation
1090: * resources held previously by this process.
1091: */
1.124 jdolecek 1092: if (p->p_emul && p->p_emul->e_proc_exit
1.244 dsl 1093: && p->p_emul != pack.ep_esch->es_emul)
1.122 jdolecek 1094: (*p->p_emul->e_proc_exit)(p);
1095:
1.123 jdolecek 1096: /*
1097: * Call exec hook. Emulation code may NOT store reference to anything
1098: * from &pack.
1099: */
1.244 dsl 1100: if (pack.ep_esch->es_emul->e_proc_exec)
1101: (*pack.ep_esch->es_emul->e_proc_exec)(p, &pack);
1.122 jdolecek 1102:
1103: /* update p_emul, the old value is no longer needed */
1.244 dsl 1104: p->p_emul = pack.ep_esch->es_emul;
1.148 thorpej 1105:
1106: /* ...and the same for p_execsw */
1.244 dsl 1107: p->p_execsw = pack.ep_esch;
1.148 thorpej 1108:
1.133 mycroft 1109: #ifdef __HAVE_SYSCALL_INTERN
1110: (*p->p_emul->e_syscall_intern)(p);
1111: #endif
1.247 ad 1112: ktremul();
1.85 mycroft 1113:
1.252 ad 1114: /* Allow new references from the debugger/procfs. */
1115: rw_exit(&p->p_reflock);
1.237 ad 1116: rw_exit(&exec_lock);
1.162 manu 1117:
1.271 ad 1118: mutex_enter(proc_lock);
1.237 ad 1119:
1120: if ((p->p_slflag & (PSL_TRACED|PSL_SYSCALL)) == PSL_TRACED) {
1121: KSI_INIT_EMPTY(&ksi);
1122: ksi.ksi_signo = SIGTRAP;
1123: ksi.ksi_lid = l->l_lid;
1124: kpsignal(p, &ksi, NULL);
1125: }
1.162 manu 1126:
1.237 ad 1127: if (p->p_sflag & PS_STOPEXEC) {
1128: KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
1.175 dsl 1129: p->p_pptr->p_nstopchild++;
1.237 ad 1130: p->p_pptr->p_waited = 0;
1.272 ad 1131: mutex_enter(p->p_lock);
1.237 ad 1132: ksiginfo_queue_init(&kq);
1133: sigclearall(p, &contsigmask, &kq);
1134: lwp_lock(l);
1135: l->l_stat = LSSTOP;
1.162 manu 1136: p->p_stat = SSTOP;
1.164 thorpej 1137: p->p_nrlwps--;
1.272 ad 1138: mutex_exit(p->p_lock);
1.271 ad 1139: mutex_exit(proc_lock);
1.245 yamt 1140: mi_switch(l);
1.237 ad 1141: ksiginfo_queue_drain(&kq);
1142: KERNEL_LOCK(l->l_biglocks, l);
1143: } else {
1.271 ad 1144: mutex_exit(proc_lock);
1.162 manu 1145: }
1146:
1.260 christos 1147: PNBUF_PUT(pathbuf);
1.85 mycroft 1148: return (EJUSTRETURN);
1.55 cgd 1149:
1.138 lukem 1150: bad:
1.55 cgd 1151: /* free the vmspace-creation commands, and release their references */
1152: kill_vmcmds(&pack.ep_vmcmds);
1153: /* kill any opened file descriptor, if necessary */
1154: if (pack.ep_flags & EXEC_HASFD) {
1155: pack.ep_flags &= ~EXEC_HASFD;
1.270 ad 1156: fd_close(pack.ep_fd);
1.55 cgd 1157: }
1158: /* close and put the exec'd file */
1.99 wrstuden 1159: vn_lock(pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
1.254 pooka 1160: VOP_CLOSE(pack.ep_vp, FREAD, l->l_cred);
1.99 wrstuden 1161: vput(pack.ep_vp);
1.120 thorpej 1162: PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
1.277 ad 1163: pool_put(&exec_pool, argp);
1.55 cgd 1164:
1.138 lukem 1165: freehdr:
1.265 yamt 1166: kmem_free(pack.ep_hdr, pack.ep_hdrlen);
1.244 dsl 1167: if (pack.ep_emul_root != NULL)
1168: vrele(pack.ep_emul_root);
1169: if (pack.ep_interp != NULL)
1170: vrele(pack.ep_interp);
1.200 elad 1171:
1.274 ad 1172: rw_exit(&exec_lock);
1173:
1.200 elad 1174: clrflg:
1.279 wrstuden 1175: lwp_lock(l);
1176: l->l_flag |= oldlwpflags;
1177: lwp_unlock(l);
1.260 christos 1178: PNBUF_PUT(pathbuf);
1.252 ad 1179: rw_exit(&p->p_reflock);
1.130 jdolecek 1180:
1.280.2.1 skrll 1181: if (modgen != module_gen && error == ENOEXEC) {
1182: modgen = module_gen;
1183: exec_autoload();
1184: goto retry;
1185: }
1186:
1.55 cgd 1187: return error;
1188:
1.138 lukem 1189: exec_abort:
1.260 christos 1190: PNBUF_PUT(pathbuf);
1.252 ad 1191: rw_exit(&p->p_reflock);
1.237 ad 1192: rw_exit(&exec_lock);
1.130 jdolecek 1193:
1.55 cgd 1194: /*
1195: * the old process doesn't exist anymore. exit gracefully.
1196: * get rid of the (new) address space we have created, if any, get rid
1197: * of our namei data and vnode, and exit noting failure
1198: */
1.88 mrg 1199: uvm_deallocate(&vm->vm_map, VM_MIN_ADDRESS,
1200: VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS);
1.73 mycroft 1201: if (pack.ep_emul_arg)
1.280.2.1 skrll 1202: free(pack.ep_emul_arg, M_TEMP);
1.120 thorpej 1203: PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
1.277 ad 1204: pool_put(&exec_pool, argp);
1.265 yamt 1205: kmem_free(pack.ep_hdr, pack.ep_hdrlen);
1.244 dsl 1206: if (pack.ep_emul_root != NULL)
1207: vrele(pack.ep_emul_root);
1208: if (pack.ep_interp != NULL)
1209: vrele(pack.ep_interp);
1.237 ad 1210:
1.252 ad 1211: /* Acquire the sched-state mutex (exit1() will release it). */
1.272 ad 1212: mutex_enter(p->p_lock);
1.164 thorpej 1213: exit1(l, W_EXITCODE(error, SIGABRT));
1.55 cgd 1214:
1215: /* NOTREACHED */
1216: return 0;
1.67 christos 1217: }
1218:
1219:
1.144 christos 1220: int
1.231 yamt 1221: copyargs(struct lwp *l, struct exec_package *pack, struct ps_strings *arginfo,
1222: char **stackp, void *argp)
1.67 christos 1223: {
1.138 lukem 1224: char **cpp, *dp, *sp;
1225: size_t len;
1226: void *nullp;
1227: long argc, envc;
1.144 christos 1228: int error;
1.138 lukem 1229:
1.144 christos 1230: cpp = (char **)*stackp;
1.138 lukem 1231: nullp = NULL;
1232: argc = arginfo->ps_nargvstr;
1233: envc = arginfo->ps_nenvstr;
1.144 christos 1234: if ((error = copyout(&argc, cpp++, sizeof(argc))) != 0)
1235: return error;
1.67 christos 1236:
1.244 dsl 1237: dp = (char *) (cpp + argc + envc + 2 + pack->ep_esch->es_arglen);
1.67 christos 1238: sp = argp;
1239:
1240: /* XXX don't copy them out, remap them! */
1.69 mycroft 1241: arginfo->ps_argvstr = cpp; /* remember location of argv for later */
1.67 christos 1242:
1243: for (; --argc >= 0; sp += len, dp += len)
1.144 christos 1244: if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0 ||
1245: (error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0)
1246: return error;
1.67 christos 1247:
1.144 christos 1248: if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0)
1249: return error;
1.67 christos 1250:
1.69 mycroft 1251: arginfo->ps_envstr = cpp; /* remember location of envp for later */
1.67 christos 1252:
1253: for (; --envc >= 0; sp += len, dp += len)
1.144 christos 1254: if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0 ||
1255: (error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0)
1256: return error;
1.67 christos 1257:
1.144 christos 1258: if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0)
1259: return error;
1.67 christos 1260:
1.144 christos 1261: *stackp = (char *)cpp;
1262: return 0;
1.55 cgd 1263: }
1.130 jdolecek 1264:
1265:
1266: /*
1.280.2.1 skrll 1267: * Add execsw[] entries.
1.130 jdolecek 1268: */
1269: int
1.280.2.1 skrll 1270: exec_add(struct execsw *esp, int count)
1.130 jdolecek 1271: {
1.280.2.1 skrll 1272: struct exec_entry *it;
1273: int i;
1.130 jdolecek 1274:
1.280.2.1 skrll 1275: if (count == 0) {
1276: return 0;
1.130 jdolecek 1277: }
1278:
1.280.2.1 skrll 1279: /* Check for duplicates. */
1.237 ad 1280: rw_enter(&exec_lock, RW_WRITER);
1.280.2.1 skrll 1281: for (i = 0; i < count; i++) {
1282: LIST_FOREACH(it, &ex_head, ex_list) {
1283: /* assume unique (makecmds, probe_func, emulation) */
1284: if (it->ex_sw->es_makecmds == esp[i].es_makecmds &&
1285: it->ex_sw->u.elf_probe_func ==
1286: esp[i].u.elf_probe_func &&
1287: it->ex_sw->es_emul == esp[i].es_emul) {
1288: rw_exit(&exec_lock);
1289: return EEXIST;
1.130 jdolecek 1290: }
1291: }
1292: }
1293:
1.280.2.1 skrll 1294: /* Allocate new entries. */
1295: for (i = 0; i < count; i++) {
1296: it = kmem_alloc(sizeof(*it), KM_SLEEP);
1297: it->ex_sw = &esp[i];
1298: LIST_INSERT_HEAD(&ex_head, it, ex_list);
1.130 jdolecek 1299: }
1300:
1301: /* update execsw[] */
1302: exec_init(0);
1.237 ad 1303: rw_exit(&exec_lock);
1.280.2.1 skrll 1304: return 0;
1.130 jdolecek 1305: }
1306:
1307: /*
1308: * Remove execsw[] entry.
1309: */
1310: int
1.280.2.1 skrll 1311: exec_remove(struct execsw *esp, int count)
1.130 jdolecek 1312: {
1.280.2.1 skrll 1313: struct exec_entry *it, *next;
1314: int i;
1315: const struct proclist_desc *pd;
1316: proc_t *p;
1.130 jdolecek 1317:
1.280.2.1 skrll 1318: if (count == 0) {
1319: return 0;
1.130 jdolecek 1320: }
1.280.2.1 skrll 1321:
1322: /* Abort if any are busy. */
1323: rw_enter(&exec_lock, RW_WRITER);
1324: for (i = 0; i < count; i++) {
1325: mutex_enter(proc_lock);
1326: for (pd = proclists; pd->pd_list != NULL; pd++) {
1327: PROCLIST_FOREACH(p, pd->pd_list) {
1328: if (p->p_execsw == &esp[i]) {
1329: mutex_exit(proc_lock);
1330: rw_exit(&exec_lock);
1331: return EBUSY;
1332: }
1333: }
1334: }
1335: mutex_exit(proc_lock);
1.130 jdolecek 1336: }
1337:
1.280.2.1 skrll 1338: /* None are busy, so remove them all. */
1339: for (i = 0; i < count; i++) {
1340: for (it = LIST_FIRST(&ex_head); it != NULL; it = next) {
1341: next = LIST_NEXT(it, ex_list);
1342: if (it->ex_sw == &esp[i]) {
1343: LIST_REMOVE(it, ex_list);
1344: kmem_free(it, sizeof(*it));
1345: break;
1346: }
1347: }
1348: }
1.130 jdolecek 1349:
1350: /* update execsw[] */
1351: exec_init(0);
1.237 ad 1352: rw_exit(&exec_lock);
1.280.2.1 skrll 1353: return 0;
1.130 jdolecek 1354: }
1355:
1356: /*
1357: * Initialize exec structures. If init_boot is true, also does necessary
1358: * one-time initialization (it's called from main() that way).
1.147 jdolecek 1359: * Once system is multiuser, this should be called with exec_lock held,
1.130 jdolecek 1360: * i.e. via exec_{add|remove}().
1361: */
1362: int
1.138 lukem 1363: exec_init(int init_boot)
1.130 jdolecek 1364: {
1.280.2.1 skrll 1365: const struct execsw **sw;
1366: struct exec_entry *ex;
1367: SLIST_HEAD(,exec_entry) first;
1368: SLIST_HEAD(,exec_entry) any;
1369: SLIST_HEAD(,exec_entry) last;
1370: int i, sz;
1.130 jdolecek 1371:
1372: if (init_boot) {
1373: /* do one-time initializations */
1.237 ad 1374: rw_init(&exec_lock);
1.259 ad 1375: mutex_init(&sigobject_lock, MUTEX_DEFAULT, IPL_NONE);
1.277 ad 1376: pool_init(&exec_pool, NCARGS, 0, 0, PR_NOALIGN|PR_NOTOUCH,
1377: "execargs", &exec_palloc, IPL_NONE);
1378: pool_sethardlimit(&exec_pool, maxexec, "should not happen", 0);
1.280.2.1 skrll 1379: } else {
1380: KASSERT(rw_write_held(&exec_lock));
1381: }
1.130 jdolecek 1382:
1.280.2.1 skrll 1383: /* Sort each entry onto the appropriate queue. */
1384: SLIST_INIT(&first);
1385: SLIST_INIT(&any);
1386: SLIST_INIT(&last);
1387: sz = 0;
1388: LIST_FOREACH(ex, &ex_head, ex_list) {
1389: switch(ex->ex_sw->es_prio) {
1390: case EXECSW_PRIO_FIRST:
1391: SLIST_INSERT_HEAD(&first, ex, ex_slist);
1392: break;
1393: case EXECSW_PRIO_ANY:
1394: SLIST_INSERT_HEAD(&any, ex, ex_slist);
1395: break;
1396: case EXECSW_PRIO_LAST:
1397: SLIST_INSERT_HEAD(&last, ex, ex_slist);
1398: break;
1399: default:
1400: panic("exec_init");
1401: break;
1.130 jdolecek 1402: }
1.280.2.1 skrll 1403: sz++;
1.130 jdolecek 1404: }
1405:
1406: /*
1.280.2.1 skrll 1407: * Create new execsw[]. Ensure we do not try a zero-sized
1408: * allocation.
1.130 jdolecek 1409: */
1.280.2.1 skrll 1410: sw = kmem_alloc(sz * sizeof(struct execsw *) + 1, KM_SLEEP);
1411: i = 0;
1412: SLIST_FOREACH(ex, &first, ex_slist) {
1413: sw[i++] = ex->ex_sw;
1.130 jdolecek 1414: }
1.280.2.1 skrll 1415: SLIST_FOREACH(ex, &any, ex_slist) {
1416: sw[i++] = ex->ex_sw;
1417: }
1418: SLIST_FOREACH(ex, &last, ex_slist) {
1419: sw[i++] = ex->ex_sw;
1.130 jdolecek 1420: }
1421:
1.280.2.1 skrll 1422: /* Replace old execsw[] and free used memory. */
1423: if (execsw != NULL) {
1424: kmem_free(__UNCONST(execsw),
1425: nexecs * sizeof(struct execsw *) + 1);
1426: }
1427: execsw = sw;
1428: nexecs = sz;
1.130 jdolecek 1429:
1.280.2.1 skrll 1430: /* Figure out the maximum size of an exec header. */
1431: exec_maxhdrsz = sizeof(int);
1.130 jdolecek 1432: for (i = 0; i < nexecs; i++) {
1433: if (execsw[i]->es_hdrsz > exec_maxhdrsz)
1434: exec_maxhdrsz = execsw[i]->es_hdrsz;
1435: }
1436:
1437: return 0;
1438: }
1.171 chs 1439:
1440: static int
1441: exec_sigcode_map(struct proc *p, const struct emul *e)
1442: {
1443: vaddr_t va;
1444: vsize_t sz;
1445: int error;
1446: struct uvm_object *uobj;
1447:
1.184 drochner 1448: sz = (vaddr_t)e->e_esigcode - (vaddr_t)e->e_sigcode;
1449:
1450: if (e->e_sigobject == NULL || sz == 0) {
1.171 chs 1451: return 0;
1452: }
1453:
1454: /*
1455: * If we don't have a sigobject for this emulation, create one.
1456: *
1457: * sigobject is an anonymous memory object (just like SYSV shared
1458: * memory) that we keep a permanent reference to and that we map
1459: * in all processes that need this sigcode. The creation is simple,
1460: * we create an object, add a permanent reference to it, map it in
1461: * kernel space, copy out the sigcode to it and unmap it.
1.189 jdolecek 1462: * We map it with PROT_READ|PROT_EXEC into the process just
1463: * the way sys_mmap() would map it.
1.171 chs 1464: */
1465:
1466: uobj = *e->e_sigobject;
1467: if (uobj == NULL) {
1.259 ad 1468: mutex_enter(&sigobject_lock);
1469: if ((uobj = *e->e_sigobject) == NULL) {
1470: uobj = uao_create(sz, 0);
1471: (*uobj->pgops->pgo_reference)(uobj);
1472: va = vm_map_min(kernel_map);
1473: if ((error = uvm_map(kernel_map, &va, round_page(sz),
1474: uobj, 0, 0,
1475: UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
1476: UVM_INH_SHARE, UVM_ADV_RANDOM, 0)))) {
1477: printf("kernel mapping failed %d\n", error);
1478: (*uobj->pgops->pgo_detach)(uobj);
1479: mutex_exit(&sigobject_lock);
1480: return (error);
1481: }
1482: memcpy((void *)va, e->e_sigcode, sz);
1.171 chs 1483: #ifdef PMAP_NEED_PROCWR
1.259 ad 1484: pmap_procwr(&proc0, va, sz);
1.171 chs 1485: #endif
1.259 ad 1486: uvm_unmap(kernel_map, va, va + round_page(sz));
1487: *e->e_sigobject = uobj;
1488: }
1489: mutex_exit(&sigobject_lock);
1.171 chs 1490: }
1491:
1.172 enami 1492: /* Just a hint to uvm_map where to put it. */
1.195 fvdl 1493: va = e->e_vm_default_addr(p, (vaddr_t)p->p_vmspace->vm_daddr,
1494: round_page(sz));
1.187 chs 1495:
1496: #ifdef __alpha__
1497: /*
1498: * Tru64 puts /sbin/loader at the end of user virtual memory,
1499: * which causes the above calculation to put the sigcode at
1500: * an invalid address. Put it just below the text instead.
1501: */
1.193 jmc 1502: if (va == (vaddr_t)vm_map_max(&p->p_vmspace->vm_map)) {
1.187 chs 1503: va = (vaddr_t)p->p_vmspace->vm_taddr - round_page(sz);
1504: }
1505: #endif
1506:
1.171 chs 1507: (*uobj->pgops->pgo_reference)(uobj);
1508: error = uvm_map(&p->p_vmspace->vm_map, &va, round_page(sz),
1509: uobj, 0, 0,
1510: UVM_MAPFLAG(UVM_PROT_RX, UVM_PROT_RX, UVM_INH_SHARE,
1511: UVM_ADV_RANDOM, 0));
1512: if (error) {
1513: (*uobj->pgops->pgo_detach)(uobj);
1514: return (error);
1515: }
1516: p->p_sigctx.ps_sigcode = (void *)va;
1517: return (0);
1518: }
CVSweb <webmaster@jp.NetBSD.org>